1/* 2 * segment.c - NILFS segment constructor. 3 * 4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; either version 2 of the License, or 9 * (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 19 * 20 * Written by Ryusuke Konishi <ryusuke@osrg.net> 21 * 22 */ 23 24#include <linux/pagemap.h> 25#include <linux/buffer_head.h> 26#include <linux/writeback.h> 27#include <linux/bio.h> 28#include <linux/completion.h> 29#include <linux/blkdev.h> 30#include <linux/backing-dev.h> 31#include <linux/freezer.h> 32#include <linux/kthread.h> 33#include <linux/crc32.h> 34#include <linux/pagevec.h> 35#include <linux/slab.h> 36#include "nilfs.h" 37#include "btnode.h" 38#include "page.h" 39#include "segment.h" 40#include "sufile.h" 41#include "cpfile.h" 42#include "ifile.h" 43#include "segbuf.h" 44 45 46/* 47 * Segment constructor 48 */ 49#define SC_N_INODEVEC 16 /* Size of locally allocated inode vector */ 50 51#define SC_MAX_SEGDELTA 64 /* Upper limit of the number of segments 52 appended in collection retry loop */ 53 54/* Construction mode */ 55enum { 56 SC_LSEG_SR = 1, /* Make a logical segment having a super root */ 57 SC_LSEG_DSYNC, /* Flush data blocks of a given file and make 58 a logical segment without a super root */ 59 SC_FLUSH_FILE, /* Flush data files, leads to segment writes without 60 creating a checkpoint */ 61 
SC_FLUSH_DAT, /* Flush DAT file. This also creates segments without 62 a checkpoint */ 63}; 64 65/* Stage numbers of dirty block collection */ 66enum { 67 NILFS_ST_INIT = 0, 68 NILFS_ST_GC, /* Collecting dirty blocks for GC */ 69 NILFS_ST_FILE, 70 NILFS_ST_IFILE, 71 NILFS_ST_CPFILE, 72 NILFS_ST_SUFILE, 73 NILFS_ST_DAT, 74 NILFS_ST_SR, /* Super root */ 75 NILFS_ST_DSYNC, /* Data sync blocks */ 76 NILFS_ST_DONE, 77}; 78 79/* State flags of collection */ 80#define NILFS_CF_NODE 0x0001 /* Collecting node blocks */ 81#define NILFS_CF_IFILE_STARTED 0x0002 /* IFILE stage has started */ 82#define NILFS_CF_SUFREED 0x0004 /* segment usages has been freed */ 83#define NILFS_CF_HISTORY_MASK (NILFS_CF_IFILE_STARTED | NILFS_CF_SUFREED) 84 85/* Operations depending on the construction mode and file type */ 86struct nilfs_sc_operations { 87 int (*collect_data)(struct nilfs_sc_info *, struct buffer_head *, 88 struct inode *); 89 int (*collect_node)(struct nilfs_sc_info *, struct buffer_head *, 90 struct inode *); 91 int (*collect_bmap)(struct nilfs_sc_info *, struct buffer_head *, 92 struct inode *); 93 void (*write_data_binfo)(struct nilfs_sc_info *, 94 struct nilfs_segsum_pointer *, 95 union nilfs_binfo *); 96 void (*write_node_binfo)(struct nilfs_sc_info *, 97 struct nilfs_segsum_pointer *, 98 union nilfs_binfo *); 99}; 100 101/* 102 * Other definitions 103 */ 104static void nilfs_segctor_start_timer(struct nilfs_sc_info *); 105static void nilfs_segctor_do_flush(struct nilfs_sc_info *, int); 106static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *); 107static void nilfs_dispose_list(struct nilfs_sb_info *, struct list_head *, 108 int); 109 110#define nilfs_cnt32_gt(a, b) \ 111 (typecheck(__u32, a) && typecheck(__u32, b) && \ 112 ((__s32)(b) - (__s32)(a) < 0)) 113#define nilfs_cnt32_ge(a, b) \ 114 (typecheck(__u32, a) && typecheck(__u32, b) && \ 115 ((__s32)(a) - (__s32)(b) >= 0)) 116#define nilfs_cnt32_lt(a, b) nilfs_cnt32_gt(b, a) 117#define nilfs_cnt32_le(a, b) 
nilfs_cnt32_ge(b, a) 118 119static int nilfs_prepare_segment_lock(struct nilfs_transaction_info *ti) 120{ 121 struct nilfs_transaction_info *cur_ti = current->journal_info; 122 void *save = NULL; 123 124 if (cur_ti) { 125 if (cur_ti->ti_magic == NILFS_TI_MAGIC) 126 return ++cur_ti->ti_count; 127 else { 128 /* 129 * If journal_info field is occupied by other FS, 130 * it is saved and will be restored on 131 * nilfs_transaction_commit(). 132 */ 133 printk(KERN_WARNING 134 "NILFS warning: journal info from a different " 135 "FS\n"); 136 save = current->journal_info; 137 } 138 } 139 if (!ti) { 140 ti = kmem_cache_alloc(nilfs_transaction_cachep, GFP_NOFS); 141 if (!ti) 142 return -ENOMEM; 143 ti->ti_flags = NILFS_TI_DYNAMIC_ALLOC; 144 } else { 145 ti->ti_flags = 0; 146 } 147 ti->ti_count = 0; 148 ti->ti_save = save; 149 ti->ti_magic = NILFS_TI_MAGIC; 150 current->journal_info = ti; 151 return 0; 152} 153 154/** 155 * nilfs_transaction_begin - start indivisible file operations. 156 * @sb: super block 157 * @ti: nilfs_transaction_info 158 * @vacancy_check: flags for vacancy rate checks 159 * 160 * nilfs_transaction_begin() acquires a reader/writer semaphore, called 161 * the segment semaphore, to make a segment construction and write tasks 162 * exclusive. The function is used with nilfs_transaction_commit() in pairs. 163 * The region enclosed by these two functions can be nested. To avoid a 164 * deadlock, the semaphore is only acquired or released in the outermost call. 165 * 166 * This function allocates a nilfs_transaction_info struct to keep context 167 * information on it. It is initialized and hooked onto the current task in 168 * the outermost call. If a pre-allocated struct is given to @ti, it is used 169 * instead; otherwise a new struct is assigned from a slab. 170 * 171 * When @vacancy_check flag is set, this function will check the amount of 172 * free space, and will wait for the GC to reclaim disk space if low capacity. 
173 * 174 * Return Value: On success, 0 is returned. On error, one of the following 175 * negative error code is returned. 176 * 177 * %-ENOMEM - Insufficient memory available. 178 * 179 * %-ENOSPC - No space left on device 180 */ 181int nilfs_transaction_begin(struct super_block *sb, 182 struct nilfs_transaction_info *ti, 183 int vacancy_check) 184{ 185 struct nilfs_sb_info *sbi; 186 struct the_nilfs *nilfs; 187 int ret = nilfs_prepare_segment_lock(ti); 188 189 if (unlikely(ret < 0)) 190 return ret; 191 if (ret > 0) 192 return 0; 193 194 sbi = NILFS_SB(sb); 195 nilfs = sbi->s_nilfs; 196 down_read(&nilfs->ns_segctor_sem); 197 if (vacancy_check && nilfs_near_disk_full(nilfs)) { 198 up_read(&nilfs->ns_segctor_sem); 199 ret = -ENOSPC; 200 goto failed; 201 } 202 return 0; 203 204 failed: 205 ti = current->journal_info; 206 current->journal_info = ti->ti_save; 207 if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) 208 kmem_cache_free(nilfs_transaction_cachep, ti); 209 return ret; 210} 211 212/** 213 * nilfs_transaction_commit - commit indivisible file operations. 214 * @sb: super block 215 * 216 * nilfs_transaction_commit() releases the read semaphore which is 217 * acquired by nilfs_transaction_begin(). This is only performed 218 * in outermost call of this function. If a commit flag is set, 219 * nilfs_transaction_commit() sets a timer to start the segment 220 * constructor. If a sync flag is set, it starts construction 221 * directly. 
222 */ 223int nilfs_transaction_commit(struct super_block *sb) 224{ 225 struct nilfs_transaction_info *ti = current->journal_info; 226 struct nilfs_sb_info *sbi; 227 struct nilfs_sc_info *sci; 228 int err = 0; 229 230 BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC); 231 ti->ti_flags |= NILFS_TI_COMMIT; 232 if (ti->ti_count > 0) { 233 ti->ti_count--; 234 return 0; 235 } 236 sbi = NILFS_SB(sb); 237 sci = NILFS_SC(sbi); 238 if (sci != NULL) { 239 if (ti->ti_flags & NILFS_TI_COMMIT) 240 nilfs_segctor_start_timer(sci); 241 if (atomic_read(&sbi->s_nilfs->ns_ndirtyblks) > 242 sci->sc_watermark) 243 nilfs_segctor_do_flush(sci, 0); 244 } 245 up_read(&sbi->s_nilfs->ns_segctor_sem); 246 current->journal_info = ti->ti_save; 247 248 if (ti->ti_flags & NILFS_TI_SYNC) 249 err = nilfs_construct_segment(sb); 250 if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) 251 kmem_cache_free(nilfs_transaction_cachep, ti); 252 return err; 253} 254 255void nilfs_transaction_abort(struct super_block *sb) 256{ 257 struct nilfs_transaction_info *ti = current->journal_info; 258 259 BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC); 260 if (ti->ti_count > 0) { 261 ti->ti_count--; 262 return; 263 } 264 up_read(&NILFS_SB(sb)->s_nilfs->ns_segctor_sem); 265 266 current->journal_info = ti->ti_save; 267 if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) 268 kmem_cache_free(nilfs_transaction_cachep, ti); 269} 270 271void nilfs_relax_pressure_in_lock(struct super_block *sb) 272{ 273 struct nilfs_sb_info *sbi = NILFS_SB(sb); 274 struct nilfs_sc_info *sci = NILFS_SC(sbi); 275 struct the_nilfs *nilfs = sbi->s_nilfs; 276 277 if (!sci || !sci->sc_flush_request) 278 return; 279 280 set_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags); 281 up_read(&nilfs->ns_segctor_sem); 282 283 down_write(&nilfs->ns_segctor_sem); 284 if (sci->sc_flush_request && 285 test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags)) { 286 struct nilfs_transaction_info *ti = current->journal_info; 287 288 ti->ti_flags |= NILFS_TI_WRITER; 289 
nilfs_segctor_do_immediate_flush(sci); 290 ti->ti_flags &= ~NILFS_TI_WRITER; 291 } 292 downgrade_write(&nilfs->ns_segctor_sem); 293} 294 295static void nilfs_transaction_lock(struct nilfs_sb_info *sbi, 296 struct nilfs_transaction_info *ti, 297 int gcflag) 298{ 299 struct nilfs_transaction_info *cur_ti = current->journal_info; 300 301 WARN_ON(cur_ti); 302 ti->ti_flags = NILFS_TI_WRITER; 303 ti->ti_count = 0; 304 ti->ti_save = cur_ti; 305 ti->ti_magic = NILFS_TI_MAGIC; 306 INIT_LIST_HEAD(&ti->ti_garbage); 307 current->journal_info = ti; 308 309 for (;;) { 310 down_write(&sbi->s_nilfs->ns_segctor_sem); 311 if (!test_bit(NILFS_SC_PRIOR_FLUSH, &NILFS_SC(sbi)->sc_flags)) 312 break; 313 314 nilfs_segctor_do_immediate_flush(NILFS_SC(sbi)); 315 316 up_write(&sbi->s_nilfs->ns_segctor_sem); 317 yield(); 318 } 319 if (gcflag) 320 ti->ti_flags |= NILFS_TI_GC; 321} 322 323static void nilfs_transaction_unlock(struct nilfs_sb_info *sbi) 324{ 325 struct nilfs_transaction_info *ti = current->journal_info; 326 327 BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC); 328 BUG_ON(ti->ti_count > 0); 329 330 up_write(&sbi->s_nilfs->ns_segctor_sem); 331 current->journal_info = ti->ti_save; 332 if (!list_empty(&ti->ti_garbage)) 333 nilfs_dispose_list(sbi, &ti->ti_garbage, 0); 334} 335 336static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci, 337 struct nilfs_segsum_pointer *ssp, 338 unsigned bytes) 339{ 340 struct nilfs_segment_buffer *segbuf = sci->sc_curseg; 341 unsigned blocksize = sci->sc_super->s_blocksize; 342 void *p; 343 344 if (unlikely(ssp->offset + bytes > blocksize)) { 345 ssp->offset = 0; 346 BUG_ON(NILFS_SEGBUF_BH_IS_LAST(ssp->bh, 347 &segbuf->sb_segsum_buffers)); 348 ssp->bh = NILFS_SEGBUF_NEXT_BH(ssp->bh); 349 } 350 p = ssp->bh->b_data + ssp->offset; 351 ssp->offset += bytes; 352 return p; 353} 354 355/** 356 * nilfs_segctor_reset_segment_buffer - reset the current segment buffer 357 * @sci: nilfs_sc_info 358 */ 359static int 
nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci) 360{ 361 struct nilfs_segment_buffer *segbuf = sci->sc_curseg; 362 struct buffer_head *sumbh; 363 unsigned sumbytes; 364 unsigned flags = 0; 365 int err; 366 367 if (nilfs_doing_gc()) 368 flags = NILFS_SS_GC; 369 err = nilfs_segbuf_reset(segbuf, flags, sci->sc_seg_ctime, 370 sci->sc_sbi->s_nilfs->ns_cno); 371 if (unlikely(err)) 372 return err; 373 374 sumbh = NILFS_SEGBUF_FIRST_BH(&segbuf->sb_segsum_buffers); 375 sumbytes = segbuf->sb_sum.sumbytes; 376 sci->sc_finfo_ptr.bh = sumbh; sci->sc_finfo_ptr.offset = sumbytes; 377 sci->sc_binfo_ptr.bh = sumbh; sci->sc_binfo_ptr.offset = sumbytes; 378 sci->sc_blk_cnt = sci->sc_datablk_cnt = 0; 379 return 0; 380} 381 382static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci) 383{ 384 sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks; 385 if (NILFS_SEGBUF_IS_LAST(sci->sc_curseg, &sci->sc_segbufs)) 386 return -E2BIG; /* The current segment is filled up 387 (internal code) */ 388 sci->sc_curseg = NILFS_NEXT_SEGBUF(sci->sc_curseg); 389 return nilfs_segctor_reset_segment_buffer(sci); 390} 391 392static int nilfs_segctor_add_super_root(struct nilfs_sc_info *sci) 393{ 394 struct nilfs_segment_buffer *segbuf = sci->sc_curseg; 395 int err; 396 397 if (segbuf->sb_sum.nblocks >= segbuf->sb_rest_blocks) { 398 err = nilfs_segctor_feed_segment(sci); 399 if (err) 400 return err; 401 segbuf = sci->sc_curseg; 402 } 403 err = nilfs_segbuf_extend_payload(segbuf, &segbuf->sb_super_root); 404 if (likely(!err)) 405 segbuf->sb_sum.flags |= NILFS_SS_SR; 406 return err; 407} 408 409/* 410 * Functions for making segment summary and payloads 411 */ 412static int nilfs_segctor_segsum_block_required( 413 struct nilfs_sc_info *sci, const struct nilfs_segsum_pointer *ssp, 414 unsigned binfo_size) 415{ 416 unsigned blocksize = sci->sc_super->s_blocksize; 417 /* Size of finfo and binfo is enough small against blocksize */ 418 419 return ssp->offset + binfo_size + 420 
(!sci->sc_blk_cnt ? sizeof(struct nilfs_finfo) : 0) > 421 blocksize; 422} 423 424static void nilfs_segctor_begin_finfo(struct nilfs_sc_info *sci, 425 struct inode *inode) 426{ 427 sci->sc_curseg->sb_sum.nfinfo++; 428 sci->sc_binfo_ptr = sci->sc_finfo_ptr; 429 nilfs_segctor_map_segsum_entry( 430 sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo)); 431 432 if (inode->i_sb && !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags)) 433 set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags); 434 /* skip finfo */ 435} 436 437static void nilfs_segctor_end_finfo(struct nilfs_sc_info *sci, 438 struct inode *inode) 439{ 440 struct nilfs_finfo *finfo; 441 struct nilfs_inode_info *ii; 442 struct nilfs_segment_buffer *segbuf; 443 444 if (sci->sc_blk_cnt == 0) 445 return; 446 447 ii = NILFS_I(inode); 448 finfo = nilfs_segctor_map_segsum_entry(sci, &sci->sc_finfo_ptr, 449 sizeof(*finfo)); 450 finfo->fi_ino = cpu_to_le64(inode->i_ino); 451 finfo->fi_nblocks = cpu_to_le32(sci->sc_blk_cnt); 452 finfo->fi_ndatablk = cpu_to_le32(sci->sc_datablk_cnt); 453 finfo->fi_cno = cpu_to_le64(ii->i_cno); 454 455 segbuf = sci->sc_curseg; 456 segbuf->sb_sum.sumbytes = sci->sc_binfo_ptr.offset + 457 sci->sc_super->s_blocksize * (segbuf->sb_sum.nsumblk - 1); 458 sci->sc_finfo_ptr = sci->sc_binfo_ptr; 459 sci->sc_blk_cnt = sci->sc_datablk_cnt = 0; 460} 461 462static int nilfs_segctor_add_file_block(struct nilfs_sc_info *sci, 463 struct buffer_head *bh, 464 struct inode *inode, 465 unsigned binfo_size) 466{ 467 struct nilfs_segment_buffer *segbuf; 468 int required, err = 0; 469 470 retry: 471 segbuf = sci->sc_curseg; 472 required = nilfs_segctor_segsum_block_required( 473 sci, &sci->sc_binfo_ptr, binfo_size); 474 if (segbuf->sb_sum.nblocks + required + 1 > segbuf->sb_rest_blocks) { 475 nilfs_segctor_end_finfo(sci, inode); 476 err = nilfs_segctor_feed_segment(sci); 477 if (err) 478 return err; 479 goto retry; 480 } 481 if (unlikely(required)) { 482 err = nilfs_segbuf_extend_segsum(segbuf); 483 if (unlikely(err)) 484 
goto failed; 485 } 486 if (sci->sc_blk_cnt == 0) 487 nilfs_segctor_begin_finfo(sci, inode); 488 489 nilfs_segctor_map_segsum_entry(sci, &sci->sc_binfo_ptr, binfo_size); 490 /* Substitution to vblocknr is delayed until update_blocknr() */ 491 nilfs_segbuf_add_file_buffer(segbuf, bh); 492 sci->sc_blk_cnt++; 493 failed: 494 return err; 495} 496 497static int nilfs_handle_bmap_error(int err, const char *fname, 498 struct inode *inode, struct super_block *sb) 499{ 500 if (err == -EINVAL) { 501 nilfs_error(sb, fname, "broken bmap (inode=%lu)\n", 502 inode->i_ino); 503 err = -EIO; 504 } 505 return err; 506} 507 508/* 509 * Callback functions that enumerate, mark, and collect dirty blocks 510 */ 511static int nilfs_collect_file_data(struct nilfs_sc_info *sci, 512 struct buffer_head *bh, struct inode *inode) 513{ 514 int err; 515 516 err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh); 517 if (unlikely(err < 0)) 518 return nilfs_handle_bmap_error(err, __func__, inode, 519 sci->sc_super); 520 521 err = nilfs_segctor_add_file_block(sci, bh, inode, 522 sizeof(struct nilfs_binfo_v)); 523 if (!err) 524 sci->sc_datablk_cnt++; 525 return err; 526} 527 528static int nilfs_collect_file_node(struct nilfs_sc_info *sci, 529 struct buffer_head *bh, 530 struct inode *inode) 531{ 532 int err; 533 534 err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh); 535 if (unlikely(err < 0)) 536 return nilfs_handle_bmap_error(err, __func__, inode, 537 sci->sc_super); 538 return 0; 539} 540 541static int nilfs_collect_file_bmap(struct nilfs_sc_info *sci, 542 struct buffer_head *bh, 543 struct inode *inode) 544{ 545 WARN_ON(!buffer_dirty(bh)); 546 return nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64)); 547} 548 549static void nilfs_write_file_data_binfo(struct nilfs_sc_info *sci, 550 struct nilfs_segsum_pointer *ssp, 551 union nilfs_binfo *binfo) 552{ 553 struct nilfs_binfo_v *binfo_v = nilfs_segctor_map_segsum_entry( 554 sci, ssp, sizeof(*binfo_v)); 555 *binfo_v = binfo->bi_v; 556} 
557 558static void nilfs_write_file_node_binfo(struct nilfs_sc_info *sci, 559 struct nilfs_segsum_pointer *ssp, 560 union nilfs_binfo *binfo) 561{ 562 __le64 *vblocknr = nilfs_segctor_map_segsum_entry( 563 sci, ssp, sizeof(*vblocknr)); 564 *vblocknr = binfo->bi_v.bi_vblocknr; 565} 566 567static struct nilfs_sc_operations nilfs_sc_file_ops = { 568 .collect_data = nilfs_collect_file_data, 569 .collect_node = nilfs_collect_file_node, 570 .collect_bmap = nilfs_collect_file_bmap, 571 .write_data_binfo = nilfs_write_file_data_binfo, 572 .write_node_binfo = nilfs_write_file_node_binfo, 573}; 574 575static int nilfs_collect_dat_data(struct nilfs_sc_info *sci, 576 struct buffer_head *bh, struct inode *inode) 577{ 578 int err; 579 580 err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh); 581 if (unlikely(err < 0)) 582 return nilfs_handle_bmap_error(err, __func__, inode, 583 sci->sc_super); 584 585 err = nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64)); 586 if (!err) 587 sci->sc_datablk_cnt++; 588 return err; 589} 590 591static int nilfs_collect_dat_bmap(struct nilfs_sc_info *sci, 592 struct buffer_head *bh, struct inode *inode) 593{ 594 WARN_ON(!buffer_dirty(bh)); 595 return nilfs_segctor_add_file_block(sci, bh, inode, 596 sizeof(struct nilfs_binfo_dat)); 597} 598 599static void nilfs_write_dat_data_binfo(struct nilfs_sc_info *sci, 600 struct nilfs_segsum_pointer *ssp, 601 union nilfs_binfo *binfo) 602{ 603 __le64 *blkoff = nilfs_segctor_map_segsum_entry(sci, ssp, 604 sizeof(*blkoff)); 605 *blkoff = binfo->bi_dat.bi_blkoff; 606} 607 608static void nilfs_write_dat_node_binfo(struct nilfs_sc_info *sci, 609 struct nilfs_segsum_pointer *ssp, 610 union nilfs_binfo *binfo) 611{ 612 struct nilfs_binfo_dat *binfo_dat = 613 nilfs_segctor_map_segsum_entry(sci, ssp, sizeof(*binfo_dat)); 614 *binfo_dat = binfo->bi_dat; 615} 616 617static struct nilfs_sc_operations nilfs_sc_dat_ops = { 618 .collect_data = nilfs_collect_dat_data, 619 .collect_node = nilfs_collect_file_node, 
620 .collect_bmap = nilfs_collect_dat_bmap, 621 .write_data_binfo = nilfs_write_dat_data_binfo, 622 .write_node_binfo = nilfs_write_dat_node_binfo, 623}; 624 625static struct nilfs_sc_operations nilfs_sc_dsync_ops = { 626 .collect_data = nilfs_collect_file_data, 627 .collect_node = NULL, 628 .collect_bmap = NULL, 629 .write_data_binfo = nilfs_write_file_data_binfo, 630 .write_node_binfo = NULL, 631}; 632 633static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode, 634 struct list_head *listp, 635 size_t nlimit, 636 loff_t start, loff_t end) 637{ 638 struct address_space *mapping = inode->i_mapping; 639 struct pagevec pvec; 640 pgoff_t index = 0, last = ULONG_MAX; 641 size_t ndirties = 0; 642 int i; 643 644 if (unlikely(start != 0 || end != LLONG_MAX)) { 645 /* 646 * A valid range is given for sync-ing data pages. The 647 * range is rounded to per-page; extra dirty buffers 648 * may be included if blocksize < pagesize. 649 */ 650 index = start >> PAGE_SHIFT; 651 last = end >> PAGE_SHIFT; 652 } 653 pagevec_init(&pvec, 0); 654 repeat: 655 if (unlikely(index > last) || 656 !pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, 657 min_t(pgoff_t, last - index, 658 PAGEVEC_SIZE - 1) + 1)) 659 return ndirties; 660 661 for (i = 0; i < pagevec_count(&pvec); i++) { 662 struct buffer_head *bh, *head; 663 struct page *page = pvec.pages[i]; 664 665 if (unlikely(page->index > last)) 666 break; 667 668 if (mapping->host) { 669 lock_page(page); 670 if (!page_has_buffers(page)) 671 create_empty_buffers(page, 672 1 << inode->i_blkbits, 0); 673 unlock_page(page); 674 } 675 676 bh = head = page_buffers(page); 677 do { 678 if (!buffer_dirty(bh)) 679 continue; 680 get_bh(bh); 681 list_add_tail(&bh->b_assoc_buffers, listp); 682 ndirties++; 683 if (unlikely(ndirties >= nlimit)) { 684 pagevec_release(&pvec); 685 cond_resched(); 686 return ndirties; 687 } 688 } while (bh = bh->b_this_page, bh != head); 689 } 690 pagevec_release(&pvec); 691 cond_resched(); 692 goto repeat; 
693} 694 695static void nilfs_lookup_dirty_node_buffers(struct inode *inode, 696 struct list_head *listp) 697{ 698 struct nilfs_inode_info *ii = NILFS_I(inode); 699 struct address_space *mapping = &ii->i_btnode_cache; 700 struct pagevec pvec; 701 struct buffer_head *bh, *head; 702 unsigned int i; 703 pgoff_t index = 0; 704 705 pagevec_init(&pvec, 0); 706 707 while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, 708 PAGEVEC_SIZE)) { 709 for (i = 0; i < pagevec_count(&pvec); i++) { 710 bh = head = page_buffers(pvec.pages[i]); 711 do { 712 if (buffer_dirty(bh)) { 713 get_bh(bh); 714 list_add_tail(&bh->b_assoc_buffers, 715 listp); 716 } 717 bh = bh->b_this_page; 718 } while (bh != head); 719 } 720 pagevec_release(&pvec); 721 cond_resched(); 722 } 723} 724 725static void nilfs_dispose_list(struct nilfs_sb_info *sbi, 726 struct list_head *head, int force) 727{ 728 struct nilfs_inode_info *ii, *n; 729 struct nilfs_inode_info *ivec[SC_N_INODEVEC], **pii; 730 unsigned nv = 0; 731 732 while (!list_empty(head)) { 733 spin_lock(&sbi->s_inode_lock); 734 list_for_each_entry_safe(ii, n, head, i_dirty) { 735 list_del_init(&ii->i_dirty); 736 if (force) { 737 if (unlikely(ii->i_bh)) { 738 brelse(ii->i_bh); 739 ii->i_bh = NULL; 740 } 741 } else if (test_bit(NILFS_I_DIRTY, &ii->i_state)) { 742 set_bit(NILFS_I_QUEUED, &ii->i_state); 743 list_add_tail(&ii->i_dirty, 744 &sbi->s_dirty_files); 745 continue; 746 } 747 ivec[nv++] = ii; 748 if (nv == SC_N_INODEVEC) 749 break; 750 } 751 spin_unlock(&sbi->s_inode_lock); 752 753 for (pii = ivec; nv > 0; pii++, nv--) 754 iput(&(*pii)->vfs_inode); 755 } 756} 757 758static int nilfs_test_metadata_dirty(struct nilfs_sb_info *sbi) 759{ 760 struct the_nilfs *nilfs = sbi->s_nilfs; 761 int ret = 0; 762 763 if (nilfs_mdt_fetch_dirty(sbi->s_ifile)) 764 ret++; 765 if (nilfs_mdt_fetch_dirty(nilfs->ns_cpfile)) 766 ret++; 767 if (nilfs_mdt_fetch_dirty(nilfs->ns_sufile)) 768 ret++; 769 if (ret || nilfs_doing_gc()) 770 if 
(nilfs_mdt_fetch_dirty(nilfs_dat_inode(nilfs))) 771 ret++; 772 return ret; 773} 774 775static int nilfs_segctor_clean(struct nilfs_sc_info *sci) 776{ 777 return list_empty(&sci->sc_dirty_files) && 778 !test_bit(NILFS_SC_DIRTY, &sci->sc_flags) && 779 sci->sc_nfreesegs == 0 && 780 (!nilfs_doing_gc() || list_empty(&sci->sc_gc_inodes)); 781} 782 783static int nilfs_segctor_confirm(struct nilfs_sc_info *sci) 784{ 785 struct nilfs_sb_info *sbi = sci->sc_sbi; 786 int ret = 0; 787 788 if (nilfs_test_metadata_dirty(sbi)) 789 set_bit(NILFS_SC_DIRTY, &sci->sc_flags); 790 791 spin_lock(&sbi->s_inode_lock); 792 if (list_empty(&sbi->s_dirty_files) && nilfs_segctor_clean(sci)) 793 ret++; 794 795 spin_unlock(&sbi->s_inode_lock); 796 return ret; 797} 798 799static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci) 800{ 801 struct nilfs_sb_info *sbi = sci->sc_sbi; 802 struct the_nilfs *nilfs = sbi->s_nilfs; 803 804 nilfs_mdt_clear_dirty(sbi->s_ifile); 805 nilfs_mdt_clear_dirty(nilfs->ns_cpfile); 806 nilfs_mdt_clear_dirty(nilfs->ns_sufile); 807 nilfs_mdt_clear_dirty(nilfs_dat_inode(nilfs)); 808} 809 810static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci) 811{ 812 struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs; 813 struct buffer_head *bh_cp; 814 struct nilfs_checkpoint *raw_cp; 815 int err; 816 817 err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 1, 818 &raw_cp, &bh_cp); 819 if (likely(!err)) { 820 /* The following code is duplicated with cpfile. 
But, it is 821 needed to collect the checkpoint even if it was not newly 822 created */ 823 nilfs_mdt_mark_buffer_dirty(bh_cp); 824 nilfs_mdt_mark_dirty(nilfs->ns_cpfile); 825 nilfs_cpfile_put_checkpoint( 826 nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); 827 } else 828 WARN_ON(err == -EINVAL || err == -ENOENT); 829 830 return err; 831} 832 833static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci) 834{ 835 struct nilfs_sb_info *sbi = sci->sc_sbi; 836 struct the_nilfs *nilfs = sbi->s_nilfs; 837 struct buffer_head *bh_cp; 838 struct nilfs_checkpoint *raw_cp; 839 int err; 840 841 err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 0, 842 &raw_cp, &bh_cp); 843 if (unlikely(err)) { 844 WARN_ON(err == -EINVAL || err == -ENOENT); 845 goto failed_ibh; 846 } 847 raw_cp->cp_snapshot_list.ssl_next = 0; 848 raw_cp->cp_snapshot_list.ssl_prev = 0; 849 raw_cp->cp_inodes_count = 850 cpu_to_le64(atomic_read(&sbi->s_inodes_count)); 851 raw_cp->cp_blocks_count = 852 cpu_to_le64(atomic_read(&sbi->s_blocks_count)); 853 raw_cp->cp_nblk_inc = 854 cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc); 855 raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime); 856 raw_cp->cp_cno = cpu_to_le64(nilfs->ns_cno); 857 858 if (test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags)) 859 nilfs_checkpoint_clear_minor(raw_cp); 860 else 861 nilfs_checkpoint_set_minor(raw_cp); 862 863 nilfs_write_inode_common(sbi->s_ifile, &raw_cp->cp_ifile_inode, 1); 864 nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); 865 return 0; 866 867 failed_ibh: 868 return err; 869} 870 871static void nilfs_fill_in_file_bmap(struct inode *ifile, 872 struct nilfs_inode_info *ii) 873 874{ 875 struct buffer_head *ibh; 876 struct nilfs_inode *raw_inode; 877 878 if (test_bit(NILFS_I_BMAP, &ii->i_state)) { 879 ibh = ii->i_bh; 880 BUG_ON(!ibh); 881 raw_inode = nilfs_ifile_map_inode(ifile, ii->vfs_inode.i_ino, 882 ibh); 883 nilfs_bmap_write(ii->i_bmap, raw_inode); 884 nilfs_ifile_unmap_inode(ifile, 
ii->vfs_inode.i_ino, ibh); 885 } 886} 887 888static void nilfs_segctor_fill_in_file_bmap(struct nilfs_sc_info *sci, 889 struct inode *ifile) 890{ 891 struct nilfs_inode_info *ii; 892 893 list_for_each_entry(ii, &sci->sc_dirty_files, i_dirty) { 894 nilfs_fill_in_file_bmap(ifile, ii); 895 set_bit(NILFS_I_COLLECTED, &ii->i_state); 896 } 897} 898 899static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci, 900 struct the_nilfs *nilfs) 901{ 902 struct buffer_head *bh_sr; 903 struct nilfs_super_root *raw_sr; 904 unsigned isz = nilfs->ns_inode_size; 905 906 bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root; 907 raw_sr = (struct nilfs_super_root *)bh_sr->b_data; 908 909 raw_sr->sr_bytes = cpu_to_le16(NILFS_SR_BYTES); 910 raw_sr->sr_nongc_ctime 911 = cpu_to_le64(nilfs_doing_gc() ? 912 nilfs->ns_nongc_ctime : sci->sc_seg_ctime); 913 raw_sr->sr_flags = 0; 914 915 nilfs_write_inode_common(nilfs_dat_inode(nilfs), (void *)raw_sr + 916 NILFS_SR_DAT_OFFSET(isz), 1); 917 nilfs_write_inode_common(nilfs->ns_cpfile, (void *)raw_sr + 918 NILFS_SR_CPFILE_OFFSET(isz), 1); 919 nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr + 920 NILFS_SR_SUFILE_OFFSET(isz), 1); 921} 922 923static void nilfs_redirty_inodes(struct list_head *head) 924{ 925 struct nilfs_inode_info *ii; 926 927 list_for_each_entry(ii, head, i_dirty) { 928 if (test_bit(NILFS_I_COLLECTED, &ii->i_state)) 929 clear_bit(NILFS_I_COLLECTED, &ii->i_state); 930 } 931} 932 933static void nilfs_drop_collected_inodes(struct list_head *head) 934{ 935 struct nilfs_inode_info *ii; 936 937 list_for_each_entry(ii, head, i_dirty) { 938 if (!test_and_clear_bit(NILFS_I_COLLECTED, &ii->i_state)) 939 continue; 940 941 clear_bit(NILFS_I_INODE_DIRTY, &ii->i_state); 942 set_bit(NILFS_I_UPDATED, &ii->i_state); 943 } 944} 945 946static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci, 947 struct inode *inode, 948 struct list_head *listp, 949 int (*collect)(struct nilfs_sc_info *, 950 struct buffer_head *, 951 
struct inode *)) 952{ 953 struct buffer_head *bh, *n; 954 int err = 0; 955 956 if (collect) { 957 list_for_each_entry_safe(bh, n, listp, b_assoc_buffers) { 958 list_del_init(&bh->b_assoc_buffers); 959 err = collect(sci, bh, inode); 960 brelse(bh); 961 if (unlikely(err)) 962 goto dispose_buffers; 963 } 964 return 0; 965 } 966 /* reached after a collect() error or when collect is NULL: unlink and release whatever is still on the list */ 967 dispose_buffers: 968 while (!list_empty(listp)) { 969 bh = list_entry(listp->next, struct buffer_head, 970 b_assoc_buffers); 971 list_del_init(&bh->b_assoc_buffers); 972 brelse(bh); 973 } 974 return err; 975} 976 /* Difference between sc_segbuf_nblocks and the blocks accounted so far (sc_nblk_this_inc plus sb_sum.nblocks of the current segment buffer). */ 977static size_t nilfs_segctor_buffer_rest(struct nilfs_sc_info *sci) 978{ 979 /* Remaining number of blocks within segment buffer */ 980 return sci->sc_segbuf_nblocks - 981 (sci->sc_nblk_this_inc + sci->sc_curseg->sb_sum.nblocks); 982} 983 /* Collect the dirty blocks of @inode through the callbacks in @sc_ops, in three phases: data buffers, node buffers, then bmap buffers. NILFS_CF_NODE in sc_stage.flags records that the data phase is complete, so a repeated call skips it. When all phases succeed the finfo is closed and the flag cleared. Returns 0 or the first error reported by nilfs_segctor_apply_buffers(). */ 984static int nilfs_segctor_scan_file(struct nilfs_sc_info *sci, 985 struct inode *inode, 986 struct nilfs_sc_operations *sc_ops) 987{ 988 LIST_HEAD(data_buffers); 989 LIST_HEAD(node_buffers); 990 int err; 991 992 if (!(sci->sc_stage.flags & NILFS_CF_NODE)) { 993 size_t n, rest = nilfs_segctor_buffer_rest(sci); 994 /* rest + 1 is passed as the third argument, presumably a cap on the lookup (TODO confirm), so that n > rest shows the data cannot fit */ 995 n = nilfs_lookup_dirty_data_buffers( 996 inode, &data_buffers, rest + 1, 0, LLONG_MAX); 997 if (n > rest) { 998 err = nilfs_segctor_apply_buffers( 999 sci, inode, &data_buffers, 1000 sc_ops->collect_data); 1001 BUG_ON(!err); /* always receive -E2BIG or true error */ 1002 goto break_or_fail; 1003 } 1004 } 1005 nilfs_lookup_dirty_node_buffers(inode, &node_buffers); 1006 1007 if (!(sci->sc_stage.flags & NILFS_CF_NODE)) { 1008 err = nilfs_segctor_apply_buffers( 1009 sci, inode, &data_buffers, sc_ops->collect_data); 1010 if (unlikely(err)) { 1011 /* dispose node list */ 1012 nilfs_segctor_apply_buffers( 1013 sci, inode, &node_buffers, NULL); 1014 goto break_or_fail; 1015 } 1016 /* data phase finished; later calls for this file go straight to the node buffers */ sci->sc_stage.flags |= NILFS_CF_NODE; 1017 } 1018 /* Collect node */ 1019 err = nilfs_segctor_apply_buffers( 1020 sci, inode, &node_buffers, sc_ops->collect_node); 1021 if (unlikely(err)) 1022 goto
break_or_fail; 1023 /* last phase: the dirty buffers reported for the bmap of the inode */ 1024 nilfs_bmap_lookup_dirty_buffers(NILFS_I(inode)->i_bmap, &node_buffers); 1025 err = nilfs_segctor_apply_buffers( 1026 sci, inode, &node_buffers, sc_ops->collect_bmap); 1027 if (unlikely(err)) 1028 goto break_or_fail; 1029 /* all phases done: close the finfo and clear NILFS_CF_NODE so that the next scan begins with the data phase again */ 1030 nilfs_segctor_end_finfo(sci, inode); 1031 sci->sc_stage.flags &= ~NILFS_CF_NODE; 1032 1033 break_or_fail: 1034 return err; 1035} 1036 /* Data-sync variant of the file scan: look up only the dirty data buffers of @inode between sc_dsync_start and sc_dsync_end, collect them with nilfs_collect_file_data() and close the finfo on success. Returns 0 or the error of the collection. */ 1037static int nilfs_segctor_scan_file_dsync(struct nilfs_sc_info *sci, 1038 struct inode *inode) 1039{ 1040 LIST_HEAD(data_buffers); 1041 size_t n, rest = nilfs_segctor_buffer_rest(sci); 1042 int err; 1043 1044 n = nilfs_lookup_dirty_data_buffers(inode, &data_buffers, rest + 1, 1045 sci->sc_dsync_start, 1046 sci->sc_dsync_end); 1047 1048 err = nilfs_segctor_apply_buffers(sci, inode, &data_buffers, 1049 nilfs_collect_file_data); 1050 if (!err) { 1051 nilfs_segctor_end_finfo(sci, inode); 1052 BUG_ON(n > rest); 1053 /* always receive -E2BIG or true error if n > rest */ 1054 } 1055 return err; 1056} 1057 /* Collect dirty blocks stage by stage. sc_stage.scnt selects the case at which the switch is entered, so a call resumes at the stage where an earlier call stopped; a finished stage advances scnt and falls through to the next one. @mode (one of the SC_ construction modes) can redirect the sequence to the DAT or data-sync stage or end it early. Returns 0 or the error of the stage that stopped; scnt becomes NILFS_ST_DONE once the sequence for @mode is complete. */ 1058static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) 1059{ 1060 struct nilfs_sb_info *sbi = sci->sc_sbi; 1061 struct the_nilfs *nilfs = sbi->s_nilfs; 1062 struct list_head *head; 1063 struct nilfs_inode_info *ii; 1064 size_t ndone; 1065 int err = 0; 1066 1067 switch (sci->sc_stage.scnt) { 1068 case NILFS_ST_INIT: 1069 /* Pre-processes */ 1070 sci->sc_stage.flags = 0; 1071 /* unless NILFS_SC_UNCLOSED is set, the block counter is reset and the current summary is flagged as the beginning of a log */ 1072 if (!test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags)) { 1073 sci->sc_nblk_inc = 0; 1074 sci->sc_curseg->sb_sum.flags = NILFS_SS_LOGBGN; 1075 if (mode == SC_LSEG_DSYNC) { 1076 sci->sc_stage.scnt = NILFS_ST_DSYNC; 1077 goto dsync_mode; 1078 } 1079 } 1080 1081 sci->sc_stage.dirty_file_ptr = NULL; 1082 sci->sc_stage.gc_inode_ptr = NULL; 1083 if (mode == SC_FLUSH_DAT) { 1084 sci->sc_stage.scnt = NILFS_ST_DAT; 1085 goto dat_stage; 1086 } 1087 sci->sc_stage.scnt++; /* Fall through */ 1088 case NILFS_ST_GC: 1089 if (nilfs_doing_gc()) { 1090 head = &sci->sc_gc_inodes; 1091 ii =
list_prepare_entry(sci->sc_stage.gc_inode_ptr, 1092 head, i_dirty); 1093 list_for_each_entry_continue(ii, head, i_dirty) { 1094 err = nilfs_segctor_scan_file( 1095 sci, &ii->vfs_inode, 1096 &nilfs_sc_file_ops); 1097 if (unlikely(err)) { /* remember the predecessor of the failing inode so that a later call resumes the list walk at this inode */ 1098 sci->sc_stage.gc_inode_ptr = list_entry( 1099 ii->i_dirty.prev, 1100 struct nilfs_inode_info, 1101 i_dirty); 1102 goto break_or_fail; 1103 } 1104 set_bit(NILFS_I_COLLECTED, &ii->i_state); 1105 } 1106 sci->sc_stage.gc_inode_ptr = NULL; 1107 } 1108 sci->sc_stage.scnt++; /* Fall through */ 1109 case NILFS_ST_FILE: 1110 head = &sci->sc_dirty_files; 1111 ii = list_prepare_entry(sci->sc_stage.dirty_file_ptr, head, 1112 i_dirty); 1113 list_for_each_entry_continue(ii, head, i_dirty) { 1114 clear_bit(NILFS_I_DIRTY, &ii->i_state); 1115 1116 err = nilfs_segctor_scan_file(sci, &ii->vfs_inode, 1117 &nilfs_sc_file_ops); 1118 if (unlikely(err)) { /* same resume scheme as in the GC stage: store the predecessor of the failing inode */ 1119 sci->sc_stage.dirty_file_ptr = 1120 list_entry(ii->i_dirty.prev, 1121 struct nilfs_inode_info, 1122 i_dirty); 1123 goto break_or_fail; 1124 } 1125 /* sci->sc_stage.dirty_file_ptr = NILFS_I(inode); */ 1126 } 1127 sci->sc_stage.dirty_file_ptr = NULL; 1128 /* SC_FLUSH_FILE ends here; the ifile and the later stages are not run in that mode */ if (mode == SC_FLUSH_FILE) { 1129 sci->sc_stage.scnt = NILFS_ST_DONE; 1130 return 0; 1131 } 1132 sci->sc_stage.scnt++; 1133 sci->sc_stage.flags |= NILFS_CF_IFILE_STARTED; 1134 /* Fall through */ 1135 case NILFS_ST_IFILE: 1136 err = nilfs_segctor_scan_file(sci, sbi->s_ifile, 1137 &nilfs_sc_file_ops); 1138 if (unlikely(err)) 1139 break; 1140 /* scnt is advanced before the checkpoint is created, so a failure below is reported with scnt already at NILFS_ST_CPFILE */ sci->sc_stage.scnt++; 1141 /* Creating a checkpoint */ 1142 err = nilfs_segctor_create_checkpoint(sci); 1143 if (unlikely(err)) 1144 break; 1145 /* Fall through */ 1146 case NILFS_ST_CPFILE: 1147 err = nilfs_segctor_scan_file(sci, nilfs->ns_cpfile, 1148 &nilfs_sc_file_ops); 1149 if (unlikely(err)) 1150 break; 1151 sci->sc_stage.scnt++; /* Fall through */ 1152 case NILFS_ST_SUFILE: 1153 err = nilfs_sufile_freev(nilfs->ns_sufile, sci->sc_freesegs, 1154 sci->sc_nfreesegs, &ndone); 1155 if (unlikely(err)) { 1156
nilfs_sufile_cancel_freev(nilfs->ns_sufile, 1157 sci->sc_freesegs, ndone, 1158 NULL); 1159 break; 1160 } 1161 /* recorded so that nilfs_segctor_collect() and nilfs_segctor_abort_construction() cancel the frees when the log is retried or aborted */ sci->sc_stage.flags |= NILFS_CF_SUFREED; 1162 1163 err = nilfs_segctor_scan_file(sci, nilfs->ns_sufile, 1164 &nilfs_sc_file_ops); 1165 if (unlikely(err)) 1166 break; 1167 sci->sc_stage.scnt++; /* Fall through */ 1168 case NILFS_ST_DAT: 1169 dat_stage: 1170 err = nilfs_segctor_scan_file(sci, nilfs_dat_inode(nilfs), 1171 &nilfs_sc_dat_ops); 1172 if (unlikely(err)) 1173 break; 1174 if (mode == SC_FLUSH_DAT) { 1175 sci->sc_stage.scnt = NILFS_ST_DONE; 1176 return 0; 1177 } 1178 sci->sc_stage.scnt++; /* Fall through */ 1179 case NILFS_ST_SR: 1180 if (mode == SC_LSEG_SR) { 1181 /* Appending a super root */ 1182 err = nilfs_segctor_add_super_root(sci); 1183 if (unlikely(err)) 1184 break; 1185 } 1186 /* End of a logical segment */ 1187 sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND; 1188 sci->sc_stage.scnt = NILFS_ST_DONE; 1189 return 0; 1190 case NILFS_ST_DSYNC: 1191 dsync_mode: 1192 sci->sc_curseg->sb_sum.flags |= NILFS_SS_SYNDT; 1193 ii = sci->sc_dsync_inode; /* NOTE(review): the break below returns err == 0 while scnt stays at NILFS_ST_DSYNC, and the caller loops until scnt is NILFS_ST_DONE; presumably callers guarantee NILFS_I_BUSY is set here - confirm */ 1194 if (!test_bit(NILFS_I_BUSY, &ii->i_state)) 1195 break; 1196 1197 err = nilfs_segctor_scan_file_dsync(sci, &ii->vfs_inode); 1198 if (unlikely(err)) 1199 break; 1200 sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND; 1201 sci->sc_stage.scnt = NILFS_ST_DONE; 1202 return 0; 1203 case NILFS_ST_DONE: 1204 return 0; 1205 default: 1206 BUG(); 1207 } 1208 1209 break_or_fail: 1210 return err; 1211} 1212 1213/** 1214 * nilfs_segctor_begin_construction - setup segment buffer to make a new log 1215 * @sci: nilfs_sc_info 1216 * @nilfs: nilfs object * * Maps a new segment buffer either at the write position recorded in @nilfs (sc_write_logs empty) or directly behind the last log on sc_write_logs, moving to the head of the next segment when fewer than NILFS_PSEG_MIN_BLOCKS blocks remain. * * Return: 0 with the buffer queued as the only entry of sc_segbufs, -ENOMEM if the buffer cannot be allocated, or the error of nilfs_sufile_mark_dirty() or nilfs_sufile_alloc(), in which case the buffer is freed. 1217 */ 1218static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci, 1219 struct the_nilfs *nilfs) 1220{ 1221 struct nilfs_segment_buffer *segbuf, *prev; 1222 __u64 nextnum; 1223 int err, alloc = 0; 1224 1225 segbuf = nilfs_segbuf_new(sci->sc_super); 1226 if (unlikely(!segbuf)) 1227 return -ENOMEM; 1228 1229 if (list_empty(&sci->sc_write_logs)) { 1230
nilfs_segbuf_map(segbuf, nilfs->ns_segnum, 1231 nilfs->ns_pseg_offset, nilfs); 1232 /* too little room left in the current segment: shift to the next one and map from its first block */ if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) { 1233 nilfs_shift_to_next_segment(nilfs); 1234 nilfs_segbuf_map(segbuf, nilfs->ns_segnum, 0, nilfs); 1235 } 1236 1237 segbuf->sb_sum.seg_seq = nilfs->ns_seg_seq; 1238 nextnum = nilfs->ns_nextnum; 1239 1240 if (nilfs->ns_segnum == nilfs->ns_nextnum) 1241 /* Start from the head of a new full segment */ 1242 alloc++; 1243 } else { 1244 /* Continue logs */ 1245 prev = NILFS_LAST_SEGBUF(&sci->sc_write_logs); 1246 nilfs_segbuf_map_cont(segbuf, prev); 1247 segbuf->sb_sum.seg_seq = prev->sb_sum.seg_seq; 1248 nextnum = prev->sb_nextnum; 1249 1250 /* too little room behind the previous log: start at the head of the segment recorded as its next one, with the sequence number advanced and a new next segment to be allocated */ if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) { 1251 nilfs_segbuf_map(segbuf, prev->sb_nextnum, 0, nilfs); 1252 segbuf->sb_sum.seg_seq++; 1253 alloc++; 1254 } 1255 } 1256 1257 err = nilfs_sufile_mark_dirty(nilfs->ns_sufile, segbuf->sb_segnum); 1258 if (err) 1259 goto failed; 1260 /* alloc is non-zero when this log starts at the head of a full segment; nextnum is then replaced by a newly allocated segment number */ 1261 if (alloc) { 1262 err = nilfs_sufile_alloc(nilfs->ns_sufile, &nextnum); 1263 if (err) 1264 goto failed; 1265 } 1266 nilfs_segbuf_set_next_segnum(segbuf, nextnum, nilfs); 1267 1268 BUG_ON(!list_empty(&sci->sc_segbufs)); 1269 list_add_tail(&segbuf->sb_list, &sci->sc_segbufs); 1270 sci->sc_segbuf_nblocks = segbuf->sb_rest_blocks; 1271 return 0; 1272 1273 failed: 1274 nilfs_segbuf_free(segbuf); 1275 return err; 1276} 1277 /* Append @nadd further segment buffers to sc_segbufs. Each one is mapped to the head of the next segment of its predecessor, gets the sequence number of the predecessor plus one and a newly allocated next segment, and adds its blocks to sc_segbuf_nblocks. Returns 0 on success. On failure the next segments allocated for the new buffers are freed, the new buffers are destroyed and the error is returned; sc_segbufs itself is left unchanged. */ 1278static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci, 1279 struct the_nilfs *nilfs, int nadd) 1280{ 1281 struct nilfs_segment_buffer *segbuf, *prev; 1282 struct inode *sufile = nilfs->ns_sufile; 1283 __u64 nextnextnum; 1284 LIST_HEAD(list); 1285 int err, ret, i; 1286 1287 prev = NILFS_LAST_SEGBUF(&sci->sc_segbufs); 1288 /* 1289 * Since the segment specified with nextnum might be allocated during 1290 * the previous construction, the buffer including its segusage may 1291 * not be dirty.
The following call ensures that the buffer is dirty 1292 * and will pin the buffer on memory until the sufile is written. 1293 */ 1294 err = nilfs_sufile_mark_dirty(sufile, prev->sb_nextnum); 1295 if (unlikely(err)) 1296 return err; 1297 1298 for (i = 0; i < nadd; i++) { 1299 /* extend segment info */ 1300 err = -ENOMEM; 1301 segbuf = nilfs_segbuf_new(sci->sc_super); 1302 if (unlikely(!segbuf)) 1303 goto failed; 1304 1305 /* map this buffer to region of segment on-disk */ 1306 nilfs_segbuf_map(segbuf, prev->sb_nextnum, 0, nilfs); 1307 sci->sc_segbuf_nblocks += segbuf->sb_rest_blocks; 1308 1309 /* allocate the next next full segment */ 1310 err = nilfs_sufile_alloc(sufile, &nextnextnum); 1311 if (unlikely(err)) 1312 goto failed_segbuf; 1313 1314 segbuf->sb_sum.seg_seq = prev->sb_sum.seg_seq + 1; 1315 nilfs_segbuf_set_next_segnum(segbuf, nextnextnum, nilfs); 1316 1317 list_add_tail(&segbuf->sb_list, &list); 1318 prev = segbuf; 1319 } 1320 /* the new buffers are added to sc_segbufs only after all of them were set up */ list_splice_tail(&list, &sci->sc_segbufs); 1321 return 0; 1322 /* the buffer that failed is not on the local list yet, so it is freed separately */ 1323 failed_segbuf: 1324 nilfs_segbuf_free(segbuf); 1325 failed: 1326 list_for_each_entry(segbuf, &list, sb_list) { 1327 ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum); 1328 WARN_ON(ret); /* never fails */ 1329 } 1330 nilfs_destroy_logs(&list); 1331 return err; 1332} 1333 /* Clean up after logs on @logs that were not completed. The next segment of each log is freed unless it equals the one already recorded (ns_nextnum of @nilfs for the first log, sb_nextnum of the preceding log for the others), and logs whose sb_err is set are handled as described by the cases below. NILFS_FIRST_SEGBUF() is applied without an emptiness check, so @logs is presumably never empty here - confirm against callers. */ 1334static void nilfs_free_incomplete_logs(struct list_head *logs, 1335 struct the_nilfs *nilfs) 1336{ 1337 struct nilfs_segment_buffer *segbuf, *prev; 1338 struct inode *sufile = nilfs->ns_sufile; 1339 int ret; 1340 1341 segbuf = NILFS_FIRST_SEGBUF(logs); 1342 if (nilfs->ns_nextnum != segbuf->sb_nextnum) { 1343 ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum); 1344 WARN_ON(ret); /* never fails */ 1345 } 1346 if (atomic_read(&segbuf->sb_err)) { 1347 /* Case 1: The first segment failed */ 1348 if (segbuf->sb_pseg_start != segbuf->sb_fseg_start) 1349 /* Case 1a: Partial segment appended into an existing 1350 segment */ 1351 nilfs_terminate_segment(nilfs, segbuf->sb_fseg_start, 1352
segbuf->sb_fseg_end); 1353 else /* Case 1b: New full segment */ 1354 set_nilfs_discontinued(nilfs); 1355 } 1356 /* remaining logs: each is compared with its predecessor on the list */ 1357 prev = segbuf; 1358 list_for_each_entry_continue(segbuf, logs, sb_list) { 1359 if (prev->sb_nextnum != segbuf->sb_nextnum) { 1360 ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum); 1361 WARN_ON(ret); /* never fails */ 1362 } 1363 if (atomic_read(&segbuf->sb_err) && 1364 segbuf->sb_segnum != nilfs->ns_nextnum) 1365 /* Case 2: extended segment (!= next) failed */ 1366 nilfs_sufile_set_error(sufile, segbuf->sb_segnum); 1367 prev = segbuf; 1368 } 1369} 1370 /* Record the usage of every segment buffer on sc_segbufs in @sufile: the block count passed is the number of blocks in the log plus the offset of the log from the start of its segment, and the time passed is sc_seg_ctime. */ 1371static void nilfs_segctor_update_segusage(struct nilfs_sc_info *sci, 1372 struct inode *sufile) 1373{ 1374 struct nilfs_segment_buffer *segbuf; 1375 unsigned long live_blocks; 1376 int ret; 1377 1378 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { 1379 live_blocks = segbuf->sb_sum.nblocks + 1380 (segbuf->sb_pseg_start - segbuf->sb_fseg_start); 1381 ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum, 1382 live_blocks, 1383 sci->sc_seg_ctime); 1384 WARN_ON(ret); /* always succeed because the segusage is dirty */ 1385 } 1386} 1387 /* Take back the usage recorded for the logs on @logs: the segment of the first log is set to the number of blocks that precede the log (sb_pseg_start - sb_fseg_start), the segment of every following log to zero; the time argument is 0 in both cases. The first entry is fetched without an emptiness check, so @logs is presumably non-empty - confirm. */ 1388static void nilfs_cancel_segusage(struct list_head *logs, struct inode *sufile) 1389{ 1390 struct nilfs_segment_buffer *segbuf; 1391 int ret; 1392 1393 segbuf = NILFS_FIRST_SEGBUF(logs); 1394 ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum, 1395 segbuf->sb_pseg_start - 1396 segbuf->sb_fseg_start, 0); 1397 WARN_ON(ret); /* always succeed because the segusage is dirty */ 1398 1399 list_for_each_entry_continue(segbuf, logs, sb_list) { 1400 ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum, 1401 0, 0); 1402 WARN_ON(ret); /* always succeed */ 1403 } 1404} 1405 /* Give up the segment buffers that follow @last on sc_segbufs: their blocks are subtracted from sc_segbuf_nblocks and the next segment each of them holds is freed in @sufile, then nilfs_truncate_logs() is called with @last. */ 1406static void nilfs_segctor_truncate_segments(struct nilfs_sc_info *sci, 1407 struct nilfs_segment_buffer *last, 1408 struct inode *sufile) 1409{ 1410 struct nilfs_segment_buffer *segbuf = last; 1411 int ret; 1412 1413 list_for_each_entry_continue(segbuf,
&sci->sc_segbufs, sb_list) { 1414 sci->sc_segbuf_nblocks -= segbuf->sb_rest_blocks; 1415 ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum); 1416 WARN_ON(ret); 1417 } 1418 nilfs_truncate_logs(&sci->sc_segbufs, last); 1419} 1420 1421 /* Run the block collection for one log. If the collection overflows (-E2BIG) in SC_LSEG_SR mode with the stage at NILFS_ST_CPFILE or later, nilfs_clear_logs() is called on sc_segbufs, more segment buffers are added (nadd doubles on every retry up to SC_MAX_SEGDELTA), segment frees already made are cancelled, the stage saved on entry is restored and the collection is retried. -E2BIG in any other situation ends the loop without error. On success the segment buffers behind the current one are truncated. Returns 0 or the error that stopped the collection. */ 1422static int nilfs_segctor_collect(struct nilfs_sc_info *sci, 1423 struct the_nilfs *nilfs, int mode) 1424{ 1425 struct nilfs_cstage prev_stage = sci->sc_stage; 1426 int err, nadd = 1; 1427 1428 /* Collection retry loop */ 1429 for (;;) { 1430 sci->sc_nblk_this_inc = 0; 1431 sci->sc_curseg = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); 1432 1433 err = nilfs_segctor_reset_segment_buffer(sci); 1434 if (unlikely(err)) 1435 goto failed; 1436 1437 err = nilfs_segctor_collect_blocks(sci, mode); 1438 sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks; 1439 if (!err) 1440 break; 1441 1442 if (unlikely(err != -E2BIG)) 1443 goto failed; 1444 1445 /* The current segment is filled up */ 1446 if (mode != SC_LSEG_SR || sci->sc_stage.scnt < NILFS_ST_CPFILE) 1447 break; 1448 1449 nilfs_clear_logs(&sci->sc_segbufs); 1450 1451 err = nilfs_segctor_extend_segments(sci, nilfs, nadd); 1452 if (unlikely(err)) 1453 return err; 1454 1455 if (sci->sc_stage.flags & NILFS_CF_SUFREED) { 1456 err = nilfs_sufile_cancel_freev(nilfs->ns_sufile, 1457 sci->sc_freesegs, 1458 sci->sc_nfreesegs, 1459 NULL); 1460 WARN_ON(err); /* do not happen */ 1461 } 1462 /* next retry adds twice as many segments, capped at SC_MAX_SEGDELTA; restoring the saved stage also restores its flags */ nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA); 1463 sci->sc_stage = prev_stage; 1464 } 1465 nilfs_segctor_truncate_segments(sci, sci->sc_curseg, nilfs->ns_sufile); 1466 return 0; 1467 1468 failed: 1469 return err; 1470} 1471 /* Put @new_bh into the b_assoc_buffers list position of @old_bh and re-initialise the list link of @old_bh. @new_bh must not be linked on a list when this is called (enforced with BUG_ON). */ 1472static void nilfs_list_replace_buffer(struct buffer_head *old_bh, 1473 struct buffer_head *new_bh) 1474{ 1475 BUG_ON(!list_empty(&new_bh->b_assoc_buffers)); 1476 1477 list_replace_init(&old_bh->b_assoc_buffers, &new_bh->b_assoc_buffers); 1478 /* The caller must release old_bh */ 1479} 1480 /* Walk the payload buffers of @segbuf in order, assign consecutive disk block numbers to them through nilfs_bmap_assign() and write one binfo entry per block into the segment summary. The walk stops at the super root buffer or when all finfo entries are consumed. Returns 0, or the value of nilfs_handle_bmap_error() when an assignment fails. */ 1481static int 1482nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci, 1483 struct
nilfs_segment_buffer *segbuf, 1484 int mode) 1485{ 1486 struct inode *inode = NULL; 1487 sector_t blocknr; 1488 unsigned long nfinfo = segbuf->sb_sum.nfinfo; 1489 unsigned long nblocks = 0, ndatablk = 0; 1490 struct nilfs_sc_operations *sc_op = NULL; 1491 struct nilfs_segsum_pointer ssp; 1492 struct nilfs_finfo *finfo = NULL; 1493 union nilfs_binfo binfo; 1494 struct buffer_head *bh, *bh_org; 1495 ino_t ino = 0; 1496 int err = 0; 1497 1498 if (!nfinfo) 1499 goto out; 1500 /* numbering starts nsumblk blocks after the start of the log; the summary cursor starts right behind the summary header */ 1501 blocknr = segbuf->sb_pseg_start + segbuf->sb_sum.nsumblk; 1502 ssp.bh = NILFS_SEGBUF_FIRST_BH(&segbuf->sb_segsum_buffers); 1503 ssp.offset = sizeof(struct nilfs_segment_summary); 1504 1505 list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { 1506 if (bh == segbuf->sb_super_root) 1507 break; 1508 /* finfo is NULL at the first block of a file: read the finfo entry and choose the inode and the operation table for the blocks that follow */ if (!finfo) { 1509 finfo = nilfs_segctor_map_segsum_entry( 1510 sci, &ssp, sizeof(*finfo)); 1511 ino = le64_to_cpu(finfo->fi_ino); 1512 nblocks = le32_to_cpu(finfo->fi_nblocks); 1513 ndatablk = le32_to_cpu(finfo->fi_ndatablk); 1514 1515 if (buffer_nilfs_node(bh)) 1516 inode = NILFS_BTNC_I(bh->b_page->mapping); 1517 else 1518 inode = NILFS_AS_I(bh->b_page->mapping); 1519 1520 if (mode == SC_LSEG_DSYNC) 1521 sc_op = &nilfs_sc_dsync_ops; 1522 else if (ino == NILFS_DAT_INO) 1523 sc_op = &nilfs_sc_dat_ops; 1524 else /* file blocks */ 1525 sc_op = &nilfs_sc_file_ops; 1526 } 1527 /* &bh is passed to nilfs_bmap_assign(), which may leave a different buffer head in bh; the original is held with an extra reference so that the list entry can be swapped and the reference dropped afterwards */ bh_org = bh; 1528 get_bh(bh_org); 1529 err = nilfs_bmap_assign(NILFS_I(inode)->i_bmap, &bh, blocknr, 1530 &binfo); 1531 if (bh != bh_org) 1532 nilfs_list_replace_buffer(bh_org, bh); 1533 brelse(bh_org); 1534 if (unlikely(err)) 1535 goto failed_bmap; 1536 /* the first fi_ndatablk blocks of a file get a data binfo, the remaining ones a node binfo */ 1537 if (ndatablk > 0) 1538 sc_op->write_data_binfo(sci, &ssp, &binfo); 1539 else 1540 sc_op->write_node_binfo(sci, &ssp, &binfo); 1541 1542 blocknr++; 1543 /* last block of the current file: the next buffer starts a new finfo, or the walk ends when none is left */ if (--nblocks == 0) { 1544 finfo = NULL; 1545 if (--nfinfo == 0) 1546 break; 1547 } else if (ndatablk > 0) 1548 ndatablk--; 1549 } 1550 out: 1551 return 0; 1552 1553 failed_bmap: 1554 err =
nilfs_handle_bmap_error(err, __func__, inode, sci->sc_super); 1555 return err; 1556} 1557 /* Assign block numbers for every segment buffer on sc_segbufs and fill in its segment summary afterwards. Stops at the first error and returns it; returns 0 otherwise. */ 1558static int nilfs_segctor_assign(struct nilfs_sc_info *sci, int mode) 1559{ 1560 struct nilfs_segment_buffer *segbuf; 1561 int err; 1562 1563 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { 1564 err = nilfs_segctor_update_payload_blocknr(sci, segbuf, mode); 1565 if (unlikely(err)) 1566 return err; 1567 nilfs_segbuf_fill_in_segsum(segbuf); 1568 } 1569 return 0; 1570} 1571 /* Substitute copies for the buffers of @page that are linked on a b_assoc_buffers list. A private page is allocated; for each linked buffer the data and block number are copied to the corresponding buffer of the private page, the copy takes over the list position and the original buffer is appended to @out. The private page is then put under writeback and unlocked. Returns 0, or -ENOMEM if the private page cannot be allocated. */ 1572static int 1573nilfs_copy_replace_page_buffers(struct page *page, struct list_head *out) 1574{ 1575 struct page *clone_page; 1576 struct buffer_head *bh, *head, *bh2; 1577 void *kaddr; 1578 1579 bh = head = page_buffers(page); 1580 1581 clone_page = nilfs_alloc_private_page(bh->b_bdev, bh->b_size, 0); 1582 if (unlikely(!clone_page)) 1583 return -ENOMEM; 1584 1585 bh2 = page_buffers(clone_page); 1586 kaddr = kmap_atomic(page, KM_USER0); 1587 do { 1588 /* skip buffers that are not linked on a list; continue still evaluates the loop condition, so bh and bh2 advance together */ if (list_empty(&bh->b_assoc_buffers)) 1589 continue; 1590 get_bh(bh2); 1591 page_cache_get(clone_page); /* for each bh */ 1592 memcpy(bh2->b_data, kaddr + bh_offset(bh), bh2->b_size); 1593 bh2->b_blocknr = bh->b_blocknr; 1594 list_replace(&bh->b_assoc_buffers, &bh2->b_assoc_buffers); 1595 list_add_tail(&bh->b_assoc_buffers, out); 1596 } while (bh = bh->b_this_page, bh2 = bh2->b_this_page, bh != head); 1597 kunmap_atomic(kaddr, KM_USER0); 1598 /* NR_WRITEBACK is accounted by hand, and only if the writeback flag was not set before */ 1599 if (!TestSetPageWriteback(clone_page)) 1600 inc_zone_page_state(clone_page, NR_WRITEBACK); 1601 unlock_page(clone_page); 1602 1603 return 0; 1604} 1605 /* Tell whether the buffers of @page have to be copied before the write. Returns 0 for pages without a mapping or host inode and for pages of directories; returns 1 for pages that are currently mapped, clearing their Checked flag; otherwise returns the Checked flag of the page. */ 1606static int nilfs_test_page_to_be_frozen(struct page *page) 1607{ 1608 struct address_space *mapping = page->mapping; 1609 1610 if (!mapping || !mapping->host || S_ISDIR(mapping->host->i_mode)) 1611 return 0; 1612 1613 if (page_mapped(page)) { 1614 ClearPageChecked(page); 1615 return 1; 1616 } 1617 return PageChecked(page); 1618} 1619 /* Prepare @page for write-out: under the page lock its dirty state is cleared for I/O and writeback is set; if nilfs_test_page_to_be_frozen() selects the page, its listed buffers are replaced by copies and the originals collected on @out. A NULL page or a page already under writeback is ignored. Returns 0 or the error of the copy. */ 1620static int nilfs_begin_page_io(struct page *page, struct list_head *out) 1621{ 1622 if (!page ||
PageWriteback(page)) 1623 /* For split b-tree node pages, this function may be called 1624 twice. We ignore the 2nd or later calls by this check. */ 1625 return 0; 1626 1627 lock_page(page); 1628 clear_page_dirty_for_io(page); 1629 set_page_writeback(page); 1630 unlock_page(page); 1631 1632 if (nilfs_test_page_to_be_frozen(page)) { 1633 int err = nilfs_copy_replace_page_buffers(page, out); 1634 if (unlikely(err)) 1635 return err; 1636 } 1637 return 0; 1638} 1639 /* Put the pages behind all buffers on sc_segbufs into writeback state before the logs are submitted. Pages of segment summary and super root buffers are tracked in bd_page and handled inline (dirty state cleared for I/O and writeback set under the page lock); pages of the other payload buffers are tracked in fs_page and go through nilfs_begin_page_io(), which may collect copied buffers on sc_copied_buffers. A page is processed once the walk reaches a buffer on a different page, and the last pages after the loops. Returns 0, or the error of nilfs_begin_page_io() with *failed_page set to the page it was called for; *failed_page is NULL otherwise. */ 1640static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci, 1641 struct page **failed_page) 1642{ 1643 struct nilfs_segment_buffer *segbuf; 1644 struct page *bd_page = NULL, *fs_page = NULL; 1645 struct list_head *list = &sci->sc_copied_buffers; 1646 int err; 1647 1648 *failed_page = NULL; 1649 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { 1650 struct buffer_head *bh; 1651 1652 list_for_each_entry(bh, &segbuf->sb_segsum_buffers, 1653 b_assoc_buffers) { 1654 if (bh->b_page != bd_page) { 1655 if (bd_page) { 1656 lock_page(bd_page); 1657 clear_page_dirty_for_io(bd_page); 1658 set_page_writeback(bd_page); 1659 unlock_page(bd_page); 1660 } 1661 bd_page = bh->b_page; 1662 } 1663 } 1664 1665 list_for_each_entry(bh, &segbuf->sb_payload_buffers, 1666 b_assoc_buffers) { 1667 if (bh == segbuf->sb_super_root) { /* NOTE(review): bd_page is used without a NULL check here, unlike in the summary loop above; presumably every segment buffer has at least one summary buffer - confirm */ 1668 if (bh->b_page != bd_page) { 1669 lock_page(bd_page); 1670 clear_page_dirty_for_io(bd_page); 1671 set_page_writeback(bd_page); 1672 unlock_page(bd_page); 1673 bd_page = bh->b_page; 1674 } 1675 break; 1676 } 1677 /* the page change shows that all buffers of the previous page were seen; begin its I/O (a NULL fs_page on the first pass is ignored by the callee) */ if (bh->b_page != fs_page) { 1678 err = nilfs_begin_page_io(fs_page, list); 1679 if (unlikely(err)) { 1680 *failed_page = fs_page; 1681 goto out; 1682 } 1683 fs_page = bh->b_page; 1684 } 1685 } 1686 } 1687 /* the last page of each kind is still pending after the loops */ if (bd_page) { 1688 lock_page(bd_page); 1689 clear_page_dirty_for_io(bd_page); 1690 set_page_writeback(bd_page); 1691 unlock_page(bd_page); 1692 } 1693 err = nilfs_begin_page_io(fs_page, list); 1694 if (unlikely(err)) 1695 *failed_page = fs_page; 1696 out: 1697 return err;
1698} 1699 /* Submit the logs on sc_segbufs with nilfs_write_logs() and move them to the tail of sc_write_logs regardless of the result. Returns the result of nilfs_write_logs(). */ 1700static int nilfs_segctor_write(struct nilfs_sc_info *sci, 1701 struct the_nilfs *nilfs) 1702{ 1703 int ret; 1704 1705 ret = nilfs_write_logs(&sci->sc_segbufs, nilfs); 1706 list_splice_tail_init(&sci->sc_segbufs, &sci->sc_write_logs); 1707 return ret; 1708} 1709 /* Finish the write-out of @page. With @err == 0 the page is dirtied again only if nilfs_page_buffers_clean() reports that it is not clean, and its error flag is cleared; with an error the page is dirtied again and flagged with an error. Writeback is then ended: for pages whose head buffer is buffer_nilfs_allocated() by clearing the flag and adjusting NR_WRITEBACK directly, for all other pages through end_page_writeback(). */ 1710static void __nilfs_end_page_io(struct page *page, int err) 1711{ 1712 if (!err) { 1713 if (!nilfs_page_buffers_clean(page)) 1714 __set_page_dirty_nobuffers(page); 1715 ClearPageError(page); 1716 } else { 1717 __set_page_dirty_nobuffers(page); 1718 SetPageError(page); 1719 } 1720 1721 if (buffer_nilfs_allocated(page_buffers(page))) { 1722 if (TestClearPageWriteback(page)) 1723 dec_zone_page_state(page, NR_WRITEBACK); 1724 } else 1725 end_page_writeback(page); 1726} 1727 /* End the write-out of @page, tolerating a NULL page. B-tree node pages that are no longer under writeback are not passed to __nilfs_end_page_io() again; only their dirty state is reconciled, as explained in the comments inside. */ 1728static void nilfs_end_page_io(struct page *page, int err) 1729{ 1730 if (!page) 1731 return; 1732 1733 if (buffer_nilfs_node(page_buffers(page)) && !PageWriteback(page)) { 1734 /* 1735 * For b-tree node pages, this function may be called twice 1736 * or more because they might be split in a segment. 1737 */ 1738 if (PageDirty(page)) { 1739 /* 1740 * For pages holding split b-tree node buffers, dirty 1741 * flag on the buffers may be cleared discretely. 1742 * In that case, the page is once redirtied for 1743 * remaining buffers, and it must be cancelled if 1744 * all the buffers get cleaned later.
1745 */ 1746 lock_page(page); 1747 if (nilfs_page_buffers_clean(page)) 1748 __nilfs_clear_page_dirty(page); 1749 unlock_page(page); 1750 } 1751 return; 1752 } 1753 1754 __nilfs_end_page_io(page, err); 1755} 1756 /* Empty @list, which holds original buffers that were replaced by copies. The page of the first listed buffer is taken, every buffer of that page that is still linked is unlinked, marked up to date and clean when @err is 0, and released; the write-out of the page is then finished with __nilfs_end_page_io(). This repeats until the list is empty. */ 1757static void nilfs_clear_copied_buffers(struct list_head *list, int err) 1758{ 1759 struct buffer_head *bh, *head; 1760 struct page *page; 1761 1762 while (!list_empty(list)) { 1763 bh = list_entry(list->next, struct buffer_head, 1764 b_assoc_buffers); 1765 page = bh->b_page; 1766 /* extra page reference held until the buffers of the page were released below */ page_cache_get(page); 1767 head = bh = page_buffers(page); 1768 do { 1769 if (!list_empty(&bh->b_assoc_buffers)) { 1770 list_del_init(&bh->b_assoc_buffers); 1771 if (!err) { 1772 set_buffer_uptodate(bh); 1773 clear_buffer_dirty(bh); 1774 clear_buffer_nilfs_volatile(bh); 1775 } 1776 brelse(bh); /* for b_assoc_buffers */ 1777 } 1778 } while ((bh = bh->b_this_page) != head); 1779 1780 __nilfs_end_page_io(page, err); 1781 page_cache_release(page); 1782 } 1783} 1784 /* End the writeback state on the pages of the logs on @logs after a failure. Summary and super root pages (bd_page) are ended with end_page_writeback(), the other payload pages (fs_page) with nilfs_end_page_io() and @err. When @failed_page is given, the walk returns right after that page was handled. Does nothing for an empty list. */ 1785static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page, 1786 int err) 1787{ 1788 struct nilfs_segment_buffer *segbuf; 1789 struct page *bd_page = NULL, *fs_page = NULL; 1790 struct buffer_head *bh; 1791 1792 if (list_empty(logs)) 1793 return; 1794 1795 list_for_each_entry(segbuf, logs, sb_list) { 1796 list_for_each_entry(bh, &segbuf->sb_segsum_buffers, 1797 b_assoc_buffers) { 1798 if (bh->b_page != bd_page) { 1799 if (bd_page) 1800 end_page_writeback(bd_page); 1801 bd_page = bh->b_page; 1802 } 1803 } 1804 1805 list_for_each_entry(bh, &segbuf->sb_payload_buffers, 1806 b_assoc_buffers) { 1807 if (bh == segbuf->sb_super_root) { 1808 if (bh->b_page != bd_page) { 1809 end_page_writeback(bd_page); 1810 bd_page = bh->b_page; 1811 } 1812 break; 1813 } 1814 if (bh->b_page != fs_page) { 1815 nilfs_end_page_io(fs_page, err); 1816 /* nilfs_segctor_prepare_write() stops at failed_page, so the pages after it were presumably never put under writeback */ if (fs_page && fs_page == failed_page) 1817 return; 1818 fs_page = bh->b_page; 1819 } 1820 } 1821 } 1822 if (bd_page) 1823 end_page_writeback(bd_page); 1824 1825
nilfs_end_page_io(fs_page, err); 1826} 1827 /* Roll back a construction that failed with @err. The logs already submitted (sc_write_logs) are waited for and their pages taken out of writeback; then, together with the logs still on sc_segbufs, their segment usage is cancelled and the segments allocated for them are freed. The copied buffers are released, segment frees are cancelled if NILFS_CF_SUFREED is set, and all logs are destroyed. */ 1828static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci, 1829 struct the_nilfs *nilfs, int err) 1830{ 1831 LIST_HEAD(logs); 1832 int ret; 1833 1834 list_splice_tail_init(&sci->sc_write_logs, &logs); 1835 ret = nilfs_wait_on_logs(&logs); /* a non-zero result of the wait is passed on instead of @err */ 1836 nilfs_abort_logs(&logs, NULL, ret ? : err); 1837 /* from here on the logs that were not submitted are handled together with the submitted ones */ 1838 list_splice_tail_init(&sci->sc_segbufs, &logs); 1839 nilfs_cancel_segusage(&logs, nilfs->ns_sufile); 1840 nilfs_free_incomplete_logs(&logs, nilfs); 1841 nilfs_clear_copied_buffers(&sci->sc_copied_buffers, err); 1842 1843 if (sci->sc_stage.flags & NILFS_CF_SUFREED) { 1844 ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile, 1845 sci->sc_freesegs, 1846 sci->sc_nfreesegs, 1847 NULL); 1848 WARN_ON(ret); /* do not happen */ 1849 } 1850 1851 nilfs_destroy_logs(&logs); 1852} 1853 /* Record in @nilfs where the next log is to be written: segment number, next segment number, sequence number and creation time are copied from @segbuf, and ns_pseg_offset is set to the offset of the block that follows the log of @segbuf within its segment. */ 1854static void nilfs_set_next_segment(struct the_nilfs *nilfs, 1855 struct nilfs_segment_buffer *segbuf) 1856{ 1857 nilfs->ns_segnum = segbuf->sb_segnum; 1858 nilfs->ns_nextnum = segbuf->sb_nextnum; 1859 nilfs->ns_pseg_offset = segbuf->sb_pseg_start - segbuf->sb_fseg_start 1860 + segbuf->sb_sum.nblocks; 1861 nilfs->ns_seg_seq = segbuf->sb_sum.seg_seq; 1862 nilfs->ns_ctime = segbuf->sb_sum.ctime; 1863} 1864 /* Post-process the logs on sc_write_logs once their write has completed: buffers are marked up to date and clean, page writeback is ended, copied buffers are released, collected inodes are dropped, the write position in the nilfs object is advanced to the end of the last log and, if a super root was among the buffers, the last segment is recorded and the checkpoint number advanced. */ 1865static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) 1866{ 1867 struct nilfs_segment_buffer *segbuf; 1868 struct page *bd_page = NULL, *fs_page = NULL; 1869 struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs; 1870 int update_sr = false; 1871 1872 list_for_each_entry(segbuf, &sci->sc_write_logs, sb_list) { 1873 struct buffer_head *bh; 1874 1875 list_for_each_entry(bh, &segbuf->sb_segsum_buffers, 1876 b_assoc_buffers) { 1877 set_buffer_uptodate(bh); 1878 clear_buffer_dirty(bh); 1879 if (bh->b_page != bd_page) { 1880 if (bd_page) 1881 end_page_writeback(bd_page); 1882 bd_page = bh->b_page; 1883 } 1884 } 1885 /* 1886 * We assume that the buffers which belong to the same page 1887 * continue over the buffer list.
1888 * Under this assumption, the last BHs of pages is 1889 * identifiable by the discontinuity of bh->b_page 1890 * (page != fs_page). 1891 * 1892 * For B-tree node blocks, however, this assumption is not 1893 * guaranteed. The cleanup code of B-tree node pages needs 1894 * special care. 1895 */ 1896 list_for_each_entry(bh, &segbuf->sb_payload_buffers, 1897 b_assoc_buffers) { 1898 set_buffer_uptodate(bh); 1899 clear_buffer_dirty(bh); 1900 clear_buffer_nilfs_volatile(bh); /* the super root is the last payload buffer looked at; its page is tracked in bd_page like the summary pages, and its presence is remembered in update_sr */ 1901 if (bh == segbuf->sb_super_root) { 1902 if (bh->b_page != bd_page) { 1903 end_page_writeback(bd_page); 1904 bd_page = bh->b_page; 1905 } 1906 update_sr = true; 1907 break; 1908 } 1909 if (bh->b_page != fs_page) { 1910 nilfs_end_page_io(fs_page, 0); 1911 fs_page = bh->b_page; 1912 } 1913 } 1914 /* for logs where nilfs_segbuf_simplex() is false: a LOGBGN flag sets NILFS_SC_UNCLOSED and stamps sc_lseg_stime, a LOGEND flag clears NILFS_SC_UNCLOSED again */ 1915 if (!nilfs_segbuf_simplex(segbuf)) { 1916 if (segbuf->sb_sum.flags & NILFS_SS_LOGBGN) { 1917 set_bit(NILFS_SC_UNCLOSED, &sci->sc_flags); 1918 sci->sc_lseg_stime = jiffies; 1919 } 1920 if (segbuf->sb_sum.flags & NILFS_SS_LOGEND) 1921 clear_bit(NILFS_SC_UNCLOSED, &sci->sc_flags); 1922 } 1923 } 1924 /* 1925 * Since pages may continue over multiple segment buffers, 1926 * end of the last page must be checked outside of the loop.
1927 */ 1928 if (bd_page) 1929 end_page_writeback(bd_page); 1930 1931 nilfs_end_page_io(fs_page, 0); 1932 1933 nilfs_clear_copied_buffers(&sci->sc_copied_buffers, 0); 1934 1935 nilfs_drop_collected_inodes(&sci->sc_dirty_files); 1936 /* during GC the GC inodes are dropped as well and ns_nongc_ctime is left alone; otherwise ns_nongc_ctime follows the creation time of this log */ 1937 if (nilfs_doing_gc()) { 1938 nilfs_drop_collected_inodes(&sci->sc_gc_inodes); 1939 if (update_sr) 1940 nilfs_commit_gcdat_inode(nilfs); 1941 } else 1942 nilfs->ns_nongc_ctime = sci->sc_seg_ctime; 1943 1944 sci->sc_nblk_inc += sci->sc_nblk_this_inc; 1945 1946 segbuf = NILFS_LAST_SEGBUF(&sci->sc_write_logs); 1947 nilfs_set_next_segment(nilfs, segbuf); 1948 /* a super root was written: the current ns_cno is passed to nilfs_set_last_segment() and the counter is incremented afterwards (post-increment) */ 1949 if (update_sr) { 1950 nilfs_set_last_segment(nilfs, segbuf->sb_pseg_start, 1951 segbuf->sb_sum.seg_seq, nilfs->ns_cno++); 1952 1953 clear_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags); 1954 clear_bit(NILFS_SC_DIRTY, &sci->sc_flags); 1955 set_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags); 1956 nilfs_segctor_clear_metadata_dirty(sci); 1957 } else 1958 clear_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags); 1959} 1960 /* Wait for the logs on sc_write_logs. On success they are completed with nilfs_segctor_complete_write() and destroyed; on error they are left on the list and the error is returned. */ 1961static int nilfs_segctor_wait(struct nilfs_sc_info *sci) 1962{ 1963 int ret; 1964 1965 ret = nilfs_wait_on_logs(&sci->sc_write_logs); 1966 if (!ret) { 1967 nilfs_segctor_complete_write(sci); 1968 nilfs_destroy_logs(&sci->sc_write_logs); 1969 } 1970 return ret; 1971} 1972 /* Move every inode on sbi->s_dirty_files to sc_dirty_files. An inode that has no inode block buffer (i_bh) first gets one from the ifile, which is marked dirty. Each moved inode gets i_cno set to the current checkpoint number, NILFS_I_QUEUED cleared and NILFS_I_BUSY set; finally the i_cno of the ifile itself is set. Returns 0, or the error of nilfs_ifile_get_inode_block(), in which case s_inode_lock is not held on return and inodes moved so far stay on sc_dirty_files. */ 1973static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci, 1974 struct nilfs_sb_info *sbi) 1975{ 1976 struct nilfs_inode_info *ii, *n; 1977 __u64 cno = sbi->s_nilfs->ns_cno; 1978 1979 spin_lock(&sbi->s_inode_lock); 1980 retry: 1981 list_for_each_entry_safe(ii, n, &sbi->s_dirty_files, i_dirty) { 1982 if (!ii->i_bh) { 1983 struct buffer_head *ibh; 1984 int err; 1985 /* s_inode_lock is dropped around the lookup (presumably because it may sleep - confirm), which is why the list walk is restarted afterwards */ 1986 spin_unlock(&sbi->s_inode_lock); 1987 err = nilfs_ifile_get_inode_block( 1988 sbi->s_ifile, ii->vfs_inode.i_ino, &ibh); 1989 if (unlikely(err)) { 1990 nilfs_warning(sbi->s_super, __func__, 1991 "failed to get inode block.\n"); 1992 return err; 1993 } 1994 nilfs_mdt_mark_buffer_dirty(ibh); 1995 nilfs_mdt_mark_dirty(sbi->s_ifile); 1996
spin_lock(&sbi->s_inode_lock); /* i_bh may have been set while the lock was dropped; in that case the existing buffer is kept and the one just obtained is released */ 1997 if (likely(!ii->i_bh)) 1998 ii->i_bh = ibh; 1999 else 2000 brelse(ibh); 2001 goto retry; 2002 } 2003 ii->i_cno = cno; 2004 2005 clear_bit(NILFS_I_QUEUED, &ii->i_state); 2006 set_bit(NILFS_I_BUSY, &ii->i_state); 2007 list_del(&ii->i_dirty); 2008 list_add_tail(&ii->i_dirty, &sci->sc_dirty_files); 2009 } 2010 spin_unlock(&sbi->s_inode_lock); 2011 2012 NILFS_I(sbi->s_ifile)->i_cno = cno; 2013 2014 return 0; 2015} 2016 /* Counterpart of nilfs_segctor_check_in_files(). Inodes on sc_dirty_files that had NILFS_I_UPDATED set (the bit is cleared by the test) and are not NILFS_I_DIRTY have NILFS_I_BUSY cleared and their inode block buffer released, and are moved to the ti_garbage list of the current transaction. All other inodes stay on sc_dirty_files with i_cno set to the current checkpoint number. NOTE(review): current->journal_info is dereferenced without a NULL check when an inode is moved; presumably the caller always runs inside a nilfs transaction - confirm. */ 2017static void nilfs_segctor_check_out_files(struct nilfs_sc_info *sci, 2018 struct nilfs_sb_info *sbi) 2019{ 2020 struct nilfs_transaction_info *ti = current->journal_info; 2021 struct nilfs_inode_info *ii, *n; 2022 __u64 cno = sbi->s_nilfs->ns_cno; 2023 2024 spin_lock(&sbi->s_inode_lock); 2025 list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) { 2026 if (!test_and_clear_bit(NILFS_I_UPDATED, &ii->i_state) || 2027 test_bit(NILFS_I_DIRTY, &ii->i_state)) { 2028 /* The current checkpoint number (=nilfs->ns_cno) is 2029 changed between check-in and check-out only if the 2030 super root is written out. So, we can update i_cno 2031 for the inodes that remain in the dirty list.
*/ 2032 ii->i_cno = cno; 2033 continue; 2034 } 2035 clear_bit(NILFS_I_BUSY, &ii->i_state); 2036 brelse(ii->i_bh); 2037 ii->i_bh = NULL; 2038 list_del(&ii->i_dirty); 2039 list_add_tail(&ii->i_dirty, &ti->ti_garbage); 2040 } 2041 spin_unlock(&sbi->s_inode_lock); 2042} 2043 2044/* 2045 * Main procedure of segment constructor * * The dirty files are checked in first. Then one log per loop iteration is built and written: a segment buffer is set up, blocks are collected, block numbers assigned, checkpoint and super root filled in where @mode and the stage call for it, segment usage updated, pages prepared and the log submitted. The loop ends when the collection stage reaches NILFS_ST_DONE. @mode is one of the SC_ construction modes. Returns 0, or the error of the step that failed after the construction was aborted; the files are checked out again on every exit path. 2046 */ 2047static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) 2048{ 2049 struct nilfs_sb_info *sbi = sci->sc_sbi; 2050 struct the_nilfs *nilfs = sbi->s_nilfs; 2051 struct page *failed_page; 2052 int err; 2053 2054 sci->sc_stage.scnt = NILFS_ST_INIT; 2055 2056 err = nilfs_segctor_check_in_files(sci, sbi); 2057 if (unlikely(err)) 2058 goto out; 2059 2060 if (nilfs_test_metadata_dirty(sbi)) 2061 set_bit(NILFS_SC_DIRTY, &sci->sc_flags); 2062 /* nothing to construct when nilfs_segctor_clean() reports true; err is 0 here */ 2063 if (nilfs_segctor_clean(sci)) 2064 goto out; 2065 2066 do { 2067 sci->sc_stage.flags &= ~NILFS_CF_HISTORY_MASK; 2068 2069 err = nilfs_segctor_begin_construction(sci, nilfs); 2070 if (unlikely(err)) 2071 goto out; 2072 2073 /* Update time stamp */ 2074 sci->sc_seg_ctime = get_seconds(); 2075 2076 err = nilfs_segctor_collect(sci, nilfs, mode); 2077 if (unlikely(err)) 2078 goto failed; 2079 2080 /* Avoid empty segment */ 2081 if (sci->sc_stage.scnt == NILFS_ST_DONE && 2082 nilfs_segbuf_empty(sci->sc_curseg)) { /* the 1 is only handed to the cleanup helpers as the error indication; err is still 0 and that is what gets returned */ 2083 nilfs_segctor_abort_construction(sci, nilfs, 1); 2084 goto out; 2085 } 2086 2087 err = nilfs_segctor_assign(sci, mode); 2088 if (unlikely(err)) 2089 goto failed; 2090 2091 if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) 2092 nilfs_segctor_fill_in_file_bmap(sci, sbi->s_ifile); 2093 /* same condition under which nilfs_segctor_collect() extends the segment buffers: SC_LSEG_SR mode with the stage at NILFS_ST_CPFILE or later */ 2094 if (mode == SC_LSEG_SR && 2095 sci->sc_stage.scnt >= NILFS_ST_CPFILE) { 2096 err = nilfs_segctor_fill_in_checkpoint(sci); 2097 if (unlikely(err)) 2098 goto failed_to_write; 2099 2100 nilfs_segctor_fill_in_super_root(sci, nilfs); 2101 } 2102 nilfs_segctor_update_segusage(sci, nilfs->ns_sufile); 2103 2104 /* Write partial segments */ 2105 err = nilfs_segctor_prepare_write(sci, &failed_page);
2106 if (err) { 2107 nilfs_abort_logs(&sci->sc_segbufs, failed_page, err); 2108 goto failed_to_write; 2109 } 2110 2111 nilfs_add_checksums_on_logs(&sci->sc_segbufs, 2112 nilfs->ns_crc_seed); 2113 2114 err = nilfs_segctor_write(sci, nilfs); 2115 if (unlikely(err)) 2116 goto failed_to_write; 2117 2118 if (sci->sc_stage.scnt == NILFS_ST_DONE || 2119 nilfs->ns_blocksize_bits != PAGE_CACHE_SHIFT) { 2120 /* 2121 * At this point, we avoid double buffering 2122 * for blocksize < pagesize because page dirty 2123 * flag is turned off during write and dirty 2124 * buffers are not properly collected for 2125 * pages crossing over segments. 2126 */ 2127 err = nilfs_segctor_wait(sci); 2128 if (err) 2129 goto failed_to_write; 2130 } 2131 } while (sci->sc_stage.scnt != NILFS_ST_DONE); 2132 2133 out: 2134 nilfs_segctor_check_out_files(sci, sbi); 2135 return err; 2136 2137 failed_to_write: 2138 if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) 2139 nilfs_redirty_inodes(&sci->sc_dirty_files); 2140 2141 failed: 2142 if (nilfs_doing_gc()) 2143 nilfs_redirty_inodes(&sci->sc_gc_inodes); 2144 nilfs_segctor_abort_construction(sci, nilfs, err); 2145 goto out; 2146} 2147 2148/** 2149 * nilfs_segctor_start_timer - set timer of background write 2150 * @sci: nilfs_sc_info 2151 * 2152 * If the timer has already been set, it ignores the new request. 2153 * This function MUST be called within a section locking the segment 2154 * semaphore. 
2155 */ 2156static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci) 2157{ 2158 spin_lock(&sci->sc_state_lock); 2159 if (!(sci->sc_state & NILFS_SEGCTOR_COMMIT)) { 2160 sci->sc_timer.expires = jiffies + sci->sc_interval; 2161 add_timer(&sci->sc_timer); 2162 sci->sc_state |= NILFS_SEGCTOR_COMMIT; 2163 } 2164 spin_unlock(&sci->sc_state_lock); 2165} 2166 2167static void nilfs_segctor_do_flush(struct nilfs_sc_info *sci, int bn) 2168{ 2169 spin_lock(&sci->sc_state_lock); 2170 if (!(sci->sc_flush_request & (1 << bn))) { 2171 unsigned long prev_req = sci->sc_flush_request; 2172 2173 sci->sc_flush_request |= (1 << bn); 2174 if (!prev_req) 2175 wake_up(&sci->sc_wait_daemon); 2176 } 2177 spin_unlock(&sci->sc_state_lock); 2178} 2179 2180/** 2181 * nilfs_flush_segment - trigger a segment construction for resource control 2182 * @sb: super block 2183 * @ino: inode number of the file to be flushed out. 2184 */ 2185void nilfs_flush_segment(struct super_block *sb, ino_t ino) 2186{ 2187 struct nilfs_sb_info *sbi = NILFS_SB(sb); 2188 struct nilfs_sc_info *sci = NILFS_SC(sbi); 2189 2190 if (!sci || nilfs_doing_construction()) 2191 return; 2192 nilfs_segctor_do_flush(sci, NILFS_MDT_INODE(sb, ino) ? 
ino : 0); 2193 /* assign bit 0 to data files */ 2194} 2195 2196struct nilfs_segctor_wait_request { 2197 wait_queue_t wq; 2198 __u32 seq; 2199 int err; 2200 atomic_t done; 2201}; 2202 2203static int nilfs_segctor_sync(struct nilfs_sc_info *sci) 2204{ 2205 struct nilfs_segctor_wait_request wait_req; 2206 int err = 0; 2207 2208 spin_lock(&sci->sc_state_lock); 2209 init_wait(&wait_req.wq); 2210 wait_req.err = 0; 2211 atomic_set(&wait_req.done, 0); 2212 wait_req.seq = ++sci->sc_seq_request; 2213 spin_unlock(&sci->sc_state_lock); 2214 2215 init_waitqueue_entry(&wait_req.wq, current); 2216 add_wait_queue(&sci->sc_wait_request, &wait_req.wq); 2217 set_current_state(TASK_INTERRUPTIBLE); 2218 wake_up(&sci->sc_wait_daemon); 2219 2220 for (;;) { 2221 if (atomic_read(&wait_req.done)) { 2222 err = wait_req.err; 2223 break; 2224 } 2225 if (!signal_pending(current)) { 2226 schedule(); 2227 continue; 2228 } 2229 err = -ERESTARTSYS; 2230 break; 2231 } 2232 finish_wait(&sci->sc_wait_request, &wait_req.wq); 2233 return err; 2234} 2235 2236static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err) 2237{ 2238 struct nilfs_segctor_wait_request *wrq, *n; 2239 unsigned long flags; 2240 2241 spin_lock_irqsave(&sci->sc_wait_request.lock, flags); 2242 list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.task_list, 2243 wq.task_list) { 2244 if (!atomic_read(&wrq->done) && 2245 nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq)) { 2246 wrq->err = err; 2247 atomic_set(&wrq->done, 1); 2248 } 2249 if (atomic_read(&wrq->done)) { 2250 wrq->wq.func(&wrq->wq, 2251 TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 2252 0, NULL); 2253 } 2254 } 2255 spin_unlock_irqrestore(&sci->sc_wait_request.lock, flags); 2256} 2257 2258/** 2259 * nilfs_construct_segment - construct a logical segment 2260 * @sb: super block 2261 * 2262 * Return Value: On success, 0 is retured. On errors, one of the following 2263 * negative error code is returned. 2264 * 2265 * %-EROFS - Read only filesystem. 
2266 * 2267 * %-EIO - I/O error 2268 * 2269 * %-ENOSPC - No space left on device (only in a panic state). 2270 * 2271 * %-ERESTARTSYS - Interrupted. 2272 * 2273 * %-ENOMEM - Insufficient memory available. 2274 */ 2275int nilfs_construct_segment(struct super_block *sb) 2276{ 2277 struct nilfs_sb_info *sbi = NILFS_SB(sb); 2278 struct nilfs_sc_info *sci = NILFS_SC(sbi); 2279 struct nilfs_transaction_info *ti; 2280 int err; 2281 2282 if (!sci) 2283 return -EROFS; 2284 2285 /* A call inside transactions causes a deadlock. */ 2286 BUG_ON((ti = current->journal_info) && ti->ti_magic == NILFS_TI_MAGIC); 2287 2288 err = nilfs_segctor_sync(sci); 2289 return err; 2290} 2291 2292/** 2293 * nilfs_construct_dsync_segment - construct a data-only logical segment 2294 * @sb: super block 2295 * @inode: inode whose data blocks should be written out 2296 * @start: start byte offset 2297 * @end: end byte offset (inclusive) 2298 * 2299 * Return Value: On success, 0 is retured. On errors, one of the following 2300 * negative error code is returned. 2301 * 2302 * %-EROFS - Read only filesystem. 2303 * 2304 * %-EIO - I/O error 2305 * 2306 * %-ENOSPC - No space left on device (only in a panic state). 2307 * 2308 * %-ERESTARTSYS - Interrupted. 2309 * 2310 * %-ENOMEM - Insufficient memory available. 
2311 */ 2312int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode, 2313 loff_t start, loff_t end) 2314{ 2315 struct nilfs_sb_info *sbi = NILFS_SB(sb); 2316 struct nilfs_sc_info *sci = NILFS_SC(sbi); 2317 struct nilfs_inode_info *ii; 2318 struct nilfs_transaction_info ti; 2319 int err = 0; 2320 2321 if (!sci) 2322 return -EROFS; 2323 2324 nilfs_transaction_lock(sbi, &ti, 0); 2325 2326 ii = NILFS_I(inode); 2327 if (test_bit(NILFS_I_INODE_DIRTY, &ii->i_state) || 2328 nilfs_test_opt(sbi, STRICT_ORDER) || 2329 test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) || 2330 nilfs_discontinued(sbi->s_nilfs)) { 2331 nilfs_transaction_unlock(sbi); 2332 err = nilfs_segctor_sync(sci); 2333 return err; 2334 } 2335 2336 spin_lock(&sbi->s_inode_lock); 2337 if (!test_bit(NILFS_I_QUEUED, &ii->i_state) && 2338 !test_bit(NILFS_I_BUSY, &ii->i_state)) { 2339 spin_unlock(&sbi->s_inode_lock); 2340 nilfs_transaction_unlock(sbi); 2341 return 0; 2342 } 2343 spin_unlock(&sbi->s_inode_lock); 2344 sci->sc_dsync_inode = ii; 2345 sci->sc_dsync_start = start; 2346 sci->sc_dsync_end = end; 2347 2348 err = nilfs_segctor_do_construct(sci, SC_LSEG_DSYNC); 2349 2350 nilfs_transaction_unlock(sbi); 2351 return err; 2352} 2353 2354#define FLUSH_FILE_BIT (0x1) /* data file only */ 2355#define FLUSH_DAT_BIT (1 << NILFS_DAT_INO) /* DAT only */ 2356 2357/** 2358 * nilfs_segctor_accept - record accepted sequence count of log-write requests 2359 * @sci: segment constructor object 2360 */ 2361static void nilfs_segctor_accept(struct nilfs_sc_info *sci) 2362{ 2363 spin_lock(&sci->sc_state_lock); 2364 sci->sc_seq_accepted = sci->sc_seq_request; 2365 spin_unlock(&sci->sc_state_lock); 2366 del_timer_sync(&sci->sc_timer); 2367} 2368 2369/** 2370 * nilfs_segctor_notify - notify the result of request to caller threads 2371 * @sci: segment constructor object 2372 * @mode: mode of log forming 2373 * @err: error code to be notified 2374 */ 2375static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int 
mode, int err) 2376{ 2377 /* Clear requests (even when the construction failed) */ 2378 spin_lock(&sci->sc_state_lock); 2379 2380 if (mode == SC_LSEG_SR) { 2381 sci->sc_state &= ~NILFS_SEGCTOR_COMMIT; 2382 sci->sc_seq_done = sci->sc_seq_accepted; 2383 nilfs_segctor_wakeup(sci, err); 2384 sci->sc_flush_request = 0; 2385 } else { 2386 if (mode == SC_FLUSH_FILE) 2387 sci->sc_flush_request &= ~FLUSH_FILE_BIT; 2388 else if (mode == SC_FLUSH_DAT) 2389 sci->sc_flush_request &= ~FLUSH_DAT_BIT; 2390 2391 /* re-enable timer if checkpoint creation was not done */ 2392 if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && 2393 time_before(jiffies, sci->sc_timer.expires)) 2394 add_timer(&sci->sc_timer); 2395 } 2396 spin_unlock(&sci->sc_state_lock); 2397} 2398 2399/** 2400 * nilfs_segctor_construct - form logs and write them to disk 2401 * @sci: segment constructor object 2402 * @mode: mode of log forming 2403 */ 2404static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode) 2405{ 2406 struct nilfs_sb_info *sbi = sci->sc_sbi; 2407 struct the_nilfs *nilfs = sbi->s_nilfs; 2408 struct nilfs_super_block **sbp; 2409 int err = 0; 2410 2411 nilfs_segctor_accept(sci); 2412 2413 if (nilfs_discontinued(nilfs)) 2414 mode = SC_LSEG_SR; 2415 if (!nilfs_segctor_confirm(sci)) 2416 err = nilfs_segctor_do_construct(sci, mode); 2417 2418 if (likely(!err)) { 2419 if (mode != SC_FLUSH_DAT) 2420 atomic_set(&nilfs->ns_ndirtyblks, 0); 2421 if (test_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags) && 2422 nilfs_discontinued(nilfs)) { 2423 down_write(&nilfs->ns_sem); 2424 err = -EIO; 2425 sbp = nilfs_prepare_super(sbi, 2426 nilfs_sb_will_flip(nilfs)); 2427 if (likely(sbp)) { 2428 nilfs_set_log_cursor(sbp[0], nilfs); 2429 err = nilfs_commit_super(sbi, NILFS_SB_COMMIT); 2430 } 2431 up_write(&nilfs->ns_sem); 2432 } 2433 } 2434 2435 nilfs_segctor_notify(sci, mode, err); 2436 return err; 2437} 2438 2439static void nilfs_construction_timeout(unsigned long data) 2440{ 2441 struct task_struct *p = (struct 
task_struct *)data; 2442 wake_up_process(p); 2443} 2444 2445static void 2446nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head) 2447{ 2448 struct nilfs_inode_info *ii, *n; 2449 2450 list_for_each_entry_safe(ii, n, head, i_dirty) { 2451 if (!test_bit(NILFS_I_UPDATED, &ii->i_state)) 2452 continue; 2453 hlist_del_init(&ii->vfs_inode.i_hash); 2454 list_del_init(&ii->i_dirty); 2455 nilfs_clear_gcinode(&ii->vfs_inode); 2456 } 2457} 2458 2459int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, 2460 void **kbufs) 2461{ 2462 struct nilfs_sb_info *sbi = NILFS_SB(sb); 2463 struct nilfs_sc_info *sci = NILFS_SC(sbi); 2464 struct the_nilfs *nilfs = sbi->s_nilfs; 2465 struct nilfs_transaction_info ti; 2466 int err; 2467 2468 if (unlikely(!sci)) 2469 return -EROFS; 2470 2471 nilfs_transaction_lock(sbi, &ti, 1); 2472 2473 err = nilfs_init_gcdat_inode(nilfs); 2474 if (unlikely(err)) 2475 goto out_unlock; 2476 2477 err = nilfs_ioctl_prepare_clean_segments(nilfs, argv, kbufs); 2478 if (unlikely(err)) 2479 goto out_unlock; 2480 2481 sci->sc_freesegs = kbufs[4]; 2482 sci->sc_nfreesegs = argv[4].v_nmembs; 2483 list_splice_tail_init(&nilfs->ns_gc_inodes, &sci->sc_gc_inodes); 2484 2485 for (;;) { 2486 err = nilfs_segctor_construct(sci, SC_LSEG_SR); 2487 nilfs_remove_written_gcinodes(nilfs, &sci->sc_gc_inodes); 2488 2489 if (likely(!err)) 2490 break; 2491 2492 nilfs_warning(sb, __func__, 2493 "segment construction failed. 
(err=%d)", err); 2494 set_current_state(TASK_INTERRUPTIBLE); 2495 schedule_timeout(sci->sc_interval); 2496 } 2497 if (nilfs_test_opt(sbi, DISCARD)) { 2498 int ret = nilfs_discard_segments(nilfs, sci->sc_freesegs, 2499 sci->sc_nfreesegs); 2500 if (ret) { 2501 printk(KERN_WARNING 2502 "NILFS warning: error %d on discard request, " 2503 "turning discards off for the device\n", ret); 2504 nilfs_clear_opt(sbi, DISCARD); 2505 } 2506 } 2507 2508 out_unlock: 2509 sci->sc_freesegs = NULL; 2510 sci->sc_nfreesegs = 0; 2511 nilfs_clear_gcdat_inode(nilfs); 2512 nilfs_transaction_unlock(sbi); 2513 return err; 2514} 2515 2516static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode) 2517{ 2518 struct nilfs_sb_info *sbi = sci->sc_sbi; 2519 struct nilfs_transaction_info ti; 2520 2521 nilfs_transaction_lock(sbi, &ti, 0); 2522 nilfs_segctor_construct(sci, mode); 2523 2524 /* 2525 * Unclosed segment should be retried. We do this using sc_timer. 2526 * Timeout of sc_timer will invoke complete construction which leads 2527 * to close the current logical segment. 2528 */ 2529 if (test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags)) 2530 nilfs_segctor_start_timer(sci); 2531 2532 nilfs_transaction_unlock(sbi); 2533} 2534 2535static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci) 2536{ 2537 int mode = 0; 2538 int err; 2539 2540 spin_lock(&sci->sc_state_lock); 2541 mode = (sci->sc_flush_request & FLUSH_DAT_BIT) ? 2542 SC_FLUSH_DAT : SC_FLUSH_FILE; 2543 spin_unlock(&sci->sc_state_lock); 2544 2545 if (mode) { 2546 err = nilfs_segctor_do_construct(sci, mode); 2547 2548 spin_lock(&sci->sc_state_lock); 2549 sci->sc_flush_request &= (mode == SC_FLUSH_FILE) ? 
2550 ~FLUSH_FILE_BIT : ~FLUSH_DAT_BIT; 2551 spin_unlock(&sci->sc_state_lock); 2552 } 2553 clear_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags); 2554} 2555 2556static int nilfs_segctor_flush_mode(struct nilfs_sc_info *sci) 2557{ 2558 if (!test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) || 2559 time_before(jiffies, sci->sc_lseg_stime + sci->sc_mjcp_freq)) { 2560 if (!(sci->sc_flush_request & ~FLUSH_FILE_BIT)) 2561 return SC_FLUSH_FILE; 2562 else if (!(sci->sc_flush_request & ~FLUSH_DAT_BIT)) 2563 return SC_FLUSH_DAT; 2564 } 2565 return SC_LSEG_SR; 2566} 2567 2568/** 2569 * nilfs_segctor_thread - main loop of the segment constructor thread. 2570 * @arg: pointer to a struct nilfs_sc_info. 2571 * 2572 * nilfs_segctor_thread() initializes a timer and serves as a daemon 2573 * to execute segment constructions. 2574 */ 2575static int nilfs_segctor_thread(void *arg) 2576{ 2577 struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg; 2578 struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs; 2579 int timeout = 0; 2580 2581 sci->sc_timer.data = (unsigned long)current; 2582 sci->sc_timer.function = nilfs_construction_timeout; 2583 2584 /* start sync. */ 2585 sci->sc_task = current; 2586 wake_up(&sci->sc_wait_task); /* for nilfs_segctor_start_thread() */ 2587 printk(KERN_INFO 2588 "segctord starting. 
Construction interval = %lu seconds, " 2589 "CP frequency < %lu seconds\n", 2590 sci->sc_interval / HZ, sci->sc_mjcp_freq / HZ); 2591 2592 spin_lock(&sci->sc_state_lock); 2593 loop: 2594 for (;;) { 2595 int mode; 2596 2597 if (sci->sc_state & NILFS_SEGCTOR_QUIT) 2598 goto end_thread; 2599 2600 if (timeout || sci->sc_seq_request != sci->sc_seq_done) 2601 mode = SC_LSEG_SR; 2602 else if (!sci->sc_flush_request) 2603 break; 2604 else 2605 mode = nilfs_segctor_flush_mode(sci); 2606 2607 spin_unlock(&sci->sc_state_lock); 2608 nilfs_segctor_thread_construct(sci, mode); 2609 spin_lock(&sci->sc_state_lock); 2610 timeout = 0; 2611 } 2612 2613 2614 if (freezing(current)) { 2615 spin_unlock(&sci->sc_state_lock); 2616 refrigerator(); 2617 spin_lock(&sci->sc_state_lock); 2618 } else { 2619 DEFINE_WAIT(wait); 2620 int should_sleep = 1; 2621 2622 prepare_to_wait(&sci->sc_wait_daemon, &wait, 2623 TASK_INTERRUPTIBLE); 2624 2625 if (sci->sc_seq_request != sci->sc_seq_done) 2626 should_sleep = 0; 2627 else if (sci->sc_flush_request) 2628 should_sleep = 0; 2629 else if (sci->sc_state & NILFS_SEGCTOR_COMMIT) 2630 should_sleep = time_before(jiffies, 2631 sci->sc_timer.expires); 2632 2633 if (should_sleep) { 2634 spin_unlock(&sci->sc_state_lock); 2635 schedule(); 2636 spin_lock(&sci->sc_state_lock); 2637 } 2638 finish_wait(&sci->sc_wait_daemon, &wait); 2639 timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && 2640 time_after_eq(jiffies, sci->sc_timer.expires)); 2641 2642 if (nilfs_sb_dirty(nilfs) && nilfs_sb_need_update(nilfs)) 2643 set_nilfs_discontinued(nilfs); 2644 } 2645 goto loop; 2646 2647 end_thread: 2648 spin_unlock(&sci->sc_state_lock); 2649 2650 /* end sync. 
*/ 2651 sci->sc_task = NULL; 2652 wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */ 2653 return 0; 2654} 2655 2656static int nilfs_segctor_start_thread(struct nilfs_sc_info *sci) 2657{ 2658 struct task_struct *t; 2659 2660 t = kthread_run(nilfs_segctor_thread, sci, "segctord"); 2661 if (IS_ERR(t)) { 2662 int err = PTR_ERR(t); 2663 2664 printk(KERN_ERR "NILFS: error %d creating segctord thread\n", 2665 err); 2666 return err; 2667 } 2668 wait_event(sci->sc_wait_task, sci->sc_task != NULL); 2669 return 0; 2670} 2671 2672static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci) 2673{ 2674 sci->sc_state |= NILFS_SEGCTOR_QUIT; 2675 2676 while (sci->sc_task) { 2677 wake_up(&sci->sc_wait_daemon); 2678 spin_unlock(&sci->sc_state_lock); 2679 wait_event(sci->sc_wait_task, sci->sc_task == NULL); 2680 spin_lock(&sci->sc_state_lock); 2681 } 2682} 2683 2684/* 2685 * Setup & clean-up functions 2686 */ 2687static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi) 2688{ 2689 struct nilfs_sc_info *sci; 2690 2691 sci = kzalloc(sizeof(*sci), GFP_KERNEL); 2692 if (!sci) 2693 return NULL; 2694 2695 sci->sc_sbi = sbi; 2696 sci->sc_super = sbi->s_super; 2697 2698 init_waitqueue_head(&sci->sc_wait_request); 2699 init_waitqueue_head(&sci->sc_wait_daemon); 2700 init_waitqueue_head(&sci->sc_wait_task); 2701 spin_lock_init(&sci->sc_state_lock); 2702 INIT_LIST_HEAD(&sci->sc_dirty_files); 2703 INIT_LIST_HEAD(&sci->sc_segbufs); 2704 INIT_LIST_HEAD(&sci->sc_write_logs); 2705 INIT_LIST_HEAD(&sci->sc_gc_inodes); 2706 INIT_LIST_HEAD(&sci->sc_copied_buffers); 2707 init_timer(&sci->sc_timer); 2708 2709 sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT; 2710 sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ; 2711 sci->sc_watermark = NILFS_SC_DEFAULT_WATERMARK; 2712 2713 if (sbi->s_interval) 2714 sci->sc_interval = sbi->s_interval; 2715 if (sbi->s_watermark) 2716 sci->sc_watermark = sbi->s_watermark; 2717 return sci; 2718} 2719 2720static void 
nilfs_segctor_write_out(struct nilfs_sc_info *sci) 2721{ 2722 int ret, retrycount = NILFS_SC_CLEANUP_RETRY; 2723 2724 /* The segctord thread was stopped and its timer was removed. 2725 But some tasks remain. */ 2726 do { 2727 struct nilfs_sb_info *sbi = sci->sc_sbi; 2728 struct nilfs_transaction_info ti; 2729 2730 nilfs_transaction_lock(sbi, &ti, 0); 2731 ret = nilfs_segctor_construct(sci, SC_LSEG_SR); 2732 nilfs_transaction_unlock(sbi); 2733 2734 } while (ret && retrycount-- > 0); 2735} 2736 2737/** 2738 * nilfs_segctor_destroy - destroy the segment constructor. 2739 * @sci: nilfs_sc_info 2740 * 2741 * nilfs_segctor_destroy() kills the segctord thread and frees 2742 * the nilfs_sc_info struct. 2743 * Caller must hold the segment semaphore. 2744 */ 2745static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) 2746{ 2747 struct nilfs_sb_info *sbi = sci->sc_sbi; 2748 int flag; 2749 2750 up_write(&sbi->s_nilfs->ns_segctor_sem); 2751 2752 spin_lock(&sci->sc_state_lock); 2753 nilfs_segctor_kill_thread(sci); 2754 flag = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) || sci->sc_flush_request 2755 || sci->sc_seq_request != sci->sc_seq_done); 2756 spin_unlock(&sci->sc_state_lock); 2757 2758 if (flag || !nilfs_segctor_confirm(sci)) 2759 nilfs_segctor_write_out(sci); 2760 2761 WARN_ON(!list_empty(&sci->sc_copied_buffers)); 2762 2763 if (!list_empty(&sci->sc_dirty_files)) { 2764 nilfs_warning(sbi->s_super, __func__, 2765 "dirty file(s) after the final construction\n"); 2766 nilfs_dispose_list(sbi, &sci->sc_dirty_files, 1); 2767 } 2768 2769 WARN_ON(!list_empty(&sci->sc_segbufs)); 2770 WARN_ON(!list_empty(&sci->sc_write_logs)); 2771 2772 down_write(&sbi->s_nilfs->ns_segctor_sem); 2773 2774 del_timer_sync(&sci->sc_timer); 2775 kfree(sci); 2776} 2777 2778/** 2779 * nilfs_attach_segment_constructor - attach a segment constructor 2780 * @sbi: nilfs_sb_info 2781 * 2782 * nilfs_attach_segment_constructor() allocates a struct nilfs_sc_info, 2783 * initializes it, and starts the segment 
constructor. 2784 * 2785 * Return Value: On success, 0 is returned. On error, one of the following 2786 * negative error code is returned. 2787 * 2788 * %-ENOMEM - Insufficient memory available. 2789 */ 2790int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi) 2791{ 2792 struct the_nilfs *nilfs = sbi->s_nilfs; 2793 int err; 2794 2795 if (NILFS_SC(sbi)) { 2796 /* 2797 * This happens if the filesystem was remounted 2798 * read/write after nilfs_error degenerated it into a 2799 * read-only mount. 2800 */ 2801 nilfs_detach_segment_constructor(sbi); 2802 } 2803 2804 sbi->s_sc_info = nilfs_segctor_new(sbi); 2805 if (!sbi->s_sc_info) 2806 return -ENOMEM; 2807 2808 nilfs_attach_writer(nilfs, sbi); 2809 err = nilfs_segctor_start_thread(NILFS_SC(sbi)); 2810 if (err) { 2811 nilfs_detach_writer(nilfs, sbi); 2812 kfree(sbi->s_sc_info); 2813 sbi->s_sc_info = NULL; 2814 } 2815 return err; 2816} 2817 2818/** 2819 * nilfs_detach_segment_constructor - destroy the segment constructor 2820 * @sbi: nilfs_sb_info 2821 * 2822 * nilfs_detach_segment_constructor() kills the segment constructor daemon, 2823 * frees the struct nilfs_sc_info, and destroy the dirty file list. 2824 */ 2825void nilfs_detach_segment_constructor(struct nilfs_sb_info *sbi) 2826{ 2827 struct the_nilfs *nilfs = sbi->s_nilfs; 2828 LIST_HEAD(garbage_list); 2829 2830 down_write(&nilfs->ns_segctor_sem); 2831 if (NILFS_SC(sbi)) { 2832 nilfs_segctor_destroy(NILFS_SC(sbi)); 2833 sbi->s_sc_info = NULL; 2834 } 2835 2836 /* Force to free the list of dirty files */ 2837 spin_lock(&sbi->s_inode_lock); 2838 if (!list_empty(&sbi->s_dirty_files)) { 2839 list_splice_init(&sbi->s_dirty_files, &garbage_list); 2840 nilfs_warning(sbi->s_super, __func__, 2841 "Non empty dirty list after the last " 2842 "segment construction\n"); 2843 } 2844 spin_unlock(&sbi->s_inode_lock); 2845 up_write(&nilfs->ns_segctor_sem); 2846 2847 nilfs_dispose_list(sbi, &garbage_list, 1); 2848 nilfs_detach_writer(nilfs, sbi); 2849} 2850