1// SPDX-License-Identifier: GPL-2.0+ 2/* 3 * This file is part of UBIFS. 4 * 5 * Copyright (C) 2006-2008 Nokia Corporation. 6 * 7 * Authors: Adrian Hunter 8 * Artem Bityutskiy (���������������� ����������) 9 */ 10 11/* 12 * This file contains journal replay code. It runs when the file-system is being 13 * mounted and requires no locking. 14 * 15 * The larger is the journal, the longer it takes to scan it, so the longer it 16 * takes to mount UBIFS. This is why the journal has limited size which may be 17 * changed depending on the system requirements. But a larger journal gives 18 * faster I/O speed because it writes the index less frequently. So this is a 19 * trade-off. Also, the journal is indexed by the in-memory index (TNC), so the 20 * larger is the journal, the more memory its index may consume. 21 */ 22 23#ifdef __UBOOT__ 24#include <log.h> 25#include <dm/devres.h> 26#include <linux/compat.h> 27#include <linux/err.h> 28#endif 29#include "ubifs.h" 30#include <linux/bug.h> 31#include <linux/list_sort.h> 32 33/** 34 * struct replay_entry - replay list entry. 35 * @lnum: logical eraseblock number of the node 36 * @offs: node offset 37 * @len: node length 38 * @deletion: non-zero if this entry corresponds to a node deletion 39 * @sqnum: node sequence number 40 * @list: links the replay list 41 * @key: node key 42 * @nm: directory entry name 43 * @old_size: truncation old size 44 * @new_size: truncation new size 45 * 46 * The replay process first scans all buds and builds the replay list, then 47 * sorts the replay list in nodes sequence number order, and then inserts all 48 * the replay entries to the TNC. 49 */ 50struct replay_entry { 51 int lnum; 52 int offs; 53 int len; 54 unsigned int deletion:1; 55 unsigned long long sqnum; 56 struct list_head list; 57 union ubifs_key key; 58 union { 59 struct qstr nm; 60 struct { 61 loff_t old_size; 62 loff_t new_size; 63 }; 64 }; 65}; 66 67/** 68 * struct bud_entry - entry in the list of buds to replay. 69 * @list: next bud in the list 70 * @bud: bud description object 71 * @sqnum: reference node sequence number 72 * @free: free bytes in the bud 73 * @dirty: dirty bytes in the bud 74 */ 75struct bud_entry { 76 struct list_head list; 77 struct ubifs_bud *bud; 78 unsigned long long sqnum; 79 int free; 80 int dirty; 81}; 82 83/** 84 * set_bud_lprops - set free and dirty space used by a bud. 85 * @c: UBIFS file-system description object 86 * @b: bud entry which describes the bud 87 * 88 * This function makes sure the LEB properties of bud @b are set correctly 89 * after the replay. Returns zero in case of success and a negative error code 90 * in case of failure. 91 */ 92static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b) 93{ 94 const struct ubifs_lprops *lp; 95 int err = 0, dirty; 96 97 ubifs_get_lprops(c); 98 99 lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum); 100 if (IS_ERR(lp)) { 101 err = PTR_ERR(lp); 102 goto out; 103 } 104 105 dirty = lp->dirty; 106 if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { 107 /* 108 * The LEB was added to the journal with a starting offset of 109 * zero which means the LEB must have been empty. The LEB 110 * property values should be @lp->free == @c->leb_size and 111 * @lp->dirty == 0, but that is not the case. The reason is that 112 * the LEB had been garbage collected before it became the bud, 113 * and there was not commit inbetween. The garbage collector 114 * resets the free and dirty space without recording it 115 * anywhere except lprops, so if there was no commit then 116 * lprops does not have that information. 117 * 118 * We do not need to adjust free space because the scan has told 119 * us the exact value which is recorded in the replay entry as 120 * @b->free. 121 * 122 * However we do need to subtract from the dirty space the 123 * amount of space that the garbage collector reclaimed, which 124 * is the whole LEB minus the amount of space that was free. 125 */ 126 dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, 127 lp->free, lp->dirty); 128 dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, 129 lp->free, lp->dirty); 130 dirty -= c->leb_size - lp->free; 131 /* 132 * If the replay order was perfect the dirty space would now be 133 * zero. The order is not perfect because the journal heads 134 * race with each other. This is not a problem but is does mean 135 * that the dirty space may temporarily exceed c->leb_size 136 * during the replay. 137 */ 138 if (dirty != 0) 139 dbg_mnt("LEB %d lp: %d free %d dirty replay: %d free %d dirty", 140 b->bud->lnum, lp->free, lp->dirty, b->free, 141 b->dirty); 142 } 143 lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty, 144 lp->flags | LPROPS_TAKEN, 0); 145 if (IS_ERR(lp)) { 146 err = PTR_ERR(lp); 147 goto out; 148 } 149 150 /* Make sure the journal head points to the latest bud */ 151 err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf, 152 b->bud->lnum, c->leb_size - b->free); 153 154out: 155 ubifs_release_lprops(c); 156 return err; 157} 158 159/** 160 * set_buds_lprops - set free and dirty space for all replayed buds. 161 * @c: UBIFS file-system description object 162 * 163 * This function sets LEB properties for all replayed buds. Returns zero in 164 * case of success and a negative error code in case of failure. 165 */ 166static int set_buds_lprops(struct ubifs_info *c) 167{ 168 struct bud_entry *b; 169 int err; 170 171 list_for_each_entry(b, &c->replay_buds, list) { 172 err = set_bud_lprops(c, b); 173 if (err) 174 return err; 175 } 176 177 return 0; 178} 179 180/** 181 * trun_remove_range - apply a replay entry for a truncation to the TNC. 182 * @c: UBIFS file-system description object 183 * @r: replay entry of truncation 184 */ 185static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r) 186{ 187 unsigned min_blk, max_blk; 188 union ubifs_key min_key, max_key; 189 ino_t ino; 190 191 min_blk = r->new_size / UBIFS_BLOCK_SIZE; 192 if (r->new_size & (UBIFS_BLOCK_SIZE - 1)) 193 min_blk += 1; 194 195 max_blk = r->old_size / UBIFS_BLOCK_SIZE; 196 if ((r->old_size & (UBIFS_BLOCK_SIZE - 1)) == 0) 197 max_blk -= 1; 198 199 ino = key_inum(c, &r->key); 200 201 data_key_init(c, &min_key, ino, min_blk); 202 data_key_init(c, &max_key, ino, max_blk); 203 204 return ubifs_tnc_remove_range(c, &min_key, &max_key); 205} 206 207/** 208 * apply_replay_entry - apply a replay entry to the TNC. 209 * @c: UBIFS file-system description object 210 * @r: replay entry to apply 211 * 212 * Apply a replay entry to the TNC. 213 */ 214static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) 215{ 216 int err; 217 218 dbg_mntk(&r->key, "LEB %d:%d len %d deletion %d sqnum %llu key ", 219 r->lnum, r->offs, r->len, r->deletion, r->sqnum); 220 221 /* Set c->replay_sqnum to help deal with dangling branches. */ 222 c->replay_sqnum = r->sqnum; 223 224 if (is_hash_key(c, &r->key)) { 225 if (r->deletion) 226 err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); 227 else 228 err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, 229 r->len, &r->nm); 230 } else { 231 if (r->deletion) 232 switch (key_type(c, &r->key)) { 233 case UBIFS_INO_KEY: 234 { 235 ino_t inum = key_inum(c, &r->key); 236 237 err = ubifs_tnc_remove_ino(c, inum); 238 break; 239 } 240 case UBIFS_TRUN_KEY: 241 err = trun_remove_range(c, r); 242 break; 243 default: 244 err = ubifs_tnc_remove(c, &r->key); 245 break; 246 } 247 else 248 err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs, 249 r->len); 250 if (err) 251 return err; 252 253 if (c->need_recovery) 254 err = ubifs_recover_size_accum(c, &r->key, r->deletion, 255 r->new_size); 256 } 257 258 return err; 259} 260 261/** 262 * replay_entries_cmp - compare 2 replay entries. 263 * @priv: UBIFS file-system description object 264 * @a: first replay entry 265 * @a: second replay entry 266 * 267 * This is a comparios function for 'list_sort()' which compares 2 replay 268 * entries @a and @b by comparing their sequence numer. Returns %1 if @a has 269 * greater sequence number and %-1 otherwise. 270 */ 271static int replay_entries_cmp(void *priv, struct list_head *a, 272 struct list_head *b) 273{ 274 struct replay_entry *ra, *rb; 275 276 cond_resched(); 277 if (a == b) 278 return 0; 279 280 ra = list_entry(a, struct replay_entry, list); 281 rb = list_entry(b, struct replay_entry, list); 282 ubifs_assert(ra->sqnum != rb->sqnum); 283 if (ra->sqnum > rb->sqnum) 284 return 1; 285 return -1; 286} 287 288/** 289 * apply_replay_list - apply the replay list to the TNC. 290 * @c: UBIFS file-system description object 291 * 292 * Apply all entries in the replay list to the TNC. Returns zero in case of 293 * success and a negative error code in case of failure. 294 */ 295static int apply_replay_list(struct ubifs_info *c) 296{ 297 struct replay_entry *r; 298 int err; 299 300 list_sort(c, &c->replay_list, &replay_entries_cmp); 301 302 list_for_each_entry(r, &c->replay_list, list) { 303 cond_resched(); 304 305 err = apply_replay_entry(c, r); 306 if (err) 307 return err; 308 } 309 310 return 0; 311} 312 313/** 314 * destroy_replay_list - destroy the replay. 315 * @c: UBIFS file-system description object 316 * 317 * Destroy the replay list. 318 */ 319static void destroy_replay_list(struct ubifs_info *c) 320{ 321 struct replay_entry *r, *tmp; 322 323 list_for_each_entry_safe(r, tmp, &c->replay_list, list) { 324 if (is_hash_key(c, &r->key)) 325 kfree(r->nm.name); 326 list_del(&r->list); 327 kfree(r); 328 } 329} 330 331/** 332 * insert_node - insert a node to the replay list 333 * @c: UBIFS file-system description object 334 * @lnum: node logical eraseblock number 335 * @offs: node offset 336 * @len: node length 337 * @key: node key 338 * @sqnum: sequence number 339 * @deletion: non-zero if this is a deletion 340 * @used: number of bytes in use in a LEB 341 * @old_size: truncation old size 342 * @new_size: truncation new size 343 * 344 * This function inserts a scanned non-direntry node to the replay list. The 345 * replay list contains @struct replay_entry elements, and we sort this list in 346 * sequence number order before applying it. The replay list is applied at the 347 * very end of the replay process. Since the list is sorted in sequence number 348 * order, the older modifications are applied first. This function returns zero 349 * in case of success and a negative error code in case of failure. 350 */ 351static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, 352 union ubifs_key *key, unsigned long long sqnum, 353 int deletion, int *used, loff_t old_size, 354 loff_t new_size) 355{ 356 struct replay_entry *r; 357 358 dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs); 359 360 if (key_inum(c, key) >= c->highest_inum) 361 c->highest_inum = key_inum(c, key); 362 363 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 364 if (!r) 365 return -ENOMEM; 366 367 if (!deletion) 368 *used += ALIGN(len, 8); 369 r->lnum = lnum; 370 r->offs = offs; 371 r->len = len; 372 r->deletion = !!deletion; 373 r->sqnum = sqnum; 374 key_copy(c, key, &r->key); 375 r->old_size = old_size; 376 r->new_size = new_size; 377 378 list_add_tail(&r->list, &c->replay_list); 379 return 0; 380} 381 382/** 383 * insert_dent - insert a directory entry node into the replay list. 384 * @c: UBIFS file-system description object 385 * @lnum: node logical eraseblock number 386 * @offs: node offset 387 * @len: node length 388 * @key: node key 389 * @name: directory entry name 390 * @nlen: directory entry name length 391 * @sqnum: sequence number 392 * @deletion: non-zero if this is a deletion 393 * @used: number of bytes in use in a LEB 394 * 395 * This function inserts a scanned directory entry node or an extended 396 * attribute entry to the replay list. Returns zero in case of success and a 397 * negative error code in case of failure. 398 */ 399static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, 400 union ubifs_key *key, const char *name, int nlen, 401 unsigned long long sqnum, int deletion, int *used) 402{ 403 struct replay_entry *r; 404 char *nbuf; 405 406 dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs); 407 if (key_inum(c, key) >= c->highest_inum) 408 c->highest_inum = key_inum(c, key); 409 410 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 411 if (!r) 412 return -ENOMEM; 413 414 nbuf = kmalloc(nlen + 1, GFP_KERNEL); 415 if (!nbuf) { 416 kfree(r); 417 return -ENOMEM; 418 } 419 420 if (!deletion) 421 *used += ALIGN(len, 8); 422 r->lnum = lnum; 423 r->offs = offs; 424 r->len = len; 425 r->deletion = !!deletion; 426 r->sqnum = sqnum; 427 key_copy(c, key, &r->key); 428 r->nm.len = nlen; 429 memcpy(nbuf, name, nlen); 430 nbuf[nlen] = '\0'; 431 r->nm.name = nbuf; 432 433 list_add_tail(&r->list, &c->replay_list); 434 return 0; 435} 436 437/** 438 * ubifs_validate_entry - validate directory or extended attribute entry node. 439 * @c: UBIFS file-system description object 440 * @dent: the node to validate 441 * 442 * This function validates directory or extended attribute entry node @dent. 443 * Returns zero if the node is all right and a %-EINVAL if not. 444 */ 445int ubifs_validate_entry(struct ubifs_info *c, 446 const struct ubifs_dent_node *dent) 447{ 448 int key_type = key_type_flash(c, dent->key); 449 int nlen = le16_to_cpu(dent->nlen); 450 451 if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 || 452 dent->type >= UBIFS_ITYPES_CNT || 453 nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 || 454 strnlen(dent->name, nlen) != nlen || 455 le64_to_cpu(dent->inum) > MAX_INUM) { 456 ubifs_err(c, "bad %s node", key_type == UBIFS_DENT_KEY ? 457 "directory entry" : "extended attribute entry"); 458 return -EINVAL; 459 } 460 461 if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) { 462 ubifs_err(c, "bad key type %d", key_type); 463 return -EINVAL; 464 } 465 466 return 0; 467} 468 469/** 470 * is_last_bud - check if the bud is the last in the journal head. 471 * @c: UBIFS file-system description object 472 * @bud: bud description object 473 * 474 * This function checks if bud @bud is the last bud in its journal head. This 475 * information is then used by 'replay_bud()' to decide whether the bud can 476 * have corruptions or not. Indeed, only last buds can be corrupted by power 477 * cuts. Returns %1 if this is the last bud, and %0 if not. 478 */ 479static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud) 480{ 481 struct ubifs_jhead *jh = &c->jheads[bud->jhead]; 482 struct ubifs_bud *next; 483 uint32_t data; 484 int err; 485 486 if (list_is_last(&bud->list, &jh->buds_list)) 487 return 1; 488 489 /* 490 * The following is a quirk to make sure we work correctly with UBIFS 491 * images used with older UBIFS. 492 * 493 * Normally, the last bud will be the last in the journal head's list 494 * of bud. However, there is one exception if the UBIFS image belongs 495 * to older UBIFS. This is fairly unlikely: one would need to use old 496 * UBIFS, then have a power cut exactly at the right point, and then 497 * try to mount this image with new UBIFS. 498 * 499 * The exception is: it is possible to have 2 buds A and B, A goes 500 * before B, and B is the last, bud B is contains no data, and bud A is 501 * corrupted at the end. The reason is that in older versions when the 502 * journal code switched the next bud (from A to B), it first added a 503 * log reference node for the new bud (B), and only after this it 504 * synchronized the write-buffer of current bud (A). But later this was 505 * changed and UBIFS started to always synchronize the write-buffer of 506 * the bud (A) before writing the log reference for the new bud (B). 507 * 508 * But because older UBIFS always synchronized A's write-buffer before 509 * writing to B, we can recognize this exceptional situation but 510 * checking the contents of bud B - if it is empty, then A can be 511 * treated as the last and we can recover it. 512 * 513 * TODO: remove this piece of code in a couple of years (today it is 514 * 16.05.2011). 515 */ 516 next = list_entry(bud->list.next, struct ubifs_bud, list); 517 if (!list_is_last(&next->list, &jh->buds_list)) 518 return 0; 519 520 err = ubifs_leb_read(c, next->lnum, (char *)&data, next->start, 4, 1); 521 if (err) 522 return 0; 523 524 return data == 0xFFFFFFFF; 525} 526 527/** 528 * replay_bud - replay a bud logical eraseblock. 529 * @c: UBIFS file-system description object 530 * @b: bud entry which describes the bud 531 * 532 * This function replays bud @bud, recovers it if needed, and adds all nodes 533 * from this bud to the replay list. Returns zero in case of success and a 534 * negative error code in case of failure. 535 */ 536static int replay_bud(struct ubifs_info *c, struct bud_entry *b) 537{ 538 int is_last = is_last_bud(c, b->bud); 539 int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start; 540 struct ubifs_scan_leb *sleb; 541 struct ubifs_scan_node *snod; 542 543 dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d", 544 lnum, b->bud->jhead, offs, is_last); 545 546 if (c->need_recovery && is_last) 547 /* 548 * Recover only last LEBs in the journal heads, because power 549 * cuts may cause corruptions only in these LEBs, because only 550 * these LEBs could possibly be written to at the power cut 551 * time. 552 */ 553 sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, b->bud->jhead); 554 else 555 sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); 556 if (IS_ERR(sleb)) 557 return PTR_ERR(sleb); 558 559 /* 560 * The bud does not have to start from offset zero - the beginning of 561 * the 'lnum' LEB may contain previously committed data. One of the 562 * things we have to do in replay is to correctly update lprops with 563 * newer information about this LEB. 564 * 565 * At this point lprops thinks that this LEB has 'c->leb_size - offs' 566 * bytes of free space because it only contain information about 567 * committed data. 568 * 569 * But we know that real amount of free space is 'c->leb_size - 570 * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and 571 * 'sleb->endpt' is used by bud data. We have to correctly calculate 572 * how much of these data are dirty and update lprops with this 573 * information. 574 * 575 * The dirt in that LEB region is comprised of padding nodes, deletion 576 * nodes, truncation nodes and nodes which are obsoleted by subsequent 577 * nodes in this LEB. So instead of calculating clean space, we 578 * calculate used space ('used' variable). 579 */ 580 581 list_for_each_entry(snod, &sleb->nodes, list) { 582 int deletion = 0; 583 584 cond_resched(); 585 586 if (snod->sqnum >= SQNUM_WATERMARK) { 587 ubifs_err(c, "file system's life ended"); 588 goto out_dump; 589 } 590 591 if (snod->sqnum > c->max_sqnum) 592 c->max_sqnum = snod->sqnum; 593 594 switch (snod->type) { 595 case UBIFS_INO_NODE: 596 { 597 struct ubifs_ino_node *ino = snod->node; 598 loff_t new_size = le64_to_cpu(ino->size); 599 600 if (le32_to_cpu(ino->nlink) == 0) 601 deletion = 1; 602 err = insert_node(c, lnum, snod->offs, snod->len, 603 &snod->key, snod->sqnum, deletion, 604 &used, 0, new_size); 605 break; 606 } 607 case UBIFS_DATA_NODE: 608 { 609 struct ubifs_data_node *dn = snod->node; 610 loff_t new_size = le32_to_cpu(dn->size) + 611 key_block(c, &snod->key) * 612 UBIFS_BLOCK_SIZE; 613 614 err = insert_node(c, lnum, snod->offs, snod->len, 615 &snod->key, snod->sqnum, deletion, 616 &used, 0, new_size); 617 break; 618 } 619 case UBIFS_DENT_NODE: 620 case UBIFS_XENT_NODE: 621 { 622 struct ubifs_dent_node *dent = snod->node; 623 624 err = ubifs_validate_entry(c, dent); 625 if (err) 626 goto out_dump; 627 628 err = insert_dent(c, lnum, snod->offs, snod->len, 629 &snod->key, dent->name, 630 le16_to_cpu(dent->nlen), snod->sqnum, 631 !le64_to_cpu(dent->inum), &used); 632 break; 633 } 634 case UBIFS_TRUN_NODE: 635 { 636 struct ubifs_trun_node *trun = snod->node; 637 loff_t old_size = le64_to_cpu(trun->old_size); 638 loff_t new_size = le64_to_cpu(trun->new_size); 639 union ubifs_key key; 640 641 /* Validate truncation node */ 642 if (old_size < 0 || old_size > c->max_inode_sz || 643 new_size < 0 || new_size > c->max_inode_sz || 644 old_size <= new_size) { 645 ubifs_err(c, "bad truncation node"); 646 goto out_dump; 647 } 648 649 /* 650 * Create a fake truncation key just to use the same 651 * functions which expect nodes to have keys. 652 */ 653 trun_key_init(c, &key, le32_to_cpu(trun->inum)); 654 err = insert_node(c, lnum, snod->offs, snod->len, 655 &key, snod->sqnum, 1, &used, 656 old_size, new_size); 657 break; 658 } 659 default: 660 ubifs_err(c, "unexpected node type %d in bud LEB %d:%d", 661 snod->type, lnum, snod->offs); 662 err = -EINVAL; 663 goto out_dump; 664 } 665 if (err) 666 goto out; 667 } 668 669 ubifs_assert(ubifs_search_bud(c, lnum)); 670 ubifs_assert(sleb->endpt - offs >= used); 671 ubifs_assert(sleb->endpt % c->min_io_size == 0); 672 673 b->dirty = sleb->endpt - offs - used; 674 b->free = c->leb_size - sleb->endpt; 675 dbg_mnt("bud LEB %d replied: dirty %d, free %d", 676 lnum, b->dirty, b->free); 677 678out: 679 ubifs_scan_destroy(sleb); 680 return err; 681 682out_dump: 683 ubifs_err(c, "bad node is at LEB %d:%d", lnum, snod->offs); 684 ubifs_dump_node(c, snod->node); 685 ubifs_scan_destroy(sleb); 686 return -EINVAL; 687} 688 689/** 690 * replay_buds - replay all buds. 691 * @c: UBIFS file-system description object 692 * 693 * This function returns zero in case of success and a negative error code in 694 * case of failure. 695 */ 696static int replay_buds(struct ubifs_info *c) 697{ 698 struct bud_entry *b; 699 int err; 700 unsigned long long prev_sqnum = 0; 701 702 list_for_each_entry(b, &c->replay_buds, list) { 703 err = replay_bud(c, b); 704 if (err) 705 return err; 706 707 ubifs_assert(b->sqnum > prev_sqnum); 708 prev_sqnum = b->sqnum; 709 } 710 711 return 0; 712} 713 714/** 715 * destroy_bud_list - destroy the list of buds to replay. 716 * @c: UBIFS file-system description object 717 */ 718static void destroy_bud_list(struct ubifs_info *c) 719{ 720 struct bud_entry *b; 721 722 while (!list_empty(&c->replay_buds)) { 723 b = list_entry(c->replay_buds.next, struct bud_entry, list); 724 list_del(&b->list); 725 kfree(b); 726 } 727} 728 729/** 730 * add_replay_bud - add a bud to the list of buds to replay. 731 * @c: UBIFS file-system description object 732 * @lnum: bud logical eraseblock number to replay 733 * @offs: bud start offset 734 * @jhead: journal head to which this bud belongs 735 * @sqnum: reference node sequence number 736 * 737 * This function returns zero in case of success and a negative error code in 738 * case of failure. 739 */ 740static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, 741 unsigned long long sqnum) 742{ 743 struct ubifs_bud *bud; 744 struct bud_entry *b; 745 746 dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead); 747 748 bud = kmalloc(sizeof(struct ubifs_bud), GFP_KERNEL); 749 if (!bud) 750 return -ENOMEM; 751 752 b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL); 753 if (!b) { 754 kfree(bud); 755 return -ENOMEM; 756 } 757 758 bud->lnum = lnum; 759 bud->start = offs; 760 bud->jhead = jhead; 761 ubifs_add_bud(c, bud); 762 763 b->bud = bud; 764 b->sqnum = sqnum; 765 list_add_tail(&b->list, &c->replay_buds); 766 767 return 0; 768} 769 770/** 771 * validate_ref - validate a reference node. 772 * @c: UBIFS file-system description object 773 * @ref: the reference node to validate 774 * @ref_lnum: LEB number of the reference node 775 * @ref_offs: reference node offset 776 * 777 * This function returns %1 if a bud reference already exists for the LEB. %0 is 778 * returned if the reference node is new, otherwise %-EINVAL is returned if 779 * validation failed. 780 */ 781static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref) 782{ 783 struct ubifs_bud *bud; 784 int lnum = le32_to_cpu(ref->lnum); 785 unsigned int offs = le32_to_cpu(ref->offs); 786 unsigned int jhead = le32_to_cpu(ref->jhead); 787 788 /* 789 * ref->offs may point to the end of LEB when the journal head points 790 * to the end of LEB and we write reference node for it during commit. 791 * So this is why we require 'offs > c->leb_size'. 792 */ 793 if (jhead >= c->jhead_cnt || lnum >= c->leb_cnt || 794 lnum < c->main_first || offs > c->leb_size || 795 offs & (c->min_io_size - 1)) 796 return -EINVAL; 797 798 /* Make sure we have not already looked at this bud */ 799 bud = ubifs_search_bud(c, lnum); 800 if (bud) { 801 if (bud->jhead == jhead && bud->start <= offs) 802 return 1; 803 ubifs_err(c, "bud at LEB %d:%d was already referred", lnum, offs); 804 return -EINVAL; 805 } 806 807 return 0; 808} 809 810/** 811 * replay_log_leb - replay a log logical eraseblock. 812 * @c: UBIFS file-system description object 813 * @lnum: log logical eraseblock to replay 814 * @offs: offset to start replaying from 815 * @sbuf: scan buffer 816 * 817 * This function replays a log LEB and returns zero in case of success, %1 if 818 * this is the last LEB in the log, and a negative error code in case of 819 * failure. 820 */ 821static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) 822{ 823 int err; 824 struct ubifs_scan_leb *sleb; 825 struct ubifs_scan_node *snod; 826 const struct ubifs_cs_node *node; 827 828 dbg_mnt("replay log LEB %d:%d", lnum, offs); 829 sleb = ubifs_scan(c, lnum, offs, sbuf, c->need_recovery); 830 if (IS_ERR(sleb)) { 831 if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery) 832 return PTR_ERR(sleb); 833 /* 834 * Note, the below function will recover this log LEB only if 835 * it is the last, because unclean reboots can possibly corrupt 836 * only the tail of the log. 837 */ 838 sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); 839 if (IS_ERR(sleb)) 840 return PTR_ERR(sleb); 841 } 842 843 if (sleb->nodes_cnt == 0) { 844 err = 1; 845 goto out; 846 } 847 848 node = sleb->buf; 849 snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list); 850 if (c->cs_sqnum == 0) { 851 /* 852 * This is the first log LEB we are looking at, make sure that 853 * the first node is a commit start node. Also record its 854 * sequence number so that UBIFS can determine where the log 855 * ends, because all nodes which were have higher sequence 856 * numbers. 857 */ 858 if (snod->type != UBIFS_CS_NODE) { 859 ubifs_err(c, "first log node at LEB %d:%d is not CS node", 860 lnum, offs); 861 goto out_dump; 862 } 863 if (le64_to_cpu(node->cmt_no) != c->cmt_no) { 864 ubifs_err(c, "first CS node at LEB %d:%d has wrong commit number %llu expected %llu", 865 lnum, offs, 866 (unsigned long long)le64_to_cpu(node->cmt_no), 867 c->cmt_no); 868 goto out_dump; 869 } 870 871 c->cs_sqnum = le64_to_cpu(node->ch.sqnum); 872 dbg_mnt("commit start sqnum %llu", c->cs_sqnum); 873 } 874 875 if (snod->sqnum < c->cs_sqnum) { 876 /* 877 * This means that we reached end of log and now 878 * look to the older log data, which was already 879 * committed but the eraseblock was not erased (UBIFS 880 * only un-maps it). So this basically means we have to 881 * exit with "end of log" code. 882 */ 883 err = 1; 884 goto out; 885 } 886 887 /* Make sure the first node sits at offset zero of the LEB */ 888 if (snod->offs != 0) { 889 ubifs_err(c, "first node is not at zero offset"); 890 goto out_dump; 891 } 892 893 list_for_each_entry(snod, &sleb->nodes, list) { 894 cond_resched(); 895 896 if (snod->sqnum >= SQNUM_WATERMARK) { 897 ubifs_err(c, "file system's life ended"); 898 goto out_dump; 899 } 900 901 if (snod->sqnum < c->cs_sqnum) { 902 ubifs_err(c, "bad sqnum %llu, commit sqnum %llu", 903 snod->sqnum, c->cs_sqnum); 904 goto out_dump; 905 } 906 907 if (snod->sqnum > c->max_sqnum) 908 c->max_sqnum = snod->sqnum; 909 910 switch (snod->type) { 911 case UBIFS_REF_NODE: { 912 const struct ubifs_ref_node *ref = snod->node; 913 914 err = validate_ref(c, ref); 915 if (err == 1) 916 break; /* Already have this bud */ 917 if (err) 918 goto out_dump; 919 920 err = add_replay_bud(c, le32_to_cpu(ref->lnum), 921 le32_to_cpu(ref->offs), 922 le32_to_cpu(ref->jhead), 923 snod->sqnum); 924 if (err) 925 goto out; 926 927 break; 928 } 929 case UBIFS_CS_NODE: 930 /* Make sure it sits at the beginning of LEB */ 931 if (snod->offs != 0) { 932 ubifs_err(c, "unexpected node in log"); 933 goto out_dump; 934 } 935 break; 936 default: 937 ubifs_err(c, "unexpected node in log"); 938 goto out_dump; 939 } 940 } 941 942 if (sleb->endpt || c->lhead_offs >= c->leb_size) { 943 c->lhead_lnum = lnum; 944 c->lhead_offs = sleb->endpt; 945 } 946 947 err = !sleb->endpt; 948out: 949 ubifs_scan_destroy(sleb); 950 return err; 951 952out_dump: 953 ubifs_err(c, "log error detected while replaying the log at LEB %d:%d", 954 lnum, offs + snod->offs); 955 ubifs_dump_node(c, snod->node); 956 ubifs_scan_destroy(sleb); 957 return -EINVAL; 958} 959 960/** 961 * take_ihead - update the status of the index head in lprops to 'taken'. 962 * @c: UBIFS file-system description object 963 * 964 * This function returns the amount of free space in the index head LEB or a 965 * negative error code. 966 */ 967static int take_ihead(struct ubifs_info *c) 968{ 969 const struct ubifs_lprops *lp; 970 int err, free; 971 972 ubifs_get_lprops(c); 973 974 lp = ubifs_lpt_lookup_dirty(c, c->ihead_lnum); 975 if (IS_ERR(lp)) { 976 err = PTR_ERR(lp); 977 goto out; 978 } 979 980 free = lp->free; 981 982 lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, 983 lp->flags | LPROPS_TAKEN, 0); 984 if (IS_ERR(lp)) { 985 err = PTR_ERR(lp); 986 goto out; 987 } 988 989 err = free; 990out: 991 ubifs_release_lprops(c); 992 return err; 993} 994 995/** 996 * ubifs_replay_journal - replay journal. 997 * @c: UBIFS file-system description object 998 * 999 * This function scans the journal, replays and cleans it up. It makes sure all 1000 * memory data structures related to uncommitted journal are built (dirty TNC 1001 * tree, tree of buds, modified lprops, etc). 1002 */ 1003int ubifs_replay_journal(struct ubifs_info *c) 1004{ 1005 int err, lnum, free; 1006 1007 BUILD_BUG_ON(UBIFS_TRUN_KEY > 5); 1008 1009 /* Update the status of the index head in lprops to 'taken' */ 1010 free = take_ihead(c); 1011 if (free < 0) 1012 return free; /* Error code */ 1013 1014 if (c->ihead_offs != c->leb_size - free) { 1015 ubifs_err(c, "bad index head LEB %d:%d", c->ihead_lnum, 1016 c->ihead_offs); 1017 return -EINVAL; 1018 } 1019 1020 dbg_mnt("start replaying the journal"); 1021 c->replaying = 1; 1022 lnum = c->ltail_lnum = c->lhead_lnum; 1023 1024 do { 1025 err = replay_log_leb(c, lnum, 0, c->sbuf); 1026 if (err == 1) { 1027 if (lnum != c->lhead_lnum) 1028 /* We hit the end of the log */ 1029 break; 1030 1031 /* 1032 * The head of the log must always start with the 1033 * "commit start" node on a properly formatted UBIFS. 1034 * But we found no nodes at all, which means that 1035 * someting went wrong and we cannot proceed mounting 1036 * the file-system. 1037 */ 1038 ubifs_err(c, "no UBIFS nodes found at the log head LEB %d:%d, possibly corrupted", 1039 lnum, 0); 1040 err = -EINVAL; 1041 } 1042 if (err) 1043 goto out; 1044 lnum = ubifs_next_log_lnum(c, lnum); 1045 } while (lnum != c->ltail_lnum); 1046 1047 err = replay_buds(c); 1048 if (err) 1049 goto out; 1050 1051 err = apply_replay_list(c); 1052 if (err) 1053 goto out; 1054 1055 err = set_buds_lprops(c); 1056 if (err) 1057 goto out; 1058 1059 /* 1060 * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable 1061 * to roughly estimate index growth. Things like @c->bi.min_idx_lebs 1062 * depend on it. This means we have to initialize it to make sure 1063 * budgeting works properly. 1064 */ 1065 c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); 1066 c->bi.uncommitted_idx *= c->max_idx_node_sz; 1067 1068 ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); 1069 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, highest_inum %lu", 1070 c->lhead_lnum, c->lhead_offs, c->max_sqnum, 1071 (unsigned long)c->highest_inum); 1072out: 1073 destroy_replay_list(c); 1074 destroy_bud_list(c); 1075 c->replaying = 0; 1076 return err; 1077} 1078