1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Adrian Hunter
20 *          Artem Bityutskiy (���������������� ����������)
21 */
22
23/*
24 * This file contains journal replay code. It runs when the file-system is being
25 * mounted and requires no locking.
26 *
27 * The larger is the journal, the longer it takes to scan it, so the longer it
28 * takes to mount UBIFS. This is why the journal has limited size which may be
29 * changed depending on the system requirements. But a larger journal gives
30 * faster I/O speed because it writes the index less frequently. So this is a
31 * trade-off. Also, the journal is indexed by the in-memory index (TNC), so the
32 * larger is the journal, the more memory its index may consume.
33 */
34
35#include "ubifs.h"
36
37/*
38 * Replay flags.
39 *
40 * REPLAY_DELETION: node was deleted
41 * REPLAY_REF: node is a reference node
42 */
43enum {
44	REPLAY_DELETION = 1,
45	REPLAY_REF = 2,
46};
47
48/**
49 * struct replay_entry - replay tree entry.
50 * @lnum: logical eraseblock number of the node
51 * @offs: node offset
52 * @len: node length
53 * @sqnum: node sequence number
54 * @flags: replay flags
55 * @rb: links the replay tree
56 * @key: node key
57 * @nm: directory entry name
58 * @old_size: truncation old size
59 * @new_size: truncation new size
60 * @free: amount of free space in a bud
61 * @dirty: amount of dirty space in a bud from padding and deletion nodes
62 *
63 * UBIFS journal replay must compare node sequence numbers, which means it must
64 * build a tree of node information to insert into the TNC.
65 */
66struct replay_entry {
67	int lnum;
68	int offs;
69	int len;
70	unsigned long long sqnum;
71	int flags;
72	struct rb_node rb;
73	union ubifs_key key;
74	union {
75		struct qstr nm;
76		struct {
77			loff_t old_size;
78			loff_t new_size;
79		};
80		struct {
81			int free;
82			int dirty;
83		};
84	};
85};
86
87/**
88 * struct bud_entry - entry in the list of buds to replay.
89 * @list: next bud in the list
90 * @bud: bud description object
91 * @free: free bytes in the bud
92 * @sqnum: reference node sequence number
93 */
94struct bud_entry {
95	struct list_head list;
96	struct ubifs_bud *bud;
97	int free;
98	unsigned long long sqnum;
99};
100
101/**
102 * set_bud_lprops - set free and dirty space used by a bud.
103 * @c: UBIFS file-system description object
104 * @r: replay entry of bud
105 */
106static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r)
107{
108	const struct ubifs_lprops *lp;
109	int err = 0, dirty;
110
111	ubifs_get_lprops(c);
112
113	lp = ubifs_lpt_lookup_dirty(c, r->lnum);
114	if (IS_ERR(lp)) {
115		err = PTR_ERR(lp);
116		goto out;
117	}
118
119	dirty = lp->dirty;
120	if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) {
121		/*
122		 * The LEB was added to the journal with a starting offset of
123		 * zero which means the LEB must have been empty. The LEB
124		 * property values should be lp->free == c->leb_size and
125		 * lp->dirty == 0, but that is not the case. The reason is that
126		 * the LEB was garbage collected. The garbage collector resets
127		 * the free and dirty space without recording it anywhere except
128		 * lprops, so if there is not a commit then lprops does not have
129		 * that information next time the file system is mounted.
130		 *
131		 * We do not need to adjust free space because the scan has told
132		 * us the exact value which is recorded in the replay entry as
133		 * r->free.
134		 *
135		 * However we do need to subtract from the dirty space the
136		 * amount of space that the garbage collector reclaimed, which
137		 * is the whole LEB minus the amount of space that was free.
138		 */
139		dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum,
140			lp->free, lp->dirty);
141		dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum,
142			lp->free, lp->dirty);
143		dirty -= c->leb_size - lp->free;
144		/*
145		 * If the replay order was perfect the dirty space would now be
146		 * zero. The order is not perfect because the journal heads
147		 * race with each other. This is not a problem but is does mean
148		 * that the dirty space may temporarily exceed c->leb_size
149		 * during the replay.
150		 */
151		if (dirty != 0)
152			dbg_msg("LEB %d lp: %d free %d dirty "
153				"replay: %d free %d dirty", r->lnum, lp->free,
154				lp->dirty, r->free, r->dirty);
155	}
156	lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty,
157			     lp->flags | LPROPS_TAKEN, 0);
158	if (IS_ERR(lp)) {
159		err = PTR_ERR(lp);
160		goto out;
161	}
162out:
163	ubifs_release_lprops(c);
164	return err;
165}
166
167/**
168 * trun_remove_range - apply a replay entry for a truncation to the TNC.
169 * @c: UBIFS file-system description object
170 * @r: replay entry of truncation
171 */
172static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r)
173{
174	unsigned min_blk, max_blk;
175	union ubifs_key min_key, max_key;
176	ino_t ino;
177
178	min_blk = r->new_size / UBIFS_BLOCK_SIZE;
179	if (r->new_size & (UBIFS_BLOCK_SIZE - 1))
180		min_blk += 1;
181
182	max_blk = r->old_size / UBIFS_BLOCK_SIZE;
183	if ((r->old_size & (UBIFS_BLOCK_SIZE - 1)) == 0)
184		max_blk -= 1;
185
186	ino = key_inum(c, &r->key);
187
188	data_key_init(c, &min_key, ino, min_blk);
189	data_key_init(c, &max_key, ino, max_blk);
190
191	return ubifs_tnc_remove_range(c, &min_key, &max_key);
192}
193
194/**
195 * apply_replay_entry - apply a replay entry to the TNC.
196 * @c: UBIFS file-system description object
197 * @r: replay entry to apply
198 *
199 * Apply a replay entry to the TNC.
200 */
201static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
202{
203	int err, deletion = ((r->flags & REPLAY_DELETION) != 0);
204
205	dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum,
206		r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key));
207
208	/* Set c->replay_sqnum to help deal with dangling branches. */
209	c->replay_sqnum = r->sqnum;
210
211	if (r->flags & REPLAY_REF)
212		err = set_bud_lprops(c, r);
213	else if (is_hash_key(c, &r->key)) {
214		if (deletion)
215			err = ubifs_tnc_remove_nm(c, &r->key, &r->nm);
216		else
217			err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs,
218					       r->len, &r->nm);
219	} else {
220		if (deletion)
221			switch (key_type(c, &r->key)) {
222			case UBIFS_INO_KEY:
223			{
224				ino_t inum = key_inum(c, &r->key);
225
226				err = ubifs_tnc_remove_ino(c, inum);
227				break;
228			}
229			case UBIFS_TRUN_KEY:
230				err = trun_remove_range(c, r);
231				break;
232			default:
233				err = ubifs_tnc_remove(c, &r->key);
234				break;
235			}
236		else
237			err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs,
238					    r->len);
239		if (err)
240			return err;
241
242		if (c->need_recovery)
243			err = ubifs_recover_size_accum(c, &r->key, deletion,
244						       r->new_size);
245	}
246
247	return err;
248}
249
250/**
251 * destroy_replay_tree - destroy the replay.
252 * @c: UBIFS file-system description object
253 *
254 * Destroy the replay tree.
255 */
256static void destroy_replay_tree(struct ubifs_info *c)
257{
258	struct rb_node *this = c->replay_tree.rb_node;
259	struct replay_entry *r;
260
261	while (this) {
262		if (this->rb_left) {
263			this = this->rb_left;
264			continue;
265		} else if (this->rb_right) {
266			this = this->rb_right;
267			continue;
268		}
269		r = rb_entry(this, struct replay_entry, rb);
270		this = rb_parent(this);
271		if (this) {
272			if (this->rb_left == &r->rb)
273				this->rb_left = NULL;
274			else
275				this->rb_right = NULL;
276		}
277		if (is_hash_key(c, &r->key))
278			kfree(r->nm.name);
279		kfree(r);
280	}
281	c->replay_tree = RB_ROOT;
282}
283
284/**
285 * apply_replay_tree - apply the replay tree to the TNC.
286 * @c: UBIFS file-system description object
287 *
288 * Apply the replay tree.
289 * Returns zero in case of success and a negative error code in case of
290 * failure.
291 */
292static int apply_replay_tree(struct ubifs_info *c)
293{
294	struct rb_node *this = rb_first(&c->replay_tree);
295
296	while (this) {
297		struct replay_entry *r;
298		int err;
299
300		cond_resched();
301
302		r = rb_entry(this, struct replay_entry, rb);
303		err = apply_replay_entry(c, r);
304		if (err)
305			return err;
306		this = rb_next(this);
307	}
308	return 0;
309}
310
311/**
312 * insert_node - insert a node to the replay tree.
313 * @c: UBIFS file-system description object
314 * @lnum: node logical eraseblock number
315 * @offs: node offset
316 * @len: node length
317 * @key: node key
318 * @sqnum: sequence number
319 * @deletion: non-zero if this is a deletion
320 * @used: number of bytes in use in a LEB
321 * @old_size: truncation old size
322 * @new_size: truncation new size
323 *
324 * This function inserts a scanned non-direntry node to the replay tree. The
325 * replay tree is an RB-tree containing @struct replay_entry elements which are
326 * indexed by the sequence number. The replay tree is applied at the very end
327 * of the replay process. Since the tree is sorted in sequence number order,
328 * the older modifications are applied first. This function returns zero in
329 * case of success and a negative error code in case of failure.
330 */
331static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
332		       union ubifs_key *key, unsigned long long sqnum,
333		       int deletion, int *used, loff_t old_size,
334		       loff_t new_size)
335{
336	struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
337	struct replay_entry *r;
338
339	if (key_inum(c, key) >= c->highest_inum)
340		c->highest_inum = key_inum(c, key);
341
342	dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
343	while (*p) {
344		parent = *p;
345		r = rb_entry(parent, struct replay_entry, rb);
346		if (sqnum < r->sqnum) {
347			p = &(*p)->rb_left;
348			continue;
349		} else if (sqnum > r->sqnum) {
350			p = &(*p)->rb_right;
351			continue;
352		}
353		ubifs_err("duplicate sqnum in replay");
354		return -EINVAL;
355	}
356
357	r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
358	if (!r)
359		return -ENOMEM;
360
361	if (!deletion)
362		*used += ALIGN(len, 8);
363	r->lnum = lnum;
364	r->offs = offs;
365	r->len = len;
366	r->sqnum = sqnum;
367	r->flags = (deletion ? REPLAY_DELETION : 0);
368	r->old_size = old_size;
369	r->new_size = new_size;
370	key_copy(c, key, &r->key);
371
372	rb_link_node(&r->rb, parent, p);
373	rb_insert_color(&r->rb, &c->replay_tree);
374	return 0;
375}
376
377/**
378 * insert_dent - insert a directory entry node into the replay tree.
379 * @c: UBIFS file-system description object
380 * @lnum: node logical eraseblock number
381 * @offs: node offset
382 * @len: node length
383 * @key: node key
384 * @name: directory entry name
385 * @nlen: directory entry name length
386 * @sqnum: sequence number
387 * @deletion: non-zero if this is a deletion
388 * @used: number of bytes in use in a LEB
389 *
390 * This function inserts a scanned directory entry node to the replay tree.
391 * Returns zero in case of success and a negative error code in case of
392 * failure.
393 *
394 * This function is also used for extended attribute entries because they are
395 * implemented as directory entry nodes.
396 */
397static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
398		       union ubifs_key *key, const char *name, int nlen,
399		       unsigned long long sqnum, int deletion, int *used)
400{
401	struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
402	struct replay_entry *r;
403	char *nbuf;
404
405	if (key_inum(c, key) >= c->highest_inum)
406		c->highest_inum = key_inum(c, key);
407
408	dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
409	while (*p) {
410		parent = *p;
411		r = rb_entry(parent, struct replay_entry, rb);
412		if (sqnum < r->sqnum) {
413			p = &(*p)->rb_left;
414			continue;
415		}
416		if (sqnum > r->sqnum) {
417			p = &(*p)->rb_right;
418			continue;
419		}
420		ubifs_err("duplicate sqnum in replay");
421		return -EINVAL;
422	}
423
424	r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
425	if (!r)
426		return -ENOMEM;
427	nbuf = kmalloc(nlen + 1, GFP_KERNEL);
428	if (!nbuf) {
429		kfree(r);
430		return -ENOMEM;
431	}
432
433	if (!deletion)
434		*used += ALIGN(len, 8);
435	r->lnum = lnum;
436	r->offs = offs;
437	r->len = len;
438	r->sqnum = sqnum;
439	r->nm.len = nlen;
440	memcpy(nbuf, name, nlen);
441	nbuf[nlen] = '\0';
442	r->nm.name = nbuf;
443	r->flags = (deletion ? REPLAY_DELETION : 0);
444	key_copy(c, key, &r->key);
445
446	ubifs_assert(!*p);
447	rb_link_node(&r->rb, parent, p);
448	rb_insert_color(&r->rb, &c->replay_tree);
449	return 0;
450}
451
452/**
453 * ubifs_validate_entry - validate directory or extended attribute entry node.
454 * @c: UBIFS file-system description object
455 * @dent: the node to validate
456 *
457 * This function validates directory or extended attribute entry node @dent.
458 * Returns zero if the node is all right and a %-EINVAL if not.
459 */
460int ubifs_validate_entry(struct ubifs_info *c,
461			 const struct ubifs_dent_node *dent)
462{
463	int key_type = key_type_flash(c, dent->key);
464	int nlen = le16_to_cpu(dent->nlen);
465
466	if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 ||
467	    dent->type >= UBIFS_ITYPES_CNT ||
468	    nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 ||
469	    strnlen(dent->name, nlen) != nlen ||
470	    le64_to_cpu(dent->inum) > MAX_INUM) {
471		ubifs_err("bad %s node", key_type == UBIFS_DENT_KEY ?
472			  "directory entry" : "extended attribute entry");
473		return -EINVAL;
474	}
475
476	if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) {
477		ubifs_err("bad key type %d", key_type);
478		return -EINVAL;
479	}
480
481	return 0;
482}
483
484/**
485 * replay_bud - replay a bud logical eraseblock.
486 * @c: UBIFS file-system description object
487 * @lnum: bud logical eraseblock number to replay
488 * @offs: bud start offset
489 * @jhead: journal head to which this bud belongs
490 * @free: amount of free space in the bud is returned here
491 * @dirty: amount of dirty space from padding and deletion nodes is returned
492 * here
493 *
494 * This function returns zero in case of success and a negative error code in
495 * case of failure.
496 */
497static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
498		      int *free, int *dirty)
499{
500	int err = 0, used = 0;
501	struct ubifs_scan_leb *sleb;
502	struct ubifs_scan_node *snod;
503	struct ubifs_bud *bud;
504
505	dbg_mnt("replay bud LEB %d, head %d", lnum, jhead);
506	if (c->need_recovery)
507		sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD);
508	else
509		sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0);
510	if (IS_ERR(sleb))
511		return PTR_ERR(sleb);
512
513	/*
514	 * The bud does not have to start from offset zero - the beginning of
515	 * the 'lnum' LEB may contain previously committed data. One of the
516	 * things we have to do in replay is to correctly update lprops with
517	 * newer information about this LEB.
518	 *
519	 * At this point lprops thinks that this LEB has 'c->leb_size - offs'
520	 * bytes of free space because it only contain information about
521	 * committed data.
522	 *
523	 * But we know that real amount of free space is 'c->leb_size -
524	 * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and
525	 * 'sleb->endpt' is used by bud data. We have to correctly calculate
526	 * how much of these data are dirty and update lprops with this
527	 * information.
528	 *
529	 * The dirt in that LEB region is comprised of padding nodes, deletion
530	 * nodes, truncation nodes and nodes which are obsoleted by subsequent
531	 * nodes in this LEB. So instead of calculating clean space, we
532	 * calculate used space ('used' variable).
533	 */
534
535	list_for_each_entry(snod, &sleb->nodes, list) {
536		int deletion = 0;
537
538		cond_resched();
539
540		if (snod->sqnum >= SQNUM_WATERMARK) {
541			ubifs_err("file system's life ended");
542			goto out_dump;
543		}
544
545		if (snod->sqnum > c->max_sqnum)
546			c->max_sqnum = snod->sqnum;
547
548		switch (snod->type) {
549		case UBIFS_INO_NODE:
550		{
551			struct ubifs_ino_node *ino = snod->node;
552			loff_t new_size = le64_to_cpu(ino->size);
553
554			if (le32_to_cpu(ino->nlink) == 0)
555				deletion = 1;
556			err = insert_node(c, lnum, snod->offs, snod->len,
557					  &snod->key, snod->sqnum, deletion,
558					  &used, 0, new_size);
559			break;
560		}
561		case UBIFS_DATA_NODE:
562		{
563			struct ubifs_data_node *dn = snod->node;
564			loff_t new_size = le32_to_cpu(dn->size) +
565					  key_block(c, &snod->key) *
566					  UBIFS_BLOCK_SIZE;
567
568			err = insert_node(c, lnum, snod->offs, snod->len,
569					  &snod->key, snod->sqnum, deletion,
570					  &used, 0, new_size);
571			break;
572		}
573		case UBIFS_DENT_NODE:
574		case UBIFS_XENT_NODE:
575		{
576			struct ubifs_dent_node *dent = snod->node;
577
578			err = ubifs_validate_entry(c, dent);
579			if (err)
580				goto out_dump;
581
582			err = insert_dent(c, lnum, snod->offs, snod->len,
583					  &snod->key, dent->name,
584					  le16_to_cpu(dent->nlen), snod->sqnum,
585					  !le64_to_cpu(dent->inum), &used);
586			break;
587		}
588		case UBIFS_TRUN_NODE:
589		{
590			struct ubifs_trun_node *trun = snod->node;
591			loff_t old_size = le64_to_cpu(trun->old_size);
592			loff_t new_size = le64_to_cpu(trun->new_size);
593			union ubifs_key key;
594
595			/* Validate truncation node */
596			if (old_size < 0 || old_size > c->max_inode_sz ||
597			    new_size < 0 || new_size > c->max_inode_sz ||
598			    old_size <= new_size) {
599				ubifs_err("bad truncation node");
600				goto out_dump;
601			}
602
603			/*
604			 * Create a fake truncation key just to use the same
605			 * functions which expect nodes to have keys.
606			 */
607			trun_key_init(c, &key, le32_to_cpu(trun->inum));
608			err = insert_node(c, lnum, snod->offs, snod->len,
609					  &key, snod->sqnum, 1, &used,
610					  old_size, new_size);
611			break;
612		}
613		default:
614			ubifs_err("unexpected node type %d in bud LEB %d:%d",
615				  snod->type, lnum, snod->offs);
616			err = -EINVAL;
617			goto out_dump;
618		}
619		if (err)
620			goto out;
621	}
622
623	bud = ubifs_search_bud(c, lnum);
624	if (!bud)
625		BUG();
626
627	ubifs_assert(sleb->endpt - offs >= used);
628	ubifs_assert(sleb->endpt % c->min_io_size == 0);
629
630	if (sleb->endpt + c->min_io_size <= c->leb_size &&
631	    !(c->vfs_sb->s_flags & MS_RDONLY))
632		err = ubifs_wbuf_seek_nolock(&c->jheads[jhead].wbuf, lnum,
633					     sleb->endpt, UBI_SHORTTERM);
634
635	*dirty = sleb->endpt - offs - used;
636	*free = c->leb_size - sleb->endpt;
637
638out:
639	ubifs_scan_destroy(sleb);
640	return err;
641
642out_dump:
643	ubifs_err("bad node is at LEB %d:%d", lnum, snod->offs);
644	dbg_dump_node(c, snod->node);
645	ubifs_scan_destroy(sleb);
646	return -EINVAL;
647}
648
649/**
650 * insert_ref_node - insert a reference node to the replay tree.
651 * @c: UBIFS file-system description object
652 * @lnum: node logical eraseblock number
653 * @offs: node offset
654 * @sqnum: sequence number
655 * @free: amount of free space in bud
656 * @dirty: amount of dirty space from padding and deletion nodes
657 *
658 * This function inserts a reference node to the replay tree and returns zero
659 * in case of success or a negative error code in case of failure.
660 */
661static int insert_ref_node(struct ubifs_info *c, int lnum, int offs,
662			   unsigned long long sqnum, int free, int dirty)
663{
664	struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
665	struct replay_entry *r;
666
667	dbg_mnt("add ref LEB %d:%d", lnum, offs);
668	while (*p) {
669		parent = *p;
670		r = rb_entry(parent, struct replay_entry, rb);
671		if (sqnum < r->sqnum) {
672			p = &(*p)->rb_left;
673			continue;
674		} else if (sqnum > r->sqnum) {
675			p = &(*p)->rb_right;
676			continue;
677		}
678		ubifs_err("duplicate sqnum in replay tree");
679		return -EINVAL;
680	}
681
682	r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
683	if (!r)
684		return -ENOMEM;
685
686	r->lnum = lnum;
687	r->offs = offs;
688	r->sqnum = sqnum;
689	r->flags = REPLAY_REF;
690	r->free = free;
691	r->dirty = dirty;
692
693	rb_link_node(&r->rb, parent, p);
694	rb_insert_color(&r->rb, &c->replay_tree);
695	return 0;
696}
697
698/**
699 * replay_buds - replay all buds.
700 * @c: UBIFS file-system description object
701 *
702 * This function returns zero in case of success and a negative error code in
703 * case of failure.
704 */
705static int replay_buds(struct ubifs_info *c)
706{
707	struct bud_entry *b;
708	int err, uninitialized_var(free), uninitialized_var(dirty);
709
710	list_for_each_entry(b, &c->replay_buds, list) {
711		err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead,
712				 &free, &dirty);
713		if (err)
714			return err;
715		err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum,
716				      free, dirty);
717		if (err)
718			return err;
719	}
720
721	return 0;
722}
723
724/**
725 * destroy_bud_list - destroy the list of buds to replay.
726 * @c: UBIFS file-system description object
727 */
728static void destroy_bud_list(struct ubifs_info *c)
729{
730	struct bud_entry *b;
731
732	while (!list_empty(&c->replay_buds)) {
733		b = list_entry(c->replay_buds.next, struct bud_entry, list);
734		list_del(&b->list);
735		kfree(b);
736	}
737}
738
739/**
740 * add_replay_bud - add a bud to the list of buds to replay.
741 * @c: UBIFS file-system description object
742 * @lnum: bud logical eraseblock number to replay
743 * @offs: bud start offset
744 * @jhead: journal head to which this bud belongs
745 * @sqnum: reference node sequence number
746 *
747 * This function returns zero in case of success and a negative error code in
748 * case of failure.
749 */
750static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
751			  unsigned long long sqnum)
752{
753	struct ubifs_bud *bud;
754	struct bud_entry *b;
755
756	dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead);
757
758	bud = kmalloc(sizeof(struct ubifs_bud), GFP_KERNEL);
759	if (!bud)
760		return -ENOMEM;
761
762	b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL);
763	if (!b) {
764		kfree(bud);
765		return -ENOMEM;
766	}
767
768	bud->lnum = lnum;
769	bud->start = offs;
770	bud->jhead = jhead;
771	ubifs_add_bud(c, bud);
772
773	b->bud = bud;
774	b->sqnum = sqnum;
775	list_add_tail(&b->list, &c->replay_buds);
776
777	return 0;
778}
779
780/**
781 * validate_ref - validate a reference node.
782 * @c: UBIFS file-system description object
783 * @ref: the reference node to validate
784 * @ref_lnum: LEB number of the reference node
785 * @ref_offs: reference node offset
786 *
787 * This function returns %1 if a bud reference already exists for the LEB. %0 is
788 * returned if the reference node is new, otherwise %-EINVAL is returned if
789 * validation failed.
790 */
791static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref)
792{
793	struct ubifs_bud *bud;
794	int lnum = le32_to_cpu(ref->lnum);
795	unsigned int offs = le32_to_cpu(ref->offs);
796	unsigned int jhead = le32_to_cpu(ref->jhead);
797
798	/*
799	 * ref->offs may point to the end of LEB when the journal head points
800	 * to the end of LEB and we write reference node for it during commit.
801	 * So this is why we require 'offs > c->leb_size'.
802	 */
803	if (jhead >= c->jhead_cnt || lnum >= c->leb_cnt ||
804	    lnum < c->main_first || offs > c->leb_size ||
805	    offs & (c->min_io_size - 1))
806		return -EINVAL;
807
808	/* Make sure we have not already looked at this bud */
809	bud = ubifs_search_bud(c, lnum);
810	if (bud) {
811		if (bud->jhead == jhead && bud->start <= offs)
812			return 1;
813		ubifs_err("bud at LEB %d:%d was already referred", lnum, offs);
814		return -EINVAL;
815	}
816
817	return 0;
818}
819
820/**
821 * replay_log_leb - replay a log logical eraseblock.
822 * @c: UBIFS file-system description object
823 * @lnum: log logical eraseblock to replay
824 * @offs: offset to start replaying from
825 * @sbuf: scan buffer
826 *
827 * This function replays a log LEB and returns zero in case of success, %1 if
828 * this is the last LEB in the log, and a negative error code in case of
829 * failure.
830 */
831static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
832{
833	int err;
834	struct ubifs_scan_leb *sleb;
835	struct ubifs_scan_node *snod;
836	const struct ubifs_cs_node *node;
837
838	dbg_mnt("replay log LEB %d:%d", lnum, offs);
839	sleb = ubifs_scan(c, lnum, offs, sbuf, c->need_recovery);
840	if (IS_ERR(sleb)) {
841		if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery)
842			return PTR_ERR(sleb);
843		sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf);
844		if (IS_ERR(sleb))
845			return PTR_ERR(sleb);
846	}
847
848	if (sleb->nodes_cnt == 0) {
849		err = 1;
850		goto out;
851	}
852
853	node = sleb->buf;
854
855	snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list);
856	if (c->cs_sqnum == 0) {
857		/*
858		 * This is the first log LEB we are looking at, make sure that
859		 * the first node is a commit start node. Also record its
860		 * sequence number so that UBIFS can determine where the log
861		 * ends, because all nodes which were have higher sequence
862		 * numbers.
863		 */
864		if (snod->type != UBIFS_CS_NODE) {
865			dbg_err("first log node at LEB %d:%d is not CS node",
866				lnum, offs);
867			goto out_dump;
868		}
869		if (le64_to_cpu(node->cmt_no) != c->cmt_no) {
870			dbg_err("first CS node at LEB %d:%d has wrong "
871				"commit number %llu expected %llu",
872				lnum, offs,
873				(unsigned long long)le64_to_cpu(node->cmt_no),
874				c->cmt_no);
875			goto out_dump;
876		}
877
878		c->cs_sqnum = le64_to_cpu(node->ch.sqnum);
879		dbg_mnt("commit start sqnum %llu", c->cs_sqnum);
880	}
881
882	if (snod->sqnum < c->cs_sqnum) {
883		/*
884		 * This means that we reached end of log and now
885		 * look to the older log data, which was already
886		 * committed but the eraseblock was not erased (UBIFS
887		 * only un-maps it). So this basically means we have to
888		 * exit with "end of log" code.
889		 */
890		err = 1;
891		goto out;
892	}
893
894	/* Make sure the first node sits at offset zero of the LEB */
895	if (snod->offs != 0) {
896		dbg_err("first node is not at zero offset");
897		goto out_dump;
898	}
899
900	list_for_each_entry(snod, &sleb->nodes, list) {
901
902		cond_resched();
903
904		if (snod->sqnum >= SQNUM_WATERMARK) {
905			ubifs_err("file system's life ended");
906			goto out_dump;
907		}
908
909		if (snod->sqnum < c->cs_sqnum) {
910			dbg_err("bad sqnum %llu, commit sqnum %llu",
911				snod->sqnum, c->cs_sqnum);
912			goto out_dump;
913		}
914
915		if (snod->sqnum > c->max_sqnum)
916			c->max_sqnum = snod->sqnum;
917
918		switch (snod->type) {
919		case UBIFS_REF_NODE: {
920			const struct ubifs_ref_node *ref = snod->node;
921
922			err = validate_ref(c, ref);
923			if (err == 1)
924				break; /* Already have this bud */
925			if (err)
926				goto out_dump;
927
928			err = add_replay_bud(c, le32_to_cpu(ref->lnum),
929					     le32_to_cpu(ref->offs),
930					     le32_to_cpu(ref->jhead),
931					     snod->sqnum);
932			if (err)
933				goto out;
934
935			break;
936		}
937		case UBIFS_CS_NODE:
938			/* Make sure it sits at the beginning of LEB */
939			if (snod->offs != 0) {
940				ubifs_err("unexpected node in log");
941				goto out_dump;
942			}
943			break;
944		default:
945			ubifs_err("unexpected node in log");
946			goto out_dump;
947		}
948	}
949
950	if (sleb->endpt || c->lhead_offs >= c->leb_size) {
951		c->lhead_lnum = lnum;
952		c->lhead_offs = sleb->endpt;
953	}
954
955	err = !sleb->endpt;
956out:
957	ubifs_scan_destroy(sleb);
958	return err;
959
960out_dump:
961	ubifs_err("log error detected while replaying the log at LEB %d:%d",
962		  lnum, offs + snod->offs);
963	dbg_dump_node(c, snod->node);
964	ubifs_scan_destroy(sleb);
965	return -EINVAL;
966}
967
968/**
969 * take_ihead - update the status of the index head in lprops to 'taken'.
970 * @c: UBIFS file-system description object
971 *
972 * This function returns the amount of free space in the index head LEB or a
973 * negative error code.
974 */
975static int take_ihead(struct ubifs_info *c)
976{
977	const struct ubifs_lprops *lp;
978	int err, free;
979
980	ubifs_get_lprops(c);
981
982	lp = ubifs_lpt_lookup_dirty(c, c->ihead_lnum);
983	if (IS_ERR(lp)) {
984		err = PTR_ERR(lp);
985		goto out;
986	}
987
988	free = lp->free;
989
990	lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC,
991			     lp->flags | LPROPS_TAKEN, 0);
992	if (IS_ERR(lp)) {
993		err = PTR_ERR(lp);
994		goto out;
995	}
996
997	err = free;
998out:
999	ubifs_release_lprops(c);
1000	return err;
1001}
1002
1003/**
1004 * ubifs_replay_journal - replay journal.
1005 * @c: UBIFS file-system description object
1006 *
1007 * This function scans the journal, replays and cleans it up. It makes sure all
1008 * memory data structures related to uncommitted journal are built (dirty TNC
1009 * tree, tree of buds, modified lprops, etc).
1010 */
1011int ubifs_replay_journal(struct ubifs_info *c)
1012{
1013	int err, i, lnum, offs, free;
1014	void *sbuf = NULL;
1015
1016	BUILD_BUG_ON(UBIFS_TRUN_KEY > 5);
1017
1018	/* Update the status of the index head in lprops to 'taken' */
1019	free = take_ihead(c);
1020	if (free < 0)
1021		return free; /* Error code */
1022
1023	if (c->ihead_offs != c->leb_size - free) {
1024		ubifs_err("bad index head LEB %d:%d", c->ihead_lnum,
1025			  c->ihead_offs);
1026		return -EINVAL;
1027	}
1028
1029	sbuf = vmalloc(c->leb_size);
1030	if (!sbuf)
1031		return -ENOMEM;
1032
1033	dbg_mnt("start replaying the journal");
1034
1035	c->replaying = 1;
1036
1037	lnum = c->ltail_lnum = c->lhead_lnum;
1038	offs = c->lhead_offs;
1039
1040	for (i = 0; i < c->log_lebs; i++, lnum++) {
1041		if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) {
1042			/*
1043			 * The log is logically circular, we reached the last
1044			 * LEB, switch to the first one.
1045			 */
1046			lnum = UBIFS_LOG_LNUM;
1047			offs = 0;
1048		}
1049		err = replay_log_leb(c, lnum, offs, sbuf);
1050		if (err == 1)
1051			/* We hit the end of the log */
1052			break;
1053		if (err)
1054			goto out;
1055		offs = 0;
1056	}
1057
1058	err = replay_buds(c);
1059	if (err)
1060		goto out;
1061
1062	err = apply_replay_tree(c);
1063	if (err)
1064		goto out;
1065
1066	/*
1067	 * UBIFS budgeting calculations use @c->budg_uncommitted_idx variable
1068	 * to roughly estimate index growth. Things like @c->min_idx_lebs
1069	 * depend on it. This means we have to initialize it to make sure
1070	 * budgeting works properly.
1071	 */
1072	c->budg_uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt);
1073	c->budg_uncommitted_idx *= c->max_idx_node_sz;
1074
1075	ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
1076	dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, "
1077		"highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum,
1078		(unsigned long)c->highest_inum);
1079out:
1080	destroy_replay_tree(c);
1081	destroy_bud_list(c);
1082	vfree(sbuf);
1083	c->replaying = 0;
1084	return err;
1085}
1086