1/*
2 * JFFS2 -- Journalling Flash File System, Version 2.
3 *
4 * Copyright �� 2001-2007 Red Hat, Inc.
5 * Copyright �� 2004-2010 David Woodhouse <dwmw2@infradead.org>
6 *
7 * Created by David Woodhouse <dwmw2@infradead.org>
8 *
9 * For licensing information, see the file 'LICENCE' in this directory.
10 *
11 */
12
13#include <linux/kernel.h>
14#include <linux/mtd/mtd.h>
15#include <linux/slab.h>
16#include <linux/pagemap.h>
17#include <linux/crc32.h>
18#include <linux/compiler.h>
19#include <linux/stat.h>
20#include "nodelist.h"
21#include "compr.h"
22
23static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
24					  struct jffs2_inode_cache *ic,
25					  struct jffs2_raw_node_ref *raw);
26static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
27					struct jffs2_inode_info *f, struct jffs2_full_dnode *fd);
28static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
29					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
30static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
31					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
32static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
33				      struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
34				      uint32_t start, uint32_t end);
35static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
36				       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
37				       uint32_t start, uint32_t end);
38static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
39			       struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f);
40
41/* Called with erase_completion_lock held */
42static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
43{
44	struct jffs2_eraseblock *ret;
45	struct list_head *nextlist = NULL;
46	int n = jiffies % 128;
47
48	/* Pick an eraseblock to garbage collect next. This is where we'll
49	   put the clever wear-levelling algorithms. Eventually.  */
50	/* We possibly want to favour the dirtier blocks more when the
51	   number of free blocks is low. */
52again:
53	if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
54		D1(printk(KERN_DEBUG "Picking block from bad_used_list to GC next\n"));
55		nextlist = &c->bad_used_list;
56	} else if (n < 50 && !list_empty(&c->erasable_list)) {
57		/* Note that most of them will have gone directly to be erased.
58		   So don't favour the erasable_list _too_ much. */
59		D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next\n"));
60		nextlist = &c->erasable_list;
61	} else if (n < 110 && !list_empty(&c->very_dirty_list)) {
62		/* Most of the time, pick one off the very_dirty list */
63		D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next\n"));
64		nextlist = &c->very_dirty_list;
65	} else if (n < 126 && !list_empty(&c->dirty_list)) {
66		D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next\n"));
67		nextlist = &c->dirty_list;
68	} else if (!list_empty(&c->clean_list)) {
69		D1(printk(KERN_DEBUG "Picking block from clean_list to GC next\n"));
70		nextlist = &c->clean_list;
71	} else if (!list_empty(&c->dirty_list)) {
72		D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next (clean_list was empty)\n"));
73
74		nextlist = &c->dirty_list;
75	} else if (!list_empty(&c->very_dirty_list)) {
76		D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n"));
77		nextlist = &c->very_dirty_list;
78	} else if (!list_empty(&c->erasable_list)) {
79		D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n"));
80
81		nextlist = &c->erasable_list;
82	} else if (!list_empty(&c->erasable_pending_wbuf_list)) {
83		/* There are blocks are wating for the wbuf sync */
84		D1(printk(KERN_DEBUG "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n"));
85		spin_unlock(&c->erase_completion_lock);
86		jffs2_flush_wbuf_pad(c);
87		spin_lock(&c->erase_completion_lock);
88		goto again;
89	} else {
90		/* Eep. All were empty */
91		D1(printk(KERN_NOTICE "jffs2: No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n"));
92		return NULL;
93	}
94
95	ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
96	list_del(&ret->list);
97	c->gcblock = ret;
98	ret->gc_node = ret->first_node;
99	if (!ret->gc_node) {
100		printk(KERN_WARNING "Eep. ret->gc_node for block at 0x%08x is NULL\n", ret->offset);
101		BUG();
102	}
103
104	/* Have we accidentally picked a clean block with wasted space ? */
105	if (ret->wasted_size) {
106		D1(printk(KERN_DEBUG "Converting wasted_size %08x to dirty_size\n", ret->wasted_size));
107		ret->dirty_size += ret->wasted_size;
108		c->wasted_size -= ret->wasted_size;
109		c->dirty_size += ret->wasted_size;
110		ret->wasted_size = 0;
111	}
112
113	return ret;
114}
115
116/* jffs2_garbage_collect_pass
117 * Make a single attempt to progress GC. Move one node, and possibly
118 * start erasing one eraseblock.
119 */
120int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
121{
122	struct jffs2_inode_info *f;
123	struct jffs2_inode_cache *ic;
124	struct jffs2_eraseblock *jeb;
125	struct jffs2_raw_node_ref *raw;
126	uint32_t gcblock_dirty;
127	int ret = 0, inum, nlink;
128	int xattr = 0;
129
130	if (mutex_lock_interruptible(&c->alloc_sem))
131		return -EINTR;
132
133	for (;;) {
134		spin_lock(&c->erase_completion_lock);
135		if (!c->unchecked_size)
136			break;
137
138		/* We can't start doing GC yet. We haven't finished checking
139		   the node CRCs etc. Do it now. */
140
141		/* checked_ino is protected by the alloc_sem */
142		if (c->checked_ino > c->highest_ino && xattr) {
143			printk(KERN_CRIT "Checked all inodes but still 0x%x bytes of unchecked space?\n",
144			       c->unchecked_size);
145			jffs2_dbg_dump_block_lists_nolock(c);
146			spin_unlock(&c->erase_completion_lock);
147			mutex_unlock(&c->alloc_sem);
148			return -ENOSPC;
149		}
150
151		spin_unlock(&c->erase_completion_lock);
152
153		if (!xattr)
154			xattr = jffs2_verify_xattr(c);
155
156		spin_lock(&c->inocache_lock);
157
158		ic = jffs2_get_ino_cache(c, c->checked_ino++);
159
160		if (!ic) {
161			spin_unlock(&c->inocache_lock);
162			continue;
163		}
164
165		if (!ic->pino_nlink) {
166			D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink/pino zero\n",
167				  ic->ino));
168			spin_unlock(&c->inocache_lock);
169			jffs2_xattr_delete_inode(c, ic);
170			continue;
171		}
172		switch(ic->state) {
173		case INO_STATE_CHECKEDABSENT:
174		case INO_STATE_PRESENT:
175			D1(printk(KERN_DEBUG "Skipping ino #%u already checked\n", ic->ino));
176			spin_unlock(&c->inocache_lock);
177			continue;
178
179		case INO_STATE_GC:
180		case INO_STATE_CHECKING:
181			printk(KERN_WARNING "Inode #%u is in state %d during CRC check phase!\n", ic->ino, ic->state);
182			spin_unlock(&c->inocache_lock);
183			BUG();
184
185		case INO_STATE_READING:
186			/* We need to wait for it to finish, lest we move on
187			   and trigger the BUG() above while we haven't yet
188			   finished checking all its nodes */
189			D1(printk(KERN_DEBUG "Waiting for ino #%u to finish reading\n", ic->ino));
190			/* We need to come back again for the _same_ inode. We've
191			 made no progress in this case, but that should be OK */
192			c->checked_ino--;
193
194			mutex_unlock(&c->alloc_sem);
195			sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
196			return 0;
197
198		default:
199			BUG();
200
201		case INO_STATE_UNCHECKED:
202			;
203		}
204		ic->state = INO_STATE_CHECKING;
205		spin_unlock(&c->inocache_lock);
206
207		D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() triggering inode scan of ino#%u\n", ic->ino));
208
209		ret = jffs2_do_crccheck_inode(c, ic);
210		if (ret)
211			printk(KERN_WARNING "Returned error for crccheck of ino #%u. Expect badness...\n", ic->ino);
212
213		jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
214		mutex_unlock(&c->alloc_sem);
215		return ret;
216	}
217
218	/* If there are any blocks which need erasing, erase them now */
219	if (!list_empty(&c->erase_complete_list) ||
220	    !list_empty(&c->erase_pending_list)) {
221		spin_unlock(&c->erase_completion_lock);
222		D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() erasing pending blocks\n"));
223		if (jffs2_erase_pending_blocks(c, 1)) {
224			mutex_unlock(&c->alloc_sem);
225			return 0;
226		}
227		D1(printk(KERN_DEBUG "No progress from erasing blocks; doing GC anyway\n"));
228		spin_lock(&c->erase_completion_lock);
229	}
230
231	/* First, work out which block we're garbage-collecting */
232	jeb = c->gcblock;
233
234	if (!jeb)
235		jeb = jffs2_find_gc_block(c);
236
237	if (!jeb) {
238		/* Couldn't find a free block. But maybe we can just erase one and make 'progress'? */
239		if (c->nr_erasing_blocks) {
240			spin_unlock(&c->erase_completion_lock);
241			mutex_unlock(&c->alloc_sem);
242			return -EAGAIN;
243		}
244		D1(printk(KERN_NOTICE "jffs2: Couldn't find erase block to garbage collect!\n"));
245		spin_unlock(&c->erase_completion_lock);
246		mutex_unlock(&c->alloc_sem);
247		return -EIO;
248	}
249
250	D1(printk(KERN_DEBUG "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n", jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size));
251	D1(if (c->nextblock)
252	   printk(KERN_DEBUG "Nextblock at  %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
253
254	if (!jeb->used_size) {
255		mutex_unlock(&c->alloc_sem);
256		goto eraseit;
257	}
258
259	raw = jeb->gc_node;
260	gcblock_dirty = jeb->dirty_size;
261
262	while(ref_obsolete(raw)) {
263		D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw)));
264		raw = ref_next(raw);
265		if (unlikely(!raw)) {
266			printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n");
267			printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
268			       jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size);
269			jeb->gc_node = raw;
270			spin_unlock(&c->erase_completion_lock);
271			mutex_unlock(&c->alloc_sem);
272			BUG();
273		}
274	}
275	jeb->gc_node = raw;
276
277	D1(printk(KERN_DEBUG "Going to garbage collect node at 0x%08x\n", ref_offset(raw)));
278
279	if (!raw->next_in_ino) {
280		/* Inode-less node. Clean marker, snapshot or something like that */
281		spin_unlock(&c->erase_completion_lock);
282		if (ref_flags(raw) == REF_PRISTINE) {
283			/* It's an unknown node with JFFS2_FEATURE_RWCOMPAT_COPY */
284			jffs2_garbage_collect_pristine(c, NULL, raw);
285		} else {
286			/* Just mark it obsolete */
287			jffs2_mark_node_obsolete(c, raw);
288		}
289		mutex_unlock(&c->alloc_sem);
290		goto eraseit_lock;
291	}
292
293	ic = jffs2_raw_ref_to_ic(raw);
294
295#ifdef CONFIG_JFFS2_FS_XATTR
296	/* When 'ic' refers xattr_datum/xattr_ref, this node is GCed as xattr.
297	 * We can decide whether this node is inode or xattr by ic->class.     */
298	if (ic->class == RAWNODE_CLASS_XATTR_DATUM
299	    || ic->class == RAWNODE_CLASS_XATTR_REF) {
300		spin_unlock(&c->erase_completion_lock);
301
302		if (ic->class == RAWNODE_CLASS_XATTR_DATUM) {
303			ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic, raw);
304		} else {
305			ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic, raw);
306		}
307		goto test_gcnode;
308	}
309#endif
310
311	/* We need to hold the inocache. Either the erase_completion_lock or
312	   the inocache_lock are sufficient; we trade down since the inocache_lock
313	   causes less contention. */
314	spin_lock(&c->inocache_lock);
315
316	spin_unlock(&c->erase_completion_lock);
317
318	D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", jeb->offset, ref_offset(raw), ref_flags(raw), ic->ino));
319
320	/* Three possibilities:
321	   1. Inode is already in-core. We must iget it and do proper
322	      updating to its fragtree, etc.
323	   2. Inode is not in-core, node is REF_PRISTINE. We lock the
324	      inocache to prevent a read_inode(), copy the node intact.
325	   3. Inode is not in-core, node is not pristine. We must iget()
326	      and take the slow path.
327	*/
328
329	switch(ic->state) {
330	case INO_STATE_CHECKEDABSENT:
331		/* It's been checked, but it's not currently in-core.
332		   We can just copy any pristine nodes, but have
333		   to prevent anyone else from doing read_inode() while
334		   we're at it, so we set the state accordingly */
335		if (ref_flags(raw) == REF_PRISTINE)
336			ic->state = INO_STATE_GC;
337		else {
338			D1(printk(KERN_DEBUG "Ino #%u is absent but node not REF_PRISTINE. Reading.\n",
339				  ic->ino));
340		}
341		break;
342
343	case INO_STATE_PRESENT:
344		/* It's in-core. GC must iget() it. */
345		break;
346
347	case INO_STATE_UNCHECKED:
348	case INO_STATE_CHECKING:
349	case INO_STATE_GC:
350		/* Should never happen. We should have finished checking
351		   by the time we actually start doing any GC, and since
352		   we're holding the alloc_sem, no other garbage collection
353		   can happen.
354		*/
355		printk(KERN_CRIT "Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
356		       ic->ino, ic->state);
357		mutex_unlock(&c->alloc_sem);
358		spin_unlock(&c->inocache_lock);
359		BUG();
360
361	case INO_STATE_READING:
362		/* Someone's currently trying to read it. We must wait for
363		   them to finish and then go through the full iget() route
364		   to do the GC. However, sometimes read_inode() needs to get
365		   the alloc_sem() (for marking nodes invalid) so we must
366		   drop the alloc_sem before sleeping. */
367
368		mutex_unlock(&c->alloc_sem);
369		D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() waiting for ino #%u in state %d\n",
370			  ic->ino, ic->state));
371		sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
372		/* And because we dropped the alloc_sem we must start again from the
373		   beginning. Ponder chance of livelock here -- we're returning success
374		   without actually making any progress.
375
376		   Q: What are the chances that the inode is back in INO_STATE_READING
377		   again by the time we next enter this function? And that this happens
378		   enough times to cause a real delay?
379
380		   A: Small enough that I don't care :)
381		*/
382		return 0;
383	}
384
385	/* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
386	   node intact, and we don't have to muck about with the fragtree etc.
387	   because we know it's not in-core. If it _was_ in-core, we go through
388	   all the iget() crap anyway */
389
390	if (ic->state == INO_STATE_GC) {
391		spin_unlock(&c->inocache_lock);
392
393		ret = jffs2_garbage_collect_pristine(c, ic, raw);
394
395		spin_lock(&c->inocache_lock);
396		ic->state = INO_STATE_CHECKEDABSENT;
397		wake_up(&c->inocache_wq);
398
399		if (ret != -EBADFD) {
400			spin_unlock(&c->inocache_lock);
401			goto test_gcnode;
402		}
403
404		/* Fall through if it wanted us to, with inocache_lock held */
405	}
406
407	/* Prevent the fairly unlikely race where the gcblock is
408	   entirely obsoleted by the final close of a file which had
409	   the only valid nodes in the block, followed by erasure,
410	   followed by freeing of the ic because the erased block(s)
411	   held _all_ the nodes of that inode.... never been seen but
412	   it's vaguely possible. */
413
414	inum = ic->ino;
415	nlink = ic->pino_nlink;
416	spin_unlock(&c->inocache_lock);
417
418	f = jffs2_gc_fetch_inode(c, inum, !nlink);
419	if (IS_ERR(f)) {
420		ret = PTR_ERR(f);
421		goto release_sem;
422	}
423	if (!f) {
424		ret = 0;
425		goto release_sem;
426	}
427
428	ret = jffs2_garbage_collect_live(c, jeb, raw, f);
429
430	jffs2_gc_release_inode(c, f);
431
432 test_gcnode:
433	if (jeb->dirty_size == gcblock_dirty && !ref_obsolete(jeb->gc_node)) {
434		/* Eep. This really should never happen. GC is broken */
435		printk(KERN_ERR "Error garbage collecting node at %08x!\n", ref_offset(jeb->gc_node));
436		ret = -ENOSPC;
437	}
438 release_sem:
439	mutex_unlock(&c->alloc_sem);
440
441 eraseit_lock:
442	/* If we've finished this block, start it erasing */
443	spin_lock(&c->erase_completion_lock);
444
445 eraseit:
446	if (c->gcblock && !c->gcblock->used_size) {
447		D1(printk(KERN_DEBUG "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n", c->gcblock->offset));
448		/* We're GC'ing an empty block? */
449		list_add_tail(&c->gcblock->list, &c->erase_pending_list);
450		c->gcblock = NULL;
451		c->nr_erasing_blocks++;
452		jffs2_garbage_collect_trigger(c);
453	}
454	spin_unlock(&c->erase_completion_lock);
455
456	return ret;
457}
458
459static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
460				      struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f)
461{
462	struct jffs2_node_frag *frag;
463	struct jffs2_full_dnode *fn = NULL;
464	struct jffs2_full_dirent *fd;
465	uint32_t start = 0, end = 0, nrfrags = 0;
466	int ret = 0;
467
468	mutex_lock(&f->sem);
469
470	/* Now we have the lock for this inode. Check that it's still the one at the head
471	   of the list. */
472
473	spin_lock(&c->erase_completion_lock);
474
475	if (c->gcblock != jeb) {
476		spin_unlock(&c->erase_completion_lock);
477		D1(printk(KERN_DEBUG "GC block is no longer gcblock. Restart\n"));
478		goto upnout;
479	}
480	if (ref_obsolete(raw)) {
481		spin_unlock(&c->erase_completion_lock);
482		D1(printk(KERN_DEBUG "node to be GC'd was obsoleted in the meantime.\n"));
483		/* They'll call again */
484		goto upnout;
485	}
486	spin_unlock(&c->erase_completion_lock);
487
488	/* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
489	if (f->metadata && f->metadata->raw == raw) {
490		fn = f->metadata;
491		ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
492		goto upnout;
493	}
494
495	for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
496		if (frag->node && frag->node->raw == raw) {
497			fn = frag->node;
498			end = frag->ofs + frag->size;
499			if (!nrfrags++)
500				start = frag->ofs;
501			if (nrfrags == frag->node->frags)
502				break; /* We've found them all */
503		}
504	}
505	if (fn) {
506		if (ref_flags(raw) == REF_PRISTINE) {
507			ret = jffs2_garbage_collect_pristine(c, f->inocache, raw);
508			if (!ret) {
509				/* Urgh. Return it sensibly. */
510				frag->node->raw = f->inocache->nodes;
511			}
512			if (ret != -EBADFD)
513				goto upnout;
514		}
515		/* We found a datanode. Do the GC */
516		if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) {
517			/* It crosses a page boundary. Therefore, it must be a hole. */
518			ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
519		} else {
520			/* It could still be a hole. But we GC the page this way anyway */
521			ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
522		}
523		goto upnout;
524	}
525
526	/* Wasn't a dnode. Try dirent */
527	for (fd = f->dents; fd; fd=fd->next) {
528		if (fd->raw == raw)
529			break;
530	}
531
532	if (fd && fd->ino) {
533		ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
534	} else if (fd) {
535		ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
536	} else {
537		printk(KERN_WARNING "Raw node at 0x%08x wasn't in node lists for ino #%u\n",
538		       ref_offset(raw), f->inocache->ino);
539		if (ref_obsolete(raw)) {
540			printk(KERN_WARNING "But it's obsolete so we don't mind too much\n");
541		} else {
542			jffs2_dbg_dump_node(c, ref_offset(raw));
543			BUG();
544		}
545	}
546 upnout:
547	mutex_unlock(&f->sem);
548
549	return ret;
550}
551
552static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
553					  struct jffs2_inode_cache *ic,
554					  struct jffs2_raw_node_ref *raw)
555{
556	union jffs2_node_union *node;
557	size_t retlen;
558	int ret;
559	uint32_t phys_ofs, alloclen;
560	uint32_t crc, rawlen;
561	int retried = 0;
562
563	D1(printk(KERN_DEBUG "Going to GC REF_PRISTINE node at 0x%08x\n", ref_offset(raw)));
564
565	alloclen = rawlen = ref_totlen(c, c->gcblock, raw);
566
567	/* Ask for a small amount of space (or the totlen if smaller) because we
568	   don't want to force wastage of the end of a block if splitting would
569	   work. */
570	if (ic && alloclen > sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN)
571		alloclen = sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN;
572
573	ret = jffs2_reserve_space_gc(c, alloclen, &alloclen, rawlen);
574	/* 'rawlen' is not the exact summary size; it is only an upper estimation */
575
576	if (ret)
577		return ret;
578
579	if (alloclen < rawlen) {
580		/* Doesn't fit untouched. We'll go the old route and split it */
581		return -EBADFD;
582	}
583
584	node = kmalloc(rawlen, GFP_KERNEL);
585	if (!node)
586		return -ENOMEM;
587
588	ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
589	if (!ret && retlen != rawlen)
590		ret = -EIO;
591	if (ret)
592		goto out_node;
593
594	crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
595	if (je32_to_cpu(node->u.hdr_crc) != crc) {
596		printk(KERN_WARNING "Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
597		       ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
598		goto bail;
599	}
600
601	switch(je16_to_cpu(node->u.nodetype)) {
602	case JFFS2_NODETYPE_INODE:
603		crc = crc32(0, node, sizeof(node->i)-8);
604		if (je32_to_cpu(node->i.node_crc) != crc) {
605			printk(KERN_WARNING "Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
606			       ref_offset(raw), je32_to_cpu(node->i.node_crc), crc);
607			goto bail;
608		}
609
610		if (je32_to_cpu(node->i.dsize)) {
611			crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
612			if (je32_to_cpu(node->i.data_crc) != crc) {
613				printk(KERN_WARNING "Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
614				       ref_offset(raw), je32_to_cpu(node->i.data_crc), crc);
615				goto bail;
616			}
617		}
618		break;
619
620	case JFFS2_NODETYPE_DIRENT:
621		crc = crc32(0, node, sizeof(node->d)-8);
622		if (je32_to_cpu(node->d.node_crc) != crc) {
623			printk(KERN_WARNING "Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
624			       ref_offset(raw), je32_to_cpu(node->d.node_crc), crc);
625			goto bail;
626		}
627
628		if (strnlen(node->d.name, node->d.nsize) != node->d.nsize) {
629			printk(KERN_WARNING "Name in dirent node at 0x%08x contains zeroes\n", ref_offset(raw));
630			goto bail;
631		}
632
633		if (node->d.nsize) {
634			crc = crc32(0, node->d.name, node->d.nsize);
635			if (je32_to_cpu(node->d.name_crc) != crc) {
636				printk(KERN_WARNING "Name CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
637				       ref_offset(raw), je32_to_cpu(node->d.name_crc), crc);
638				goto bail;
639			}
640		}
641		break;
642	default:
643		/* If it's inode-less, we don't _know_ what it is. Just copy it intact */
644		if (ic) {
645			printk(KERN_WARNING "Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
646			       ref_offset(raw), je16_to_cpu(node->u.nodetype));
647			goto bail;
648		}
649	}
650
651	/* OK, all the CRCs are good; this node can just be copied as-is. */
652 retry:
653	phys_ofs = write_ofs(c);
654
655	ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
656
657	if (ret || (retlen != rawlen)) {
658		printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
659		       rawlen, phys_ofs, ret, retlen);
660		if (retlen) {
661			jffs2_add_physical_node_ref(c, phys_ofs | REF_OBSOLETE, rawlen, NULL);
662		} else {
663			printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", phys_ofs);
664		}
665		if (!retried) {
666			/* Try to reallocate space and retry */
667			uint32_t dummy;
668			struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
669
670			retried = 1;
671
672			D1(printk(KERN_DEBUG "Retrying failed write of REF_PRISTINE node.\n"));
673
674			jffs2_dbg_acct_sanity_check(c,jeb);
675			jffs2_dbg_acct_paranoia_check(c, jeb);
676
677			ret = jffs2_reserve_space_gc(c, rawlen, &dummy, rawlen);
678						/* this is not the exact summary size of it,
679							it is only an upper estimation */
680
681			if (!ret) {
682				D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", phys_ofs));
683
684				jffs2_dbg_acct_sanity_check(c,jeb);
685				jffs2_dbg_acct_paranoia_check(c, jeb);
686
687				goto retry;
688			}
689			D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret));
690		}
691
692		if (!ret)
693			ret = -EIO;
694		goto out_node;
695	}
696	jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, rawlen, ic);
697
698	jffs2_mark_node_obsolete(c, raw);
699	D1(printk(KERN_DEBUG "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n", ref_offset(raw)));
700
701 out_node:
702	kfree(node);
703	return ret;
704 bail:
705	ret = -EBADFD;
706	goto out_node;
707}
708
709static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
710					struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
711{
712	struct jffs2_full_dnode *new_fn;
713	struct jffs2_raw_inode ri;
714	struct jffs2_node_frag *last_frag;
715	union jffs2_device_node dev;
716	char *mdata = NULL;
717	int mdatalen = 0;
718	uint32_t alloclen, ilen;
719	int ret;
720
721	if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
722	    S_ISCHR(JFFS2_F_I_MODE(f)) ) {
723		/* For these, we don't actually need to read the old node */
724		mdatalen = jffs2_encode_dev(&dev, JFFS2_F_I_RDEV(f));
725		mdata = (char *)&dev;
726		D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of kdev_t\n", mdatalen));
727	} else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
728		mdatalen = fn->size;
729		mdata = kmalloc(fn->size, GFP_KERNEL);
730		if (!mdata) {
731			printk(KERN_WARNING "kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
732			return -ENOMEM;
733		}
734		ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
735		if (ret) {
736			printk(KERN_WARNING "read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n", ret);
737			kfree(mdata);
738			return ret;
739		}
740		D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bites of symlink target\n", mdatalen));
741
742	}
743
744	ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &alloclen,
745				JFFS2_SUMMARY_INODE_SIZE);
746	if (ret) {
747		printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
748		       sizeof(ri)+ mdatalen, ret);
749		goto out;
750	}
751
752	last_frag = frag_last(&f->fragtree);
753	if (last_frag)
754		/* Fetch the inode length from the fragtree rather then
755		 * from i_size since i_size may have not been updated yet */
756		ilen = last_frag->ofs + last_frag->size;
757	else
758		ilen = JFFS2_F_I_SIZE(f);
759
760	memset(&ri, 0, sizeof(ri));
761	ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
762	ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
763	ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
764	ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
765
766	ri.ino = cpu_to_je32(f->inocache->ino);
767	ri.version = cpu_to_je32(++f->highest_version);
768	ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
769	ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
770	ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
771	ri.isize = cpu_to_je32(ilen);
772	ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
773	ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
774	ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
775	ri.offset = cpu_to_je32(0);
776	ri.csize = cpu_to_je32(mdatalen);
777	ri.dsize = cpu_to_je32(mdatalen);
778	ri.compr = JFFS2_COMPR_NONE;
779	ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
780	ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
781
782	new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, ALLOC_GC);
783
784	if (IS_ERR(new_fn)) {
785		printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
786		ret = PTR_ERR(new_fn);
787		goto out;
788	}
789	jffs2_mark_node_obsolete(c, fn->raw);
790	jffs2_free_full_dnode(fn);
791	f->metadata = new_fn;
792 out:
793	if (S_ISLNK(JFFS2_F_I_MODE(f)))
794		kfree(mdata);
795	return ret;
796}
797
798static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
799					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
800{
801	struct jffs2_full_dirent *new_fd;
802	struct jffs2_raw_dirent rd;
803	uint32_t alloclen;
804	int ret;
805
806	rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
807	rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
808	rd.nsize = strlen(fd->name);
809	rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
810	rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
811
812	rd.pino = cpu_to_je32(f->inocache->ino);
813	rd.version = cpu_to_je32(++f->highest_version);
814	rd.ino = cpu_to_je32(fd->ino);
815	/* If the times on this inode were set by explicit utime() they can be different,
816	   so refrain from splatting them. */
817	if (JFFS2_F_I_MTIME(f) == JFFS2_F_I_CTIME(f))
818		rd.mctime = cpu_to_je32(JFFS2_F_I_MTIME(f));
819	else
820		rd.mctime = cpu_to_je32(0);
821	rd.type = fd->type;
822	rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
823	rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
824
825	ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &alloclen,
826				JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize));
827	if (ret) {
828		printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
829		       sizeof(rd)+rd.nsize, ret);
830		return ret;
831	}
832	new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, ALLOC_GC);
833
834	if (IS_ERR(new_fd)) {
835		printk(KERN_WARNING "jffs2_write_dirent in garbage_collect_dirent failed: %ld\n", PTR_ERR(new_fd));
836		return PTR_ERR(new_fd);
837	}
838	jffs2_add_fd_to_list(c, new_fd, &f->dents);
839	return 0;
840}
841
842static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
843					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
844{
845	struct jffs2_full_dirent **fdp = &f->dents;
846	int found = 0;
847
848	/* On a medium where we can't actually mark nodes obsolete
849	   pernamently, such as NAND flash, we need to work out
850	   whether this deletion dirent is still needed to actively
851	   delete a 'real' dirent with the same name that's still
852	   somewhere else on the flash. */
853	if (!jffs2_can_mark_obsolete(c)) {
854		struct jffs2_raw_dirent *rd;
855		struct jffs2_raw_node_ref *raw;
856		int ret;
857		size_t retlen;
858		int name_len = strlen(fd->name);
859		uint32_t name_crc = crc32(0, fd->name, name_len);
860		uint32_t rawlen = ref_totlen(c, jeb, fd->raw);
861
862		rd = kmalloc(rawlen, GFP_KERNEL);
863		if (!rd)
864			return -ENOMEM;
865
866		/* Prevent the erase code from nicking the obsolete node refs while
867		   we're looking at them. I really don't like this extra lock but
868		   can't see any alternative. Suggestions on a postcard to... */
869		mutex_lock(&c->erase_free_sem);
870
871		for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
872
873			cond_resched();
874
875			/* We only care about obsolete ones */
876			if (!(ref_obsolete(raw)))
877				continue;
878
879			/* Any dirent with the same name is going to have the same length... */
880			if (ref_totlen(c, NULL, raw) != rawlen)
881				continue;
882
883			/* Doesn't matter if there's one in the same erase block. We're going to
884			   delete it too at the same time. */
885			if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
886				continue;
887
888			D1(printk(KERN_DEBUG "Check potential deletion dirent at %08x\n", ref_offset(raw)));
889
890			/* This is an obsolete node belonging to the same directory, and it's of the right
891			   length. We need to take a closer look...*/
892			ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
893			if (ret) {
894				printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Read error (%d) reading obsolete node at %08x\n", ret, ref_offset(raw));
895				/* If we can't read it, we don't need to continue to obsolete it. Continue */
896				continue;
897			}
898			if (retlen != rawlen) {
899				printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Short read (%zd not %u) reading header from obsolete node at %08x\n",
900				       retlen, rawlen, ref_offset(raw));
901				continue;
902			}
903
904			if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT)
905				continue;
906
907			/* If the name CRC doesn't match, skip */
908			if (je32_to_cpu(rd->name_crc) != name_crc)
909				continue;
910
911			/* If the name length doesn't match, or it's another deletion dirent, skip */
912			if (rd->nsize != name_len || !je32_to_cpu(rd->ino))
913				continue;
914
915			/* OK, check the actual name now */
916			if (memcmp(rd->name, fd->name, name_len))
917				continue;
918
919			/* OK. The name really does match. There really is still an older node on
920			   the flash which our deletion dirent obsoletes. So we have to write out
921			   a new deletion dirent to replace it */
922			mutex_unlock(&c->erase_free_sem);
923
924			D1(printk(KERN_DEBUG "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
925				  ref_offset(fd->raw), fd->name, ref_offset(raw), je32_to_cpu(rd->ino)));
926			kfree(rd);
927
928			return jffs2_garbage_collect_dirent(c, jeb, f, fd);
929		}
930
931		mutex_unlock(&c->erase_free_sem);
932		kfree(rd);
933	}
934
935
936	/* No need for it any more. Just mark it obsolete and remove it from the list */
937	while (*fdp) {
938		if ((*fdp) == fd) {
939			found = 1;
940			*fdp = fd->next;
941			break;
942		}
943		fdp = &(*fdp)->next;
944	}
945	if (!found) {
946		printk(KERN_WARNING "Deletion dirent \"%s\" not found in list for ino #%u\n", fd->name, f->inocache->ino);
947	}
948	jffs2_mark_node_obsolete(c, fd->raw);
949	jffs2_free_full_dirent(fd);
950	return 0;
951}
952
953static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
954				      struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
955				      uint32_t start, uint32_t end)
956{
957	struct jffs2_raw_inode ri;
958	struct jffs2_node_frag *frag;
959	struct jffs2_full_dnode *new_fn;
960	uint32_t alloclen, ilen;
961	int ret;
962
963	D1(printk(KERN_DEBUG "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
964		  f->inocache->ino, start, end));
965
966	memset(&ri, 0, sizeof(ri));
967
968	if(fn->frags > 1) {
969		size_t readlen;
970		uint32_t crc;
971		/* It's partially obsoleted by a later write. So we have to
972		   write it out again with the _same_ version as before */
973		ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
974		if (readlen != sizeof(ri) || ret) {
975			printk(KERN_WARNING "Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n", ret, readlen);
976			goto fill;
977		}
978		if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
979			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
980			       ref_offset(fn->raw),
981			       je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
982			return -EIO;
983		}
984		if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
985			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
986			       ref_offset(fn->raw),
987			       je32_to_cpu(ri.totlen), sizeof(ri));
988			return -EIO;
989		}
990		crc = crc32(0, &ri, sizeof(ri)-8);
991		if (crc != je32_to_cpu(ri.node_crc)) {
992			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
993			       ref_offset(fn->raw),
994			       je32_to_cpu(ri.node_crc), crc);
995			printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
996			       start, end, f->inocache->ino);
997			goto fill;
998		}
999		if (ri.compr != JFFS2_COMPR_ZERO) {
1000			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node 0x%08x wasn't a hole node!\n", ref_offset(fn->raw));
1001			printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
1002			       start, end, f->inocache->ino);
1003			goto fill;
1004		}
1005	} else {
1006	fill:
1007		ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1008		ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1009		ri.totlen = cpu_to_je32(sizeof(ri));
1010		ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1011
1012		ri.ino = cpu_to_je32(f->inocache->ino);
1013		ri.version = cpu_to_je32(++f->highest_version);
1014		ri.offset = cpu_to_je32(start);
1015		ri.dsize = cpu_to_je32(end - start);
1016		ri.csize = cpu_to_je32(0);
1017		ri.compr = JFFS2_COMPR_ZERO;
1018	}
1019
1020	frag = frag_last(&f->fragtree);
1021	if (frag)
1022		/* Fetch the inode length from the fragtree rather then
1023		 * from i_size since i_size may have not been updated yet */
1024		ilen = frag->ofs + frag->size;
1025	else
1026		ilen = JFFS2_F_I_SIZE(f);
1027
1028	ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1029	ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1030	ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1031	ri.isize = cpu_to_je32(ilen);
1032	ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1033	ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1034	ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1035	ri.data_crc = cpu_to_je32(0);
1036	ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1037
1038	ret = jffs2_reserve_space_gc(c, sizeof(ri), &alloclen,
1039				     JFFS2_SUMMARY_INODE_SIZE);
1040	if (ret) {
1041		printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
1042		       sizeof(ri), ret);
1043		return ret;
1044	}
1045	new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, ALLOC_GC);
1046
1047	if (IS_ERR(new_fn)) {
1048		printk(KERN_WARNING "Error writing new hole node: %ld\n", PTR_ERR(new_fn));
1049		return PTR_ERR(new_fn);
1050	}
1051	if (je32_to_cpu(ri.version) == f->highest_version) {
1052		jffs2_add_full_dnode_to_inode(c, f, new_fn);
1053		if (f->metadata) {
1054			jffs2_mark_node_obsolete(c, f->metadata->raw);
1055			jffs2_free_full_dnode(f->metadata);
1056			f->metadata = NULL;
1057		}
1058		return 0;
1059	}
1060
1061	/*
1062	 * We should only get here in the case where the node we are
1063	 * replacing had more than one frag, so we kept the same version
1064	 * number as before. (Except in case of error -- see 'goto fill;'
1065	 * above.)
1066	 */
1067	D1(if(unlikely(fn->frags <= 1)) {
1068		printk(KERN_WARNING "jffs2_garbage_collect_hole: Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
1069		       fn->frags, je32_to_cpu(ri.version), f->highest_version,
1070		       je32_to_cpu(ri.ino));
1071	});
1072
1073	/* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
1074	mark_ref_normal(new_fn->raw);
1075
1076	for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs);
1077	     frag; frag = frag_next(frag)) {
1078		if (frag->ofs > fn->size + fn->ofs)
1079			break;
1080		if (frag->node == fn) {
1081			frag->node = new_fn;
1082			new_fn->frags++;
1083			fn->frags--;
1084		}
1085	}
1086	if (fn->frags) {
1087		printk(KERN_WARNING "jffs2_garbage_collect_hole: Old node still has frags!\n");
1088		BUG();
1089	}
1090	if (!new_fn->frags) {
1091		printk(KERN_WARNING "jffs2_garbage_collect_hole: New node has no frags!\n");
1092		BUG();
1093	}
1094
1095	jffs2_mark_node_obsolete(c, fn->raw);
1096	jffs2_free_full_dnode(fn);
1097
1098	return 0;
1099}
1100
1101static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *orig_jeb,
1102				       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1103				       uint32_t start, uint32_t end)
1104{
1105	struct jffs2_full_dnode *new_fn;
1106	struct jffs2_raw_inode ri;
1107	uint32_t alloclen, offset, orig_end, orig_start;
1108	int ret = 0;
1109	unsigned char *comprbuf = NULL, *writebuf;
1110	unsigned long pg;
1111	unsigned char *pg_ptr;
1112
1113	memset(&ri, 0, sizeof(ri));
1114
1115	D1(printk(KERN_DEBUG "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1116		  f->inocache->ino, start, end));
1117
1118	orig_end = end;
1119	orig_start = start;
1120
1121	if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
1122		/* Attempt to do some merging. But only expand to cover logically
1123		   adjacent frags if the block containing them is already considered
1124		   to be dirty. Otherwise we end up with GC just going round in
1125		   circles dirtying the nodes it already wrote out, especially
1126		   on NAND where we have small eraseblocks and hence a much higher
1127		   chance of nodes having to be split to cross boundaries. */
1128
1129		struct jffs2_node_frag *frag;
1130		uint32_t min, max;
1131
1132		min = start & ~(PAGE_CACHE_SIZE-1);
1133		max = min + PAGE_CACHE_SIZE;
1134
1135		frag = jffs2_lookup_node_frag(&f->fragtree, start);
1136
1137		/* BUG_ON(!frag) but that'll happen anyway... */
1138
1139		BUG_ON(frag->ofs != start);
1140
1141		/* First grow down... */
1142		while((frag = frag_prev(frag)) && frag->ofs >= min) {
1143
1144			/* If the previous frag doesn't even reach the beginning, there's
1145			   excessive fragmentation. Just merge. */
1146			if (frag->ofs > min) {
1147				D1(printk(KERN_DEBUG "Expanding down to cover partial frag (0x%x-0x%x)\n",
1148					  frag->ofs, frag->ofs+frag->size));
1149				start = frag->ofs;
1150				continue;
1151			}
1152			/* OK. This frag holds the first byte of the page. */
1153			if (!frag->node || !frag->node->raw) {
1154				D1(printk(KERN_DEBUG "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1155					  frag->ofs, frag->ofs+frag->size));
1156				break;
1157			} else {
1158
1159				/* OK, it's a frag which extends to the beginning of the page. Does it live
1160				   in a block which is still considered clean? If so, don't obsolete it.
1161				   If not, cover it anyway. */
1162
1163				struct jffs2_raw_node_ref *raw = frag->node->raw;
1164				struct jffs2_eraseblock *jeb;
1165
1166				jeb = &c->blocks[raw->flash_offset / c->sector_size];
1167
1168				if (jeb == c->gcblock) {
1169					D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1170						  frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1171					start = frag->ofs;
1172					break;
1173				}
1174				if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1175					D1(printk(KERN_DEBUG "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1176						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1177					break;
1178				}
1179
1180				D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1181						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1182				start = frag->ofs;
1183				break;
1184			}
1185		}
1186
1187		/* ... then up */
1188
1189		/* Find last frag which is actually part of the node we're to GC. */
1190		frag = jffs2_lookup_node_frag(&f->fragtree, end-1);
1191
1192		while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {
1193
1194			/* If the previous frag doesn't even reach the beginning, there's lots
1195			   of fragmentation. Just merge. */
1196			if (frag->ofs+frag->size < max) {
1197				D1(printk(KERN_DEBUG "Expanding up to cover partial frag (0x%x-0x%x)\n",
1198					  frag->ofs, frag->ofs+frag->size));
1199				end = frag->ofs + frag->size;
1200				continue;
1201			}
1202
1203			if (!frag->node || !frag->node->raw) {
1204				D1(printk(KERN_DEBUG "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1205					  frag->ofs, frag->ofs+frag->size));
1206				break;
1207			} else {
1208
1209				/* OK, it's a frag which extends to the beginning of the page. Does it live
1210				   in a block which is still considered clean? If so, don't obsolete it.
1211				   If not, cover it anyway. */
1212
1213				struct jffs2_raw_node_ref *raw = frag->node->raw;
1214				struct jffs2_eraseblock *jeb;
1215
1216				jeb = &c->blocks[raw->flash_offset / c->sector_size];
1217
1218				if (jeb == c->gcblock) {
1219					D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1220						  frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1221					end = frag->ofs + frag->size;
1222					break;
1223				}
1224				if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1225					D1(printk(KERN_DEBUG "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1226						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1227					break;
1228				}
1229
1230				D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1231						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1232				end = frag->ofs + frag->size;
1233				break;
1234			}
1235		}
1236		D1(printk(KERN_DEBUG "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n",
1237			  orig_start, orig_end, start, end));
1238
1239		D1(BUG_ON(end > frag_last(&f->fragtree)->ofs + frag_last(&f->fragtree)->size));
1240		BUG_ON(end < orig_end);
1241		BUG_ON(start > orig_start);
1242	}
1243
1244	/* First, use readpage() to read the appropriate page into the page cache */
1245	/* Q: What happens if we actually try to GC the _same_ page for which commit_write()
1246	 *    triggered garbage collection in the first place?
1247	 * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
1248	 *    page OK. We'll actually write it out again in commit_write, which is a little
1249	 *    suboptimal, but at least we're correct.
1250	 */
1251	pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
1252
1253	if (IS_ERR(pg_ptr)) {
1254		printk(KERN_WARNING "read_cache_page() returned error: %ld\n", PTR_ERR(pg_ptr));
1255		return PTR_ERR(pg_ptr);
1256	}
1257
1258	offset = start;
1259	while(offset < orig_end) {
1260		uint32_t datalen;
1261		uint32_t cdatalen;
1262		uint16_t comprtype = JFFS2_COMPR_NONE;
1263
1264		ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN,
1265					&alloclen, JFFS2_SUMMARY_INODE_SIZE);
1266
1267		if (ret) {
1268			printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1269			       sizeof(ri)+ JFFS2_MIN_DATA_LEN, ret);
1270			break;
1271		}
1272		cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1273		datalen = end - offset;
1274
1275		writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1));
1276
1277		comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
1278
1279		ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1280		ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1281		ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
1282		ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1283
1284		ri.ino = cpu_to_je32(f->inocache->ino);
1285		ri.version = cpu_to_je32(++f->highest_version);
1286		ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1287		ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1288		ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1289		ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1290		ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1291		ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1292		ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1293		ri.offset = cpu_to_je32(offset);
1294		ri.csize = cpu_to_je32(cdatalen);
1295		ri.dsize = cpu_to_je32(datalen);
1296		ri.compr = comprtype & 0xff;
1297		ri.usercompr = (comprtype >> 8) & 0xff;
1298		ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1299		ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
1300
1301		new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, ALLOC_GC);
1302
1303		jffs2_free_comprbuf(comprbuf, writebuf);
1304
1305		if (IS_ERR(new_fn)) {
1306			printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
1307			ret = PTR_ERR(new_fn);
1308			break;
1309		}
1310		ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1311		offset += datalen;
1312		if (f->metadata) {
1313			jffs2_mark_node_obsolete(c, f->metadata->raw);
1314			jffs2_free_full_dnode(f->metadata);
1315			f->metadata = NULL;
1316		}
1317	}
1318
1319	jffs2_gc_release_page(c, pg_ptr, &pg);
1320	return ret;
1321}
1322