1/*
2 *  linux/fs/ext3/balloc.c
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 *  Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993
10 *  Big-endian to little-endian byte-swapping/bitmaps by
11 *        David S. Miller (davem@caip.rutgers.edu), 1995
12 */
13
14#include <linux/config.h>
15#include <linux/sched.h>
16#include <linux/fs.h>
17#include <linux/jbd.h>
18#include <linux/ext3_fs.h>
19#include <linux/ext3_jbd.h>
20#include <linux/locks.h>
21#include <linux/quotaops.h>
22
23/*
24 * balloc.c contains the blocks allocation and deallocation routines
25 */
26
/*
 * The free blocks are managed by bitmaps.  A file system contains several
 * block groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
 * block for inodes, N blocks for the inode table and data blocks.
 *
 * The file system contains group descriptors which are located after the
 * super block.  Each descriptor contains the number of the bitmap block and
 * the free blocks count of its group.  The descriptors are loaded in memory
 * when a file system is mounted (see ext3_read_super).
 */
37
38
/*
 * True when b lies in the inclusive range [first, first + len - 1].
 * Every argument is evaluated more than once, so do not pass expressions
 * with side effects.
 */
#define in_range(b, first, len)	(!((b) < (first) || (b) > (first) + (len) - 1))
40
41struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
42					     unsigned int block_group,
43					     struct buffer_head ** bh)
44{
45	unsigned long group_desc;
46	unsigned long desc;
47	struct ext3_group_desc * gdp;
48
49	if (block_group >= sb->u.ext3_sb.s_groups_count) {
50		ext3_error (sb, "ext3_get_group_desc",
51			    "block_group >= groups_count - "
52			    "block_group = %d, groups_count = %lu",
53			    block_group, sb->u.ext3_sb.s_groups_count);
54
55		return NULL;
56	}
57
58	group_desc = block_group / EXT3_DESC_PER_BLOCK(sb);
59	desc = block_group % EXT3_DESC_PER_BLOCK(sb);
60	if (!sb->u.ext3_sb.s_group_desc[group_desc]) {
61		ext3_error (sb, "ext3_get_group_desc",
62			    "Group descriptor not loaded - "
63			    "block_group = %d, group_desc = %lu, desc = %lu",
64			     block_group, group_desc, desc);
65		return NULL;
66	}
67
68	gdp = (struct ext3_group_desc *)
69	      sb->u.ext3_sb.s_group_desc[group_desc]->b_data;
70	if (bh)
71		*bh = sb->u.ext3_sb.s_group_desc[group_desc];
72	return gdp + desc;
73}
74
75/*
76 * Read the bitmap for a given block_group, reading into the specified
77 * slot in the superblock's bitmap cache.
78 *
79 * Return >=0 on success or a -ve error code.
80 */
81
82static int read_block_bitmap (struct super_block * sb,
83			       unsigned int block_group,
84			       unsigned long bitmap_nr)
85{
86	struct ext3_group_desc * gdp;
87	struct buffer_head * bh = NULL;
88	int retval = -EIO;
89
90	gdp = ext3_get_group_desc (sb, block_group, NULL);
91	if (!gdp)
92		goto error_out;
93	retval = 0;
94	bh = sb_bread(sb, le32_to_cpu(gdp->bg_block_bitmap));
95	if (!bh) {
96		ext3_error (sb, "read_block_bitmap",
97			    "Cannot read block bitmap - "
98			    "block_group = %d, block_bitmap = %lu",
99			    block_group, (unsigned long) gdp->bg_block_bitmap);
100		retval = -EIO;
101	}
102	/*
103	 * On IO error, just leave a zero in the superblock's block pointer for
104	 * this group.  The IO will be retried next time.
105	 */
106error_out:
107	sb->u.ext3_sb.s_block_bitmap_number[bitmap_nr] = block_group;
108	sb->u.ext3_sb.s_block_bitmap[bitmap_nr] = bh;
109	return retval;
110}
111
112/*
113 * load_block_bitmap loads the block bitmap for a blocks group
114 *
115 * It maintains a cache for the last bitmaps loaded.  This cache is managed
116 * with a LRU algorithm.
117 *
118 * Notes:
119 * 1/ There is one cache per mounted file system.
120 * 2/ If the file system contains less than EXT3_MAX_GROUP_LOADED groups,
121 *    this function reads the bitmap without maintaining a LRU cache.
122 *
123 * Return the slot used to store the bitmap, or a -ve error code.
124 */
125static int __load_block_bitmap (struct super_block * sb,
126			        unsigned int block_group)
127{
128	int i, j, retval = 0;
129	unsigned long block_bitmap_number;
130	struct buffer_head * block_bitmap;
131
132	if (block_group >= sb->u.ext3_sb.s_groups_count)
133		ext3_panic (sb, "load_block_bitmap",
134			    "block_group >= groups_count - "
135			    "block_group = %d, groups_count = %lu",
136			    block_group, sb->u.ext3_sb.s_groups_count);
137
138	if (sb->u.ext3_sb.s_groups_count <= EXT3_MAX_GROUP_LOADED) {
139		if (sb->u.ext3_sb.s_block_bitmap[block_group]) {
140			if (sb->u.ext3_sb.s_block_bitmap_number[block_group] ==
141			    block_group)
142				return block_group;
143			ext3_error (sb, "__load_block_bitmap",
144				    "block_group != block_bitmap_number");
145		}
146		retval = read_block_bitmap (sb, block_group, block_group);
147		if (retval < 0)
148			return retval;
149		return block_group;
150	}
151
152	for (i = 0; i < sb->u.ext3_sb.s_loaded_block_bitmaps &&
153		    sb->u.ext3_sb.s_block_bitmap_number[i] != block_group; i++)
154		;
155	if (i < sb->u.ext3_sb.s_loaded_block_bitmaps &&
156  	    sb->u.ext3_sb.s_block_bitmap_number[i] == block_group) {
157		block_bitmap_number = sb->u.ext3_sb.s_block_bitmap_number[i];
158		block_bitmap = sb->u.ext3_sb.s_block_bitmap[i];
159		for (j = i; j > 0; j--) {
160			sb->u.ext3_sb.s_block_bitmap_number[j] =
161				sb->u.ext3_sb.s_block_bitmap_number[j - 1];
162			sb->u.ext3_sb.s_block_bitmap[j] =
163				sb->u.ext3_sb.s_block_bitmap[j - 1];
164		}
165		sb->u.ext3_sb.s_block_bitmap_number[0] = block_bitmap_number;
166		sb->u.ext3_sb.s_block_bitmap[0] = block_bitmap;
167
168		/*
169		 * There's still one special case here --- if block_bitmap == 0
170		 * then our last attempt to read the bitmap failed and we have
171		 * just ended up caching that failure.  Try again to read it.
172		 */
173		if (!block_bitmap)
174			retval = read_block_bitmap (sb, block_group, 0);
175	} else {
176		if (sb->u.ext3_sb.s_loaded_block_bitmaps<EXT3_MAX_GROUP_LOADED)
177			sb->u.ext3_sb.s_loaded_block_bitmaps++;
178		else
179			brelse (sb->u.ext3_sb.s_block_bitmap
180					[EXT3_MAX_GROUP_LOADED - 1]);
181		for (j = sb->u.ext3_sb.s_loaded_block_bitmaps - 1;
182					j > 0;  j--) {
183			sb->u.ext3_sb.s_block_bitmap_number[j] =
184				sb->u.ext3_sb.s_block_bitmap_number[j - 1];
185			sb->u.ext3_sb.s_block_bitmap[j] =
186				sb->u.ext3_sb.s_block_bitmap[j - 1];
187		}
188		retval = read_block_bitmap (sb, block_group, 0);
189	}
190	return retval;
191}
192
193/*
194 * Load the block bitmap for a given block group.  First of all do a couple
195 * of fast lookups for common cases and then pass the request onto the guts
196 * of the bitmap loader.
197 *
198 * Return the slot number of the group in the superblock bitmap cache's on
199 * success, or a -ve error code.
200 *
201 * There is still one inconsistency here --- if the number of groups in this
202 * filesystems is <= EXT3_MAX_GROUP_LOADED, then we have no way of
203 * differentiating between a group for which we have never performed a bitmap
204 * IO request, and a group for which the last bitmap read request failed.
205 */
206static inline int load_block_bitmap (struct super_block * sb,
207				     unsigned int block_group)
208{
209	int slot;
210
211	/*
212	 * Do the lookup for the slot.  First of all, check if we're asking
213	 * for the same slot as last time, and did we succeed that last time?
214	 */
215	if (sb->u.ext3_sb.s_loaded_block_bitmaps > 0 &&
216	    sb->u.ext3_sb.s_block_bitmap_number[0] == block_group &&
217	    sb->u.ext3_sb.s_block_bitmap[0]) {
218		return 0;
219	}
220	/*
221	 * Or can we do a fast lookup based on a loaded group on a filesystem
222	 * small enough to be mapped directly into the superblock?
223	 */
224	else if (sb->u.ext3_sb.s_groups_count <= EXT3_MAX_GROUP_LOADED &&
225		 sb->u.ext3_sb.s_block_bitmap_number[block_group]==block_group
226			&& sb->u.ext3_sb.s_block_bitmap[block_group]) {
227		slot = block_group;
228	}
229	/*
230	 * If not, then do a full lookup for this block group.
231	 */
232	else {
233		slot = __load_block_bitmap (sb, block_group);
234	}
235
236	/*
237	 * <0 means we just got an error
238	 */
239	if (slot < 0)
240		return slot;
241
242	/*
243	 * If it's a valid slot, we may still have cached a previous IO error,
244	 * in which case the bh in the superblock cache will be zero.
245	 */
246	if (!sb->u.ext3_sb.s_block_bitmap[slot])
247		return -EIO;
248
249	/*
250	 * Must have been read in OK to get this far.
251	 */
252	return slot;
253}
254
255/* Free given blocks, update quota and i_blocks field */
256void ext3_free_blocks (handle_t *handle, struct inode * inode,
257			unsigned long block, unsigned long count)
258{
259	struct buffer_head *bitmap_bh;
260	struct buffer_head *gd_bh;
261	unsigned long block_group;
262	unsigned long bit;
263	unsigned long i;
264	int bitmap_nr;
265	unsigned long overflow;
266	struct super_block * sb;
267	struct ext3_group_desc * gdp;
268	struct ext3_super_block * es;
269	int err = 0, ret;
270	int dquot_freed_blocks = 0;
271
272	sb = inode->i_sb;
273	if (!sb) {
274		printk ("ext3_free_blocks: nonexistent device");
275		return;
276	}
277	lock_super (sb);
278	es = sb->u.ext3_sb.s_es;
279	if (block < le32_to_cpu(es->s_first_data_block) ||
280	    (block + count) > le32_to_cpu(es->s_blocks_count)) {
281		ext3_error (sb, "ext3_free_blocks",
282			    "Freeing blocks not in datazone - "
283			    "block = %lu, count = %lu", block, count);
284		goto error_return;
285	}
286
287	ext3_debug ("freeing block %lu\n", block);
288
289do_more:
290	overflow = 0;
291	block_group = (block - le32_to_cpu(es->s_first_data_block)) /
292		      EXT3_BLOCKS_PER_GROUP(sb);
293	bit = (block - le32_to_cpu(es->s_first_data_block)) %
294		      EXT3_BLOCKS_PER_GROUP(sb);
295	/*
296	 * Check to see if we are freeing blocks across a group
297	 * boundary.
298	 */
299	if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) {
300		overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb);
301		count -= overflow;
302	}
303	bitmap_nr = load_block_bitmap (sb, block_group);
304	if (bitmap_nr < 0)
305		goto error_return;
306
307	bitmap_bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr];
308	gdp = ext3_get_group_desc (sb, block_group, &gd_bh);
309	if (!gdp)
310		goto error_return;
311
312	if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) ||
313	    in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) ||
314	    in_range (block, le32_to_cpu(gdp->bg_inode_table),
315		      sb->u.ext3_sb.s_itb_per_group) ||
316	    in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table),
317		      sb->u.ext3_sb.s_itb_per_group))
318		ext3_error (sb, "ext3_free_blocks",
319			    "Freeing blocks in system zones - "
320			    "Block = %lu, count = %lu",
321			    block, count);
322
323	/*
324	 * We are about to start releasing blocks in the bitmap,
325	 * so we need undo access.
326	 */
327	/* @@@ check errors */
328	BUFFER_TRACE(bitmap_bh, "getting undo access");
329	err = ext3_journal_get_undo_access(handle, bitmap_bh);
330	if (err)
331		goto error_return;
332
333	/*
334	 * We are about to modify some metadata.  Call the journal APIs
335	 * to unshare ->b_data if a currently-committing transaction is
336	 * using it
337	 */
338	BUFFER_TRACE(gd_bh, "get_write_access");
339	err = ext3_journal_get_write_access(handle, gd_bh);
340	if (err)
341		goto error_return;
342
343	BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access");
344	err = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
345	if (err)
346		goto error_return;
347
348	for (i = 0; i < count; i++) {
349		/*
350		 * An HJ special.  This is expensive...
351		 */
352#ifdef CONFIG_JBD_DEBUG
353		{
354			struct buffer_head *debug_bh;
355			debug_bh = sb_get_hash_table(sb, block + i);
356			if (debug_bh) {
357				BUFFER_TRACE(debug_bh, "Deleted!");
358				if (!bh2jh(bitmap_bh)->b_committed_data)
359					BUFFER_TRACE(debug_bh,
360						"No commited data in bitmap");
361				BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap");
362				__brelse(debug_bh);
363			}
364		}
365#endif
366		BUFFER_TRACE(bitmap_bh, "clear bit");
367		if (!ext3_clear_bit (bit + i, bitmap_bh->b_data)) {
368			ext3_error (sb, __FUNCTION__,
369				      "bit already cleared for block %lu",
370				      block + i);
371			BUFFER_TRACE(bitmap_bh, "bit already cleared");
372		} else {
373			dquot_freed_blocks++;
374			gdp->bg_free_blocks_count =
375			  cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)+1);
376			es->s_free_blocks_count =
377			  cpu_to_le32(le32_to_cpu(es->s_free_blocks_count)+1);
378		}
379		/* @@@ This prevents newly-allocated data from being
380		 * freed and then reallocated within the same
381		 * transaction.
382		 *
383		 * Ideally we would want to allow that to happen, but to
384		 * do so requires making journal_forget() capable of
385		 * revoking the queued write of a data block, which
386		 * implies blocking on the journal lock.  *forget()
387		 * cannot block due to truncate races.
388		 *
389		 * Eventually we can fix this by making journal_forget()
390		 * return a status indicating whether or not it was able
391		 * to revoke the buffer.  On successful revoke, it is
392		 * safe not to set the allocation bit in the committed
393		 * bitmap, because we know that there is no outstanding
394		 * activity on the buffer any more and so it is safe to
395		 * reallocate it.
396		 */
397		BUFFER_TRACE(bitmap_bh, "clear in b_committed_data");
398		J_ASSERT_BH(bitmap_bh,
399				bh2jh(bitmap_bh)->b_committed_data != NULL);
400		ext3_set_bit(bit + i, bh2jh(bitmap_bh)->b_committed_data);
401	}
402
403	/* We dirtied the bitmap block */
404	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
405	err = ext3_journal_dirty_metadata(handle, bitmap_bh);
406
407	/* And the group descriptor block */
408	BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
409	ret = ext3_journal_dirty_metadata(handle, gd_bh);
410	if (!err) err = ret;
411
412	/* And the superblock */
413	BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "dirtied superblock");
414	ret = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
415	if (!err) err = ret;
416
417	if (overflow && !err) {
418		block += count;
419		count = overflow;
420		goto do_more;
421	}
422	sb->s_dirt = 1;
423error_return:
424	ext3_std_error(sb, err);
425	unlock_super(sb);
426	if (dquot_freed_blocks)
427		DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
428	return;
429}
430
431/* For ext3 allocations, we must not reuse any blocks which are
432 * allocated in the bitmap buffer's "last committed data" copy.  This
433 * prevents deletes from freeing up the page for reuse until we have
434 * committed the delete transaction.
435 *
436 * If we didn't do this, then deleting something and reallocating it as
437 * data would allow the old block to be overwritten before the
438 * transaction committed (because we force data to disk before commit).
439 * This would lead to corruption if we crashed between overwriting the
440 * data and committing the delete.
441 *
442 * @@@ We may want to make this allocation behaviour conditional on
443 * data-writes at some point, and disable it for metadata allocations or
444 * sync-data inodes.
445 */
446static int ext3_test_allocatable(int nr, struct buffer_head *bh)
447{
448	if (ext3_test_bit(nr, bh->b_data))
449		return 0;
450	if (!buffer_jbd(bh) || !bh2jh(bh)->b_committed_data)
451		return 1;
452	return !ext3_test_bit(nr, bh2jh(bh)->b_committed_data);
453}
454
455/*
456 * Find an allocatable block in a bitmap.  We honour both the bitmap and
457 * its last-committed copy (if that exists), and perform the "most
458 * appropriate allocation" algorithm of looking for a free block near
459 * the initial goal; then for a free byte somewhere in the bitmap; then
460 * for any free bit in the bitmap.
461 */
462static int find_next_usable_block(int start,
463			struct buffer_head *bh, int maxblocks)
464{
465	int here, next;
466	char *p, *r;
467
468	if (start > 0) {
469		/*
470		 * The goal was occupied; search forward for a free
471		 * block within the next XX blocks.
472		 *
473		 * end_goal is more or less random, but it has to be
474		 * less than EXT3_BLOCKS_PER_GROUP. Aligning up to the
475		 * next 64-bit boundary is simple..
476		 */
477		int end_goal = (start + 63) & ~63;
478		here = ext3_find_next_zero_bit(bh->b_data, end_goal, start);
479		if (here < end_goal && ext3_test_allocatable(here, bh))
480			return here;
481
482		ext3_debug ("Bit not found near goal\n");
483
484	}
485
486	here = start;
487	if (here < 0)
488		here = 0;
489
490	/*
491	 * There has been no free block found in the near vicinity of
492	 * the goal: do a search forward through the block groups,
493	 * searching in each group first for an entire free byte in the
494	 * bitmap and then for any free bit.
495	 *
496	 * Search first in the remainder of the current group
497	 */
498	p = ((char *) bh->b_data) + (here >> 3);
499	r = memscan(p, 0, (maxblocks - here + 7) >> 3);
500	next = (r - ((char *) bh->b_data)) << 3;
501
502	if (next < maxblocks && ext3_test_allocatable(next, bh))
503		return next;
504
505	/* The bitmap search --- search forward alternately
506	 * through the actual bitmap and the last-committed copy
507	 * until we find a bit free in both. */
508
509	while (here < maxblocks) {
510		next  = ext3_find_next_zero_bit ((unsigned long *) bh->b_data,
511						 maxblocks, here);
512		if (next >= maxblocks)
513			return -1;
514		if (ext3_test_allocatable(next, bh))
515			return next;
516
517		J_ASSERT_BH(bh, bh2jh(bh)->b_committed_data);
518		here = ext3_find_next_zero_bit
519			((unsigned long *) bh2jh(bh)->b_committed_data,
520			 maxblocks, next);
521	}
522	return -1;
523}
524
525/*
526 * ext3_new_block uses a goal block to assist allocation.  If the goal is
527 * free, or there is a free block within 32 blocks of the goal, that block
528 * is allocated.  Otherwise a forward search is made for a free block; within
529 * each block group the search first looks for an entire free byte in the block
530 * bitmap, and then for any free bit if that fails.
531 * This function also updates quota and i_blocks field.
532 */
533int ext3_new_block (handle_t *handle, struct inode * inode,
534		unsigned long goal, u32 * prealloc_count,
535		u32 * prealloc_block, int * errp)
536{
537	struct buffer_head * bh, *bhtmp;
538	struct buffer_head * bh2;
539	int i, j, k, tmp, alloctmp;
540	int bitmap_nr;
541	int fatal = 0, err;
542	int performed_allocation = 0;
543	struct super_block * sb;
544	struct ext3_group_desc * gdp;
545	struct ext3_super_block * es;
546#ifdef EXT3FS_DEBUG
547	static int goal_hits = 0, goal_attempts = 0;
548#endif
549	*errp = -ENOSPC;
550	sb = inode->i_sb;
551	if (!sb) {
552		printk ("ext3_new_block: nonexistent device");
553		return 0;
554	}
555
556	/*
557	 * Check quota for allocation of this block.
558	 */
559	if (DQUOT_ALLOC_BLOCK(inode, 1)) {
560		*errp = -EDQUOT;
561		return 0;
562	}
563
564	lock_super (sb);
565	es = sb->u.ext3_sb.s_es;
566	if (le32_to_cpu(es->s_free_blocks_count) <=
567			le32_to_cpu(es->s_r_blocks_count) &&
568	    ((sb->u.ext3_sb.s_resuid != current->fsuid) &&
569	     (sb->u.ext3_sb.s_resgid == 0 ||
570	      !in_group_p (sb->u.ext3_sb.s_resgid)) &&
571	     !capable(CAP_SYS_RESOURCE)))
572		goto out;
573
574	ext3_debug ("goal=%lu.\n", goal);
575
576	/*
577	 * First, test whether the goal block is free.
578	 */
579	if (goal < le32_to_cpu(es->s_first_data_block) ||
580	    goal >= le32_to_cpu(es->s_blocks_count))
581		goal = le32_to_cpu(es->s_first_data_block);
582	i = (goal - le32_to_cpu(es->s_first_data_block)) /
583			EXT3_BLOCKS_PER_GROUP(sb);
584	gdp = ext3_get_group_desc (sb, i, &bh2);
585	if (!gdp)
586		goto io_error;
587
588	if (le16_to_cpu(gdp->bg_free_blocks_count) > 0) {
589		j = ((goal - le32_to_cpu(es->s_first_data_block)) %
590				EXT3_BLOCKS_PER_GROUP(sb));
591#ifdef EXT3FS_DEBUG
592		if (j)
593			goal_attempts++;
594#endif
595		bitmap_nr = load_block_bitmap (sb, i);
596		if (bitmap_nr < 0)
597			goto io_error;
598
599		bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr];
600
601		ext3_debug ("goal is at %d:%d.\n", i, j);
602
603		if (ext3_test_allocatable(j, bh)) {
604#ifdef EXT3FS_DEBUG
605			goal_hits++;
606			ext3_debug ("goal bit allocated.\n");
607#endif
608			goto got_block;
609		}
610
611		j = find_next_usable_block(j, bh, EXT3_BLOCKS_PER_GROUP(sb));
612		if (j >= 0)
613			goto search_back;
614	}
615
616	ext3_debug ("Bit not found in block group %d.\n", i);
617
618	/*
619	 * Now search the rest of the groups.  We assume that
620	 * i and gdp correctly point to the last group visited.
621	 */
622	for (k = 0; k < sb->u.ext3_sb.s_groups_count; k++) {
623		i++;
624		if (i >= sb->u.ext3_sb.s_groups_count)
625			i = 0;
626		gdp = ext3_get_group_desc (sb, i, &bh2);
627		if (!gdp) {
628			*errp = -EIO;
629			goto out;
630		}
631		if (le16_to_cpu(gdp->bg_free_blocks_count) > 0) {
632			bitmap_nr = load_block_bitmap (sb, i);
633			if (bitmap_nr < 0)
634				goto io_error;
635
636			bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr];
637			j = find_next_usable_block(-1, bh,
638						   EXT3_BLOCKS_PER_GROUP(sb));
639			if (j >= 0)
640				goto search_back;
641		}
642	}
643
644	/* No space left on the device */
645	goto out;
646
647search_back:
648	/*
649	 * We have succeeded in finding a free byte in the block
650	 * bitmap.  Now search backwards up to 7 bits to find the
651	 * start of this group of free blocks.
652	 */
653	for (	k = 0;
654		k < 7 && j > 0 && ext3_test_allocatable(j - 1, bh);
655		k++, j--)
656		;
657
658got_block:
659
660	ext3_debug ("using block group %d(%d)\n", i, gdp->bg_free_blocks_count);
661
662	/* Make sure we use undo access for the bitmap, because it is
663           critical that we do the frozen_data COW on bitmap buffers in
664           all cases even if the buffer is in BJ_Forget state in the
665           committing transaction.  */
666	BUFFER_TRACE(bh, "get undo access for marking new block");
667	fatal = ext3_journal_get_undo_access(handle, bh);
668	if (fatal) goto out;
669
670	BUFFER_TRACE(bh2, "get_write_access");
671	fatal = ext3_journal_get_write_access(handle, bh2);
672	if (fatal) goto out;
673
674	BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access");
675	fatal = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
676	if (fatal) goto out;
677
678	tmp = j + i * EXT3_BLOCKS_PER_GROUP(sb)
679				+ le32_to_cpu(es->s_first_data_block);
680
681	if (tmp == le32_to_cpu(gdp->bg_block_bitmap) ||
682	    tmp == le32_to_cpu(gdp->bg_inode_bitmap) ||
683	    in_range (tmp, le32_to_cpu(gdp->bg_inode_table),
684		      sb->u.ext3_sb.s_itb_per_group))
685		ext3_error (sb, "ext3_new_block",
686			    "Allocating block in system zone - "
687			    "block = %u", tmp);
688
689	/* The superblock lock should guard against anybody else beating
690	 * us to this point! */
691	J_ASSERT_BH(bh, !ext3_test_bit(j, bh->b_data));
692	BUFFER_TRACE(bh, "setting bitmap bit");
693	ext3_set_bit(j, bh->b_data);
694	performed_allocation = 1;
695
696#ifdef CONFIG_JBD_DEBUG
697	{
698		struct buffer_head *debug_bh;
699
700		/* Record bitmap buffer state in the newly allocated block */
701		debug_bh = sb_get_hash_table(sb, tmp);
702		if (debug_bh) {
703			BUFFER_TRACE(debug_bh, "state when allocated");
704			BUFFER_TRACE2(debug_bh, bh, "bitmap state");
705			brelse(debug_bh);
706		}
707	}
708#endif
709	if (buffer_jbd(bh) && bh2jh(bh)->b_committed_data)
710		J_ASSERT_BH(bh, !ext3_test_bit(j, bh2jh(bh)->b_committed_data));
711	bhtmp = bh;
712	alloctmp = j;
713
714	ext3_debug ("found bit %d\n", j);
715
716	/*
717	 * Do block preallocation now if required.
718	 */
719#ifdef EXT3_PREALLOCATE
720	/*
721	 * akpm: this is not enabled for ext3.  Need to use
722	 * ext3_test_allocatable()
723	 */
724	/* Writer: ->i_prealloc* */
725	if (prealloc_count && !*prealloc_count) {
726		int	prealloc_goal;
727		unsigned long next_block = tmp + 1;
728
729		prealloc_goal = es->s_prealloc_blocks ?
730			es->s_prealloc_blocks : EXT3_DEFAULT_PREALLOC_BLOCKS;
731
732		*prealloc_block = next_block;
733		/* Writer: end */
734		for (k = 1;
735		     k < prealloc_goal && (j + k) < EXT3_BLOCKS_PER_GROUP(sb);
736		     k++, next_block++) {
737			if (DQUOT_PREALLOC_BLOCK(inode, 1))
738				break;
739			/* Writer: ->i_prealloc* */
740			if (*prealloc_block + *prealloc_count != next_block ||
741			    ext3_set_bit (j + k, bh->b_data)) {
742				/* Writer: end */
743				DQUOT_FREE_BLOCK(inode, 1);
744 				break;
745			}
746			(*prealloc_count)++;
747			/* Writer: end */
748		}
749		/*
750		 * As soon as we go for per-group spinlocks we'll need these
751		 * done inside the loop above.
752		 */
753		gdp->bg_free_blocks_count =
754			cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) -
755			       (k - 1));
756		es->s_free_blocks_count =
757			cpu_to_le32(le32_to_cpu(es->s_free_blocks_count) -
758			       (k - 1));
759		ext3_debug ("Preallocated a further %lu bits.\n",
760			       (k - 1));
761	}
762#endif
763
764	j = tmp;
765
766	BUFFER_TRACE(bh, "journal_dirty_metadata for bitmap block");
767	err = ext3_journal_dirty_metadata(handle, bh);
768	if (!fatal) fatal = err;
769
770	if (j >= le32_to_cpu(es->s_blocks_count)) {
771		ext3_error (sb, "ext3_new_block",
772			    "block(%d) >= blocks count(%d) - "
773			    "block_group = %d, es == %p ",j,
774			le32_to_cpu(es->s_blocks_count), i, es);
775		goto out;
776	}
777
778	/*
779	 * It is up to the caller to add the new buffer to a journal
780	 * list of some description.  We don't know in advance whether
781	 * the caller wants to use it as metadata or data.
782	 */
783
784	ext3_debug ("allocating block %d. "
785		    "Goal hits %d of %d.\n", j, goal_hits, goal_attempts);
786
787	gdp->bg_free_blocks_count =
788			cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 1);
789	es->s_free_blocks_count =
790			cpu_to_le32(le32_to_cpu(es->s_free_blocks_count) - 1);
791
792	BUFFER_TRACE(bh2, "journal_dirty_metadata for group descriptor");
793	err = ext3_journal_dirty_metadata(handle, bh2);
794	if (!fatal) fatal = err;
795
796	BUFFER_TRACE(bh, "journal_dirty_metadata for superblock");
797	err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
798	if (!fatal) fatal = err;
799
800	sb->s_dirt = 1;
801	if (fatal)
802		goto out;
803
804	unlock_super (sb);
805	*errp = 0;
806	return j;
807
808io_error:
809	*errp = -EIO;
810out:
811	if (fatal) {
812		*errp = fatal;
813		ext3_std_error(sb, fatal);
814	}
815	unlock_super (sb);
816	/*
817	 * Undo the block allocation
818	 */
819	if (!performed_allocation)
820		DQUOT_FREE_BLOCK(inode, 1);
821	return 0;
822
823}
824
825unsigned long ext3_count_free_blocks (struct super_block * sb)
826{
827#ifdef EXT3FS_DEBUG
828	struct ext3_super_block * es;
829	unsigned long desc_count, bitmap_count, x;
830	int bitmap_nr;
831	struct ext3_group_desc * gdp;
832	int i;
833
834	lock_super (sb);
835	es = sb->u.ext3_sb.s_es;
836	desc_count = 0;
837	bitmap_count = 0;
838	gdp = NULL;
839	for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) {
840		gdp = ext3_get_group_desc (sb, i, NULL);
841		if (!gdp)
842			continue;
843		desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
844		bitmap_nr = load_block_bitmap (sb, i);
845		if (bitmap_nr < 0)
846			continue;
847
848		x = ext3_count_free (sb->u.ext3_sb.s_block_bitmap[bitmap_nr],
849				     sb->s_blocksize);
850		printk ("group %d: stored = %d, counted = %lu\n",
851			i, le16_to_cpu(gdp->bg_free_blocks_count), x);
852		bitmap_count += x;
853	}
854	printk("ext3_count_free_blocks: stored = %lu, computed = %lu, %lu\n",
855	       le32_to_cpu(es->s_free_blocks_count), desc_count, bitmap_count);
856	unlock_super (sb);
857	return bitmap_count;
858#else
859	return le32_to_cpu(sb->u.ext3_sb.s_es->s_free_blocks_count);
860#endif
861}
862
863static inline int block_in_use (unsigned long block,
864				struct super_block * sb,
865				unsigned char * map)
866{
867	return ext3_test_bit ((block -
868		le32_to_cpu(sb->u.ext3_sb.s_es->s_first_data_block)) %
869			 EXT3_BLOCKS_PER_GROUP(sb), map);
870}
871
/*
 * Return 1 if a is 0, 1 or an exact power of b, otherwise 0.
 * a is divided by b repeatedly until it reaches 1 or leaves a remainder.
 * b must be greater than 1; the callers in this file pass 3, 5 and 7.
 */
static inline int test_root(int a, int b)
{
	if (a == 0)
		return 1;

	while (a != 1) {
		if (a % b != 0)
			return 0;
		a /= b;
	}
	return 1;
}
884
/*
 * Return 1 if group is 0, 1 or a power of 3, 5 or 7, otherwise 0.
 * ext3_bg_has_super() and ext3_bg_num_gdb() use this test when the
 * sparse-super feature is set.
 */
int ext3_group_sparse(int group)
{
	if (test_root(group, 3))
		return 1;
	if (test_root(group, 5))
		return 1;
	return test_root(group, 7) != 0;
}
890
891/**
892 *	ext3_bg_has_super - number of blocks used by the superblock in group
893 *	@sb: superblock for filesystem
894 *	@group: group number to check
895 *
896 *	Return the number of blocks used by the superblock (primary or backup)
897 *	in this group.  Currently this will be only 0 or 1.
898 */
899int ext3_bg_has_super(struct super_block *sb, int group)
900{
901	if (EXT3_HAS_RO_COMPAT_FEATURE(sb,EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
902	    !ext3_group_sparse(group))
903		return 0;
904	return 1;
905}
906
907/**
908 *	ext3_bg_num_gdb - number of blocks used by the group table in group
909 *	@sb: superblock for filesystem
910 *	@group: group number to check
911 *
912 *	Return the number of blocks used by the group descriptor table
913 *	(primary or backup) in this group.  In the future there may be a
914 *	different number of descriptor blocks in each group.
915 */
916unsigned long ext3_bg_num_gdb(struct super_block *sb, int group)
917{
918	if (EXT3_HAS_RO_COMPAT_FEATURE(sb,EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
919	    !ext3_group_sparse(group))
920		return 0;
921	return EXT3_SB(sb)->s_gdb_count;
922}
923
#ifdef CONFIG_EXT3_CHECK
/*
 * Consistency check of the block bitmaps.
 * Called at mount-time, super-block is locked.
 *
 * For every group whose descriptor and bitmap can be obtained, verify that
 * the superblock copy, the descriptor blocks, both bitmaps and the inode
 * table are marked in use, and that the stored free count matches the
 * number of clear bits.  Finally compare the superblock's free count with
 * the sum over all groups.  Every mismatch is reported through ext3_error.
 */
void ext3_check_blocks_bitmap (struct super_block * sb)
{
	struct buffer_head * bh;
	struct ext3_super_block * es;
	unsigned long desc_count, bitmap_count, x, j;
	unsigned long desc_blocks;
	int bitmap_nr;
	struct ext3_group_desc * gdp;
	int i;

	es = sb->u.ext3_sb.s_es;
	desc_count = 0;
	bitmap_count = 0;
	gdp = NULL;
	for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) {
		gdp = ext3_get_group_desc (sb, i, NULL);
		if (!gdp)
			continue;
		desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
		bitmap_nr = load_block_bitmap (sb, i);
		if (bitmap_nr < 0)
			continue;

		bh = EXT3_SB(sb)->s_block_bitmap[bitmap_nr];

		/* Bit 0 covers the superblock copy, if this group has one */
		if (ext3_bg_has_super(sb, i) && !ext3_test_bit(0, bh->b_data))
			ext3_error(sb, __FUNCTION__,
				   "Superblock in group %d is marked free", i);

		/* The descriptor blocks follow, starting at bit 1 */
		desc_blocks = ext3_bg_num_gdb(sb, i);
		for (j = 0; j < desc_blocks; j++)
			if (!ext3_test_bit(j + 1, bh->b_data))
				ext3_error(sb, __FUNCTION__,
					   "Descriptor block #%lu in group "
					   "%d is marked free", j, i);

		if (!block_in_use (le32_to_cpu(gdp->bg_block_bitmap),
						sb, bh->b_data))
			ext3_error (sb, "ext3_check_blocks_bitmap",
				    "Block bitmap for group %d is marked free",
				    i);

		if (!block_in_use (le32_to_cpu(gdp->bg_inode_bitmap),
						sb, bh->b_data))
			ext3_error (sb, "ext3_check_blocks_bitmap",
				    "Inode bitmap for group %d is marked free",
				    i);

		/* j is unsigned long, so it is printed with %lu */
		for (j = 0; j < sb->u.ext3_sb.s_itb_per_group; j++)
			if (!block_in_use (le32_to_cpu(gdp->bg_inode_table) + j,
							sb, bh->b_data))
				ext3_error (sb, "ext3_check_blocks_bitmap",
					    "Block #%lu of the inode table in "
					    "group %d is marked free", j, i);

		x = ext3_count_free (bh, sb->s_blocksize);
		if (le16_to_cpu(gdp->bg_free_blocks_count) != x)
			ext3_error (sb, "ext3_check_blocks_bitmap",
				    "Wrong free blocks count for group %d, "
				    "stored = %d, counted = %lu", i,
				    le16_to_cpu(gdp->bg_free_blocks_count), x);
		bitmap_count += x;
	}
	if (le32_to_cpu(es->s_free_blocks_count) != bitmap_count)
		ext3_error (sb, "ext3_check_blocks_bitmap",
			"Wrong free blocks count in super block, "
			"stored = %lu, counted = %lu",
			(unsigned long)le32_to_cpu(es->s_free_blocks_count),
			bitmap_count);
}
#endif
997