/*
 * Copyright 2001-2012, Axel Dörfler, axeld@pinc-software.de.
 * This file may be used under the terms of the MIT License.
 */


//! Block bitmap handling and allocation policies


#include "BlockAllocator.h"

#include "bfs_control.h"
#include "BPlusTree.h"
#include "Debug.h"
#include "Inode.h"
#include "Volume.h"


// Things the BlockAllocator should do:

// - find a range of blocks of a certain size near a specific position
// - allocate an unsharp (fuzzy) range of blocks for pre-allocation
// - free blocks
// - know how to deal with each allocation, special handling for directories,
//   files, symlinks, etc. (type-sensitive allocation policies)

// What makes the code complicated is the fact that we are not just reading
// in the whole bitmap and operating on it in memory - e.g. a 13 GB partition
// with a block size of 2048 bytes already has an 800 kB bitmap, and the size
// of partitions will grow even more - so that's not an option.
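// (To put numbers to that example: 13 GB / 2048 bytes per block is roughly
// 6.8 million blocks, hence 6.8 million bits in the bitmap, which comes to
// about 830 kB.)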
// Instead we are reading in every block when it's used - since an allocation
// group can span several blocks in the block bitmap, the AllocationBlock
// class is there to make handling those easier.

// The current implementation is only slightly optimized and could probably
// be improved a lot. Furthermore, the allocation policies used here should
// have some real world tests.

#if BFS_TRACING && !defined(BFS_SHELL)
namespace BFSBlockTracing {


class Allocate : public AbstractTraceEntry {
public:
	Allocate(block_run run)
		:
		fRun(run)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("bfs:alloc %lu.%u.%u", fRun.AllocationGroup(),
			fRun.Start(), fRun.Length());
	}

	const block_run& Run() const { return fRun; }

private:
			block_run			fRun;
};


class Free : public AbstractTraceEntry {
public:
	Free(block_run run)
		:
		fRun(run)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("bfs:free %lu.%u.%u", fRun.AllocationGroup(),
			fRun.Start(), fRun.Length());
	}

	const block_run& Run() const { return fRun; }

private:
			block_run			fRun;
};


static uint32
checksum(const uint8* data, size_t size)
{
	const uint32* data4 = (const uint32*)data;
	uint32 sum = 0;
	while (size >= 4) {
		sum += *data4;
		data4++;
		size -= 4;
	}
	return sum;
}


class Block : public AbstractTraceEntry {
public:
	Block(const char* label, off_t blockNumber, const uint8* data,
			size_t size, uint32 start = 0, uint32 length = 0)
		:
		fBlock(blockNumber),
		fData(data),
		fStart(start),
		fLength(length),
		fLabel(label)
	{
		fSum = checksum(data, size);
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("bfs:%s: block %Ld (%p), sum %lu, s/l %lu/%lu", fLabel,
			fBlock, fData, fSum, fStart, fLength);
	}

private:
			off_t				fBlock;
			const uint8*		fData;
			uint32				fStart;
			uint32				fLength;
			uint32				fSum;
			const char*			fLabel;
};


class BlockChange : public AbstractTraceEntry {
public:
	BlockChange(const char* label, int32 block, uint32 oldData, uint32 newData)
		:
		fBlock(block),
		fOldData(oldData),
		fNewData(newData),
		fLabel(label)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("bfs:%s: block %ld, %08lx -> %08lx", fLabel,
			fBlock, fOldData, fNewData);
	}

private:
			int32				fBlock;
			uint32				fOldData;
			uint32				fNewData;
			const char*			fLabel;
};


}	// namespace BFSBlockTracing

#	define T(x) new(std::nothrow) BFSBlockTracing::x;
#else
#	define T(x) ;
#endif

#ifdef DEBUG_ALLOCATION_GROUPS
#	define CHECK_ALLOCATION_GROUP(group) _CheckGroup(group)
#else
#	define CHECK_ALLOCATION_GROUP(group) ;
#endif


struct check_index {
	check_index()
		:
		inode(NULL)
	{
	}

	char				name[B_FILE_NAME_LENGTH];
	block_run			run;
	Inode*				inode;
};


struct check_cookie {
	check_cookie()
	{
	}

	uint32				pass;
	block_run			current;
	Inode*				parent;
	mode_t				parent_mode;
	Stack<block_run>	stack;
	TreeIterator*		iterator;
	check_control		control;
	Stack<check_index*>	indices;
};


class AllocationBlock : public CachedBlock {
public:
	AllocationBlock(Volume* volume);

	inline void Allocate(uint16 start, uint16 numBlocks);
	inline void Free(uint16 start, uint16 numBlocks);
	inline bool IsUsed(uint16 block);

	status_t SetTo(AllocationGroup& group, uint16 block);
	status_t SetToWritable(Transaction& transaction, AllocationGroup& group,
		uint16 block);

	uint32 NumBlockBits() const { return fNumBits; }
	uint32& Block(int32 index) { return ((uint32*)fBlock)[index]; }
	uint8* Block() const { return (uint8*)fBlock; }

private:
	uint32	fNumBits;
#ifdef DEBUG
	bool	fWritable;
#endif
};


class AllocationGroup {
public:
	AllocationGroup();

	void AddFreeRange(int32 start, int32 blocks);
	bool IsFull() const { return fFreeBits == 0; }

	status_t Allocate(Transaction& transaction, uint16 start, int32 length);
	status_t Free(Transaction& transaction, uint16 start, int32 length);

	uint32 NumBits() const { return fNumBits; }
	uint32 NumBlocks() const { return fNumBlocks; }
	int32 Start() const { return fStart; }

private:
	friend class BlockAllocator;

	uint32	fNumBits;
	uint32	fNumBlocks;
	int32	fStart;
	int32	fFirstFree;
	int32	fFreeBits;

	int32	fLargestStart;
	int32	fLargestLength;
	bool	fLargestValid;
};


AllocationBlock::AllocationBlock(Volume* volume)
	: CachedBlock(volume)
{
}


status_t
AllocationBlock::SetTo(AllocationGroup& group, uint16 block)
{
	// 8 blocks per byte
	fNumBits = fVolume->BlockSize() << 3;
	// the last group may have fewer bits than the others
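	// (For instance, with 2048-byte blocks each bitmap block holds 16384
	// bits; a group of 20000 bits would then use all 16384 bits of its first
	// bitmap block, but only 20000 % 16384 = 3616 bits of its second one.)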
	if ((block + 1) * fNumBits > group.NumBits())
		fNumBits = group.NumBits() % fNumBits;

#ifdef DEBUG
	fWritable = false;
#endif
	return CachedBlock::SetTo(group.Start() + block) != NULL ? B_OK : B_ERROR;
}


status_t
AllocationBlock::SetToWritable(Transaction& transaction, AllocationGroup& group,
	uint16 block)
{
	// 8 blocks per byte
	fNumBits = fVolume->BlockSize() << 3;
	// the last group may have fewer bits in the last block
	if ((block + 1) * fNumBits > group.NumBits())
		fNumBits = group.NumBits() % fNumBits;

#ifdef DEBUG
	fWritable = true;
#endif
	return CachedBlock::SetToWritable(transaction, group.Start() + block)
		!= NULL ? B_OK : B_ERROR;
}


bool
AllocationBlock::IsUsed(uint16 block)
{
	if (block > fNumBits)
		return true;

	// the block bitmap is accessed in 32-bit blocks
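	// (e.g. bit 70 of this bitmap block lives in 32-bit word 70 >> 5 == 2,
	// at bit position 70 % 32 == 6)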
	return Block(block >> 5) & HOST_ENDIAN_TO_BFS_INT32(1UL << (block % 32));
}


void
AllocationBlock::Allocate(uint16 start, uint16 numBlocks)
{
	ASSERT(start < fNumBits);
	ASSERT(uint32(start + numBlocks) <= fNumBits);
#ifdef DEBUG
	ASSERT(fWritable);
#endif

	T(Block("b-alloc-in", fBlockNumber, fBlock, fVolume->BlockSize(),
		start, numBlocks));

	int32 block = start >> 5;

	while (numBlocks > 0) {
		uint32 mask = 0;
		for (int32 i = start % 32; i < 32 && numBlocks; i++, numBlocks--)
			mask |= 1UL << i;

		T(BlockChange("b-alloc", block, Block(block),
			Block(block) | HOST_ENDIAN_TO_BFS_INT32(mask)));

#if KDEBUG
		// check for already set blocks
		if (HOST_ENDIAN_TO_BFS_INT32(mask) & ((uint32*)fBlock)[block]) {
			FATAL(("AllocationBlock::Allocate(): some blocks are already "
				"allocated, start = %u, numBlocks = %u\n", start, numBlocks));
			panic("blocks already set!");
		}
#endif

		Block(block++) |= HOST_ENDIAN_TO_BFS_INT32(mask);
		start = 0;
	}
	T(Block("b-alloc-out", fBlockNumber, fBlock, fVolume->BlockSize(),
		start, numBlocks));
}


void
AllocationBlock::Free(uint16 start, uint16 numBlocks)
{
	ASSERT(start < fNumBits);
	ASSERT(uint32(start + numBlocks) <= fNumBits);
#ifdef DEBUG
	ASSERT(fWritable);
#endif

	int32 block = start >> 5;

	while (numBlocks > 0) {
		uint32 mask = 0;
		for (int32 i = start % 32; i < 32 && numBlocks; i++, numBlocks--)
			mask |= 1UL << (i % 32);

		T(BlockChange("b-free", block, Block(block),
			Block(block) & HOST_ENDIAN_TO_BFS_INT32(~mask)));

		Block(block++) &= HOST_ENDIAN_TO_BFS_INT32(~mask);
		start = 0;
	}
}


//	#pragma mark -


/*!	The allocation groups are created and initialized in
	BlockAllocator::Initialize() and BlockAllocator::InitializeAndClearBitmap()
	respectively.
*/
AllocationGroup::AllocationGroup()
	:
	fFirstFree(-1),
	fFreeBits(0),
	fLargestValid(false)
{
}


void
AllocationGroup::AddFreeRange(int32 start, int32 blocks)
{
	//D(if (blocks > 512)
	//	PRINT(("range of %ld blocks starting at %ld\n",blocks,start)));

	if (fFirstFree == -1)
		fFirstFree = start;

	if (!fLargestValid || fLargestLength < blocks) {
		fLargestStart = start;
		fLargestLength = blocks;
		fLargestValid = true;
	}

	fFreeBits += blocks;
}


/*!	Allocates the specified run in the allocation group.
	Doesn't check if the run is valid or already partially allocated, nor
	does it maintain the free range hints or the volume's used blocks count.
	It only does the low-level work of allocating some bits in the block bitmap.
	Assumes that the block bitmap lock is held.
*/
status_t
AllocationGroup::Allocate(Transaction& transaction, uint16 start, int32 length)
{
	ASSERT(start + length <= (int32)fNumBits);

	// Update the allocation group info
	// TODO: this info will be incorrect if something goes wrong later
	// Note, the fFirstFree block doesn't have to be really free
	if (start == fFirstFree)
		fFirstFree = start + length;
	fFreeBits -= length;

	if (fLargestValid) {
		bool cut = false;
		if (fLargestStart == start) {
			// cut from start
			fLargestStart += length;
			fLargestLength -= length;
			cut = true;
		} else if (start > fLargestStart
			&& start < fLargestStart + fLargestLength) {
			// cut from end
			fLargestLength = start - fLargestStart;
			cut = true;
		}
		if (cut && (fLargestLength < fLargestStart
				|| fLargestLength
						< (int32)fNumBits - (fLargestStart + fLargestLength))) {
			// might not be the largest block anymore
			fLargestValid = false;
		}
	}

	Volume* volume = transaction.GetVolume();

	// calculate block in the block bitmap and position within
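	// (e.g. with 2048-byte blocks there are 16384 bits per bitmap block, so
	// a group offset of start = 40000 maps to bitmap block 2, bit
	// 40000 % 16384 = 7232 within it)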
	uint32 bitsPerBlock = volume->BlockSize() << 3;
	uint32 block = start / bitsPerBlock;
	start = start % bitsPerBlock;

	AllocationBlock cached(volume);

	while (length > 0) {
		if (cached.SetToWritable(transaction, *this, block) < B_OK) {
			fLargestValid = false;
			RETURN_ERROR(B_IO_ERROR);
		}

		uint32 numBlocks = length;
		if (start + numBlocks > cached.NumBlockBits())
			numBlocks = cached.NumBlockBits() - start;

		cached.Allocate(start, numBlocks);

		length -= numBlocks;
		start = 0;
		block++;
	}

	return B_OK;
}


/*!	Frees the specified run in the allocation group.
	Doesn't check if the run is valid or was not completely allocated, nor
	does it maintain the free range hints or the volume's used blocks count.
	It only does the low-level work of freeing some bits in the block bitmap.
	Assumes that the block bitmap lock is held.
*/
status_t
AllocationGroup::Free(Transaction& transaction, uint16 start, int32 length)
{
	ASSERT(start + length <= (int32)fNumBits);

	// Update the allocation group info
	// TODO: this info will be incorrect if something goes wrong later
	if (fFirstFree > start)
		fFirstFree = start;
	fFreeBits += length;

	// The range to be freed cannot be part of the valid largest range
	ASSERT(!fLargestValid || start + length <= fLargestStart
		|| start > fLargestStart);

	if (fLargestValid
		&& (start + length == fLargestStart
			|| fLargestStart + fLargestLength == start
			|| (start < fLargestStart && fLargestStart > fLargestLength)
			|| (start > fLargestStart
				&& (int32)fNumBits - (fLargestStart + fLargestLength)
						> fLargestLength))) {
		fLargestValid = false;
	}

	Volume* volume = transaction.GetVolume();

	// calculate block in the block bitmap and position within
	uint32 bitsPerBlock = volume->BlockSize() << 3;
	uint32 block = start / bitsPerBlock;
	start = start % bitsPerBlock;

	AllocationBlock cached(volume);

	while (length > 0) {
		if (cached.SetToWritable(transaction, *this, block) < B_OK)
			RETURN_ERROR(B_IO_ERROR);

		T(Block("free-1", block, cached.Block(), volume->BlockSize()));
		uint16 freeLength = length;
		if (uint32(start + length) > cached.NumBlockBits())
			freeLength = cached.NumBlockBits() - start;

		cached.Free(start, freeLength);

		length -= freeLength;
		start = 0;
		T(Block("free-2", block, cached.Block(), volume->BlockSize()));
		block++;
	}
	return B_OK;
}


//	#pragma mark -


BlockAllocator::BlockAllocator(Volume* volume)
	:
	fVolume(volume),
	fGroups(NULL),
	fCheckBitmap(NULL),
	fCheckCookie(NULL)
{
	recursive_lock_init(&fLock, "bfs allocator");
}


BlockAllocator::~BlockAllocator()
{
	recursive_lock_destroy(&fLock);
	delete[] fGroups;
}


status_t
BlockAllocator::Initialize(bool full)
{
	fNumGroups = fVolume->AllocationGroups();
	fBlocksPerGroup = fVolume->SuperBlock().BlocksPerAllocationGroup();
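	// the number of blocks the whole bitmap occupies, i.e. the bitmap size
	// rounded up to whole blocks: ceil(NumBlocks() / (BlockSize() * 8))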
	fNumBlocks = (fVolume->NumBlocks() + fVolume->BlockSize() * 8 - 1)
		/ (fVolume->BlockSize() * 8);

	fGroups = new(std::nothrow) AllocationGroup[fNumGroups];
	if (fGroups == NULL)
		return B_NO_MEMORY;

	if (!full)
		return B_OK;

	recursive_lock_lock(&fLock);
		// the lock will be released by the _Initialize() method

	thread_id id = spawn_kernel_thread((thread_func)BlockAllocator::_Initialize,
		"bfs block allocator", B_LOW_PRIORITY, this);
	if (id < B_OK)
		return _Initialize(this);

	recursive_lock_transfer_lock(&fLock, id);

	return resume_thread(id);
}


status_t
BlockAllocator::InitializeAndClearBitmap(Transaction& transaction)
{
	status_t status = Initialize(false);
	if (status != B_OK)
		return status;

	uint32 numBits = 8 * fBlocksPerGroup * fVolume->BlockSize();
	uint32 blockShift = fVolume->BlockShift();

	uint32* buffer = (uint32*)malloc(numBits >> 3);
	if (buffer == NULL)
		RETURN_ERROR(B_NO_MEMORY);

	memset(buffer, 0, numBits >> 3);

	off_t offset = 1;
		// the bitmap starts directly after the superblock

	// initialize the AllocationGroup objects and clear the on-disk bitmap

	for (int32 i = 0; i < fNumGroups; i++) {
		if (write_pos(fVolume->Device(), offset << blockShift, buffer,
				fBlocksPerGroup << blockShift) < B_OK) {
			free(buffer);
			return B_ERROR;
		}

		// the last allocation group may contain fewer blocks than the others
		if (i == fNumGroups - 1) {
			fGroups[i].fNumBits = fVolume->NumBlocks() - i * numBits;
			fGroups[i].fNumBlocks = 1 + ((fGroups[i].NumBits() - 1)
				>> (blockShift + 3));
		} else {
			fGroups[i].fNumBits = numBits;
			fGroups[i].fNumBlocks = fBlocksPerGroup;
		}
		fGroups[i].fStart = offset;
		fGroups[i].fFirstFree = fGroups[i].fLargestStart = 0;
		fGroups[i].fFreeBits = fGroups[i].fLargestLength = fGroups[i].fNumBits;
		fGroups[i].fLargestValid = true;

		offset += fBlocksPerGroup;
	}
	free(buffer);

	// reserve the boot block, the log area, and the block bitmap itself
	uint32 reservedBlocks = fVolume->Log().Start() + fVolume->Log().Length();

	if (fGroups[0].Allocate(transaction, 0, reservedBlocks) < B_OK) {
		FATAL(("could not allocate reserved space for block bitmap/log!\n"));
		return B_ERROR;
	}
	fVolume->SuperBlock().used_blocks
		= HOST_ENDIAN_TO_BFS_INT64(reservedBlocks);

	return B_OK;
}


status_t
BlockAllocator::_Initialize(BlockAllocator* allocator)
{
	// The lock must already be held at this point
	RecursiveLocker locker(allocator->fLock, true);

	Volume* volume = allocator->fVolume;
	uint32 blocks = allocator->fBlocksPerGroup;
	uint32 blockShift = volume->BlockShift();
	off_t freeBlocks = 0;

	uint32* buffer = (uint32*)malloc(blocks << blockShift);
	if (buffer == NULL)
		RETURN_ERROR(B_NO_MEMORY);

	AllocationGroup* groups = allocator->fGroups;
	off_t offset = 1;
	uint32 bitsPerGroup = 8 * (blocks << blockShift);
	int32 numGroups = allocator->fNumGroups;

	for (int32 i = 0; i < numGroups; i++) {
		if (read_pos(volume->Device(), offset << blockShift, buffer,
				blocks << blockShift) < B_OK)
			break;

		// the last allocation group may contain fewer blocks than the others
		if (i == numGroups - 1) {
			groups[i].fNumBits = volume->NumBlocks() - i * bitsPerGroup;
			groups[i].fNumBlocks = 1 + ((groups[i].NumBits() - 1)
				>> (blockShift + 3));
		} else {
			groups[i].fNumBits = bitsPerGroup;
			groups[i].fNumBlocks = blocks;
		}
		groups[i].fStart = offset;

		// finds all free ranges in this allocation group
		int32 start = -1, range = 0;
		int32 numBits = groups[i].fNumBits, bit = 0;
		int32 count = (numBits + 31) / 32;

		for (int32 k = 0; k < count; k++) {
			for (int32 j = 0; j < 32 && bit < numBits; j++, bit++) {
				if (buffer[k] & (1UL << j)) {
					// block is in use
					if (range > 0) {
						groups[i].AddFreeRange(start, range);
						range = 0;
					}
				} else if (range++ == 0) {
					// block is free, start new free range
					start = bit;
				}
			}
		}
		if (range)
			groups[i].AddFreeRange(start, range);

		freeBlocks += groups[i].fFreeBits;

		offset += blocks;
	}
	free(buffer);

	// check if block bitmap and log area are reserved
	uint32 reservedBlocks = volume->Log().Start() + volume->Log().Length();

	if (allocator->CheckBlocks(0, reservedBlocks) != B_OK) {
		if (volume->IsReadOnly()) {
			FATAL(("Space for block bitmap or log area is not reserved "
				"(volume is mounted read-only)!\n"));
		} else {
			Transaction transaction(volume, 0);
			if (groups[0].Allocate(transaction, 0, reservedBlocks) != B_OK) {
				FATAL(("Could not allocate reserved space for block "
					"bitmap/log!\n"));
				volume->Panic();
			} else {
				transaction.Done();
				FATAL(("Space for block bitmap or log area was not "
					"reserved!\n"));
			}
		}
	}

	off_t usedBlocks = volume->NumBlocks() - freeBlocks;
	if (volume->UsedBlocks() != usedBlocks) {
		// If the disk was in a dirty state at mount time, it's
		// normal that these values don't match
		INFORM(("volume reports %" B_PRIdOFF " used blocks, correct is %"
			B_PRIdOFF "\n", volume->UsedBlocks(), usedBlocks));
		volume->SuperBlock().used_blocks = HOST_ENDIAN_TO_BFS_INT64(usedBlocks);
	}

	return B_OK;
}


void
BlockAllocator::Uninitialize()
{
	// We only have to make sure that the initializer thread isn't running
	// anymore.
	recursive_lock_lock(&fLock);
}


/*!	Tries to allocate between \a minimum and \a maximum blocks starting
	at group \a groupIndex with offset \a start. The resulting allocation
	is put into \a run.

	The number of allocated blocks is always a multiple of \a minimum,
	which has to be a power of two.
*/
status_t
BlockAllocator::AllocateBlocks(Transaction& transaction, int32 groupIndex,
	uint16 start, uint16 maximum, uint16 minimum, block_run& run)
{
	if (maximum == 0)
		return B_BAD_VALUE;

	FUNCTION_START(("group = %ld, start = %u, maximum = %u, minimum = %u\n",
		groupIndex, start, maximum, minimum));

	AllocationBlock cached(fVolume);
	RecursiveLocker lock(fLock);

	uint32 bitsPerFullBlock = fVolume->BlockSize() << 3;

	// Find the block_run that can fulfill the request best
	int32 bestGroup = -1;
	int32 bestStart = -1;
	int32 bestLength = -1;

	for (int32 i = 0; i < fNumGroups + 1; i++, groupIndex++, start = 0) {
		groupIndex = groupIndex % fNumGroups;
		AllocationGroup& group = fGroups[groupIndex];

		CHECK_ALLOCATION_GROUP(groupIndex);

		if (start >= group.NumBits() || group.IsFull())
			continue;

		// The wanted maximum is smaller than the largest free block in the
		// group or already smaller than the minimum

		if (start < group.fFirstFree)
			start = group.fFirstFree;

		if (group.fLargestValid) {
			if (group.fLargestLength < bestLength)
				continue;

			if (group.fLargestStart >= start) {
				if (group.fLargestLength >= bestLength) {
					bestGroup = groupIndex;
					bestStart = group.fLargestStart;
					bestLength = group.fLargestLength;

					if (bestLength >= maximum)
						break;
				}

				// We know everything we need to about this group, let's skip
				// to the next
				continue;
			}
		}

		// There may be more than one block per allocation group - and
		// we iterate through them to find a place for the allocation.
		// (One allocation can't exceed a single allocation group.)

		uint32 block = start / (fVolume->BlockSize() << 3);
		int32 currentStart = 0, currentLength = 0;
		int32 groupLargestStart = -1;
		int32 groupLargestLength = -1;
		int32 currentBit = start;
		bool canFindGroupLargest = start == 0;

		for (; block < group.NumBlocks(); block++) {
			if (cached.SetTo(group, block) < B_OK)
				RETURN_ERROR(B_ERROR);

			T(Block("alloc-in", group.Start() + block, cached.Block(),
				fVolume->BlockSize(), groupIndex, currentStart));

			// find a block large enough to hold the allocation
			for (uint32 bit = start % bitsPerFullBlock;
					bit < cached.NumBlockBits(); bit++) {
				if (!cached.IsUsed(bit)) {
					if (currentLength == 0) {
						// start new range
						currentStart = currentBit;
					}

					// have we found a range large enough to hold numBlocks?
					if (++currentLength >= maximum) {
						bestGroup = groupIndex;
						bestStart = currentStart;
						bestLength = currentLength;
						break;
					}
				} else {
					if (currentLength) {
						// end of a range
						if (currentLength > bestLength) {
							bestGroup = groupIndex;
							bestStart = currentStart;
							bestLength = currentLength;
						}
						if (currentLength > groupLargestLength) {
							groupLargestStart = currentStart;
							groupLargestLength = currentLength;
						}
						currentLength = 0;
					}
					if ((int32)group.NumBits() - currentBit
							<= groupLargestLength) {
						// We can't find a bigger block in this group anymore,
						// let's skip the rest.
						block = group.NumBlocks();
						break;
					}
				}
				currentBit++;
			}

			T(Block("alloc-out", block, cached.Block(),
				fVolume->BlockSize(), groupIndex, currentStart));

			if (bestLength >= maximum) {
				canFindGroupLargest = false;
				break;
			}

			// start from the beginning of the next block
			start = 0;
		}

		if (currentBit == (int32)group.NumBits()) {
			if (currentLength > bestLength) {
				bestGroup = groupIndex;
				bestStart = currentStart;
				bestLength = currentLength;
			}
			if (canFindGroupLargest && currentLength > groupLargestLength) {
				groupLargestStart = currentStart;
				groupLargestLength = currentLength;
			}
		}

		if (canFindGroupLargest && !group.fLargestValid
			&& groupLargestLength >= 0) {
			group.fLargestStart = groupLargestStart;
			group.fLargestLength = groupLargestLength;
			group.fLargestValid = true;
		}

		if (bestLength >= maximum)
			break;
	}

	// If we found a suitable range, mark the blocks as in use, and
	// write the updated block bitmap back to disk
	if (bestLength < minimum)
		return B_DEVICE_FULL;

	if (bestLength > maximum)
		bestLength = maximum;
	else if (minimum > 1) {
		// make sure bestLength is a multiple of minimum
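		// (e.g. a bestLength of 13 with a minimum of 4 is rounded down to 12)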
		bestLength = round_down(bestLength, minimum);
	}

	if (fGroups[bestGroup].Allocate(transaction, bestStart, bestLength) != B_OK)
		RETURN_ERROR(B_IO_ERROR);

	CHECK_ALLOCATION_GROUP(bestGroup);

	run.allocation_group = HOST_ENDIAN_TO_BFS_INT32(bestGroup);
	run.start = HOST_ENDIAN_TO_BFS_INT16(bestStart);
	run.length = HOST_ENDIAN_TO_BFS_INT16(bestLength);

	fVolume->SuperBlock().used_blocks
		= HOST_ENDIAN_TO_BFS_INT64(fVolume->UsedBlocks() + bestLength);
		// We are not writing back the disk's superblock - it's
		// either done by the journaling code, or when the disk
		// is unmounted.
		// If the value is not correct at mount time, it will be
		// fixed anyway.

	// We need to flush any remaining blocks in the new allocation to make sure
	// they won't interfere with the file cache.
	block_cache_discard(fVolume->BlockCache(), fVolume->ToBlock(run),
		run.Length());

	T(Allocate(run));
	return B_OK;
}


status_t
BlockAllocator::AllocateForInode(Transaction& transaction,
	const block_run* parent, mode_t type, block_run& run)
{
	// Apply some allocation policies here (AllocateBlocks() will break them
	// if necessary) - we will start with those described in Dominic Giampaolo's
	// "Practical File System Design", and see how well they work

	// Files go into the same allocation group as their parent,
	// sub-directories will be inserted 8 allocation groups after
	// the one of the parent
	uint16 group = parent->AllocationGroup();
	if ((type & (S_DIRECTORY | S_INDEX_DIR | S_ATTR_DIR)) == S_DIRECTORY)
		group += 8;

	return AllocateBlocks(transaction, group, 0, 1, 1, run);
}


status_t
BlockAllocator::Allocate(Transaction& transaction, Inode* inode,
	off_t numBlocks, block_run& run, uint16 minimum)
{
	if (numBlocks <= 0)
		return B_ERROR;

	// one block_run can't hold more data than there is in one allocation group
	if (numBlocks > fGroups[0].NumBits())
		numBlocks = fGroups[0].NumBits();

	// since block_run.length is uint16, the largest number of blocks that
	// can be covered by a block_run is 65535
	// TODO: if we drop compatibility, couldn't we do this any better?
	// There are basically three possibilities:
	// a) since a length of zero doesn't make any sense, take that for 65536 -
	//    but that could cause many problems (bugs) in other areas
	// b) reduce the maximum amount of blocks per block_run, so that the
	//    remaining number of free blocks can be used in a useful manner
	//    (like 4 blocks) - but that would also reduce the maximum file size
	// c) have BlockRun::Length() return (length + 1).
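	// (For scale: at a block size of 2048 bytes, a single 65535-block run
	// already covers about 128 MB.)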
	if (numBlocks > MAX_BLOCK_RUN_LENGTH)
		numBlocks = MAX_BLOCK_RUN_LENGTH;

	// Apply some allocation policies here (AllocateBlocks() will break them
	// if necessary)
	uint16 group = inode->BlockRun().AllocationGroup();
	uint16 start = 0;

	// Are there already allocated blocks? (then just try to allocate near the
	// last one)
	if (inode->Size() > 0) {
		const data_stream& data = inode->Node().data;
		// TODO: we currently don't care for when the data stream
		// is already grown into the indirect ranges
		if (data.max_double_indirect_range == 0
			&& data.max_indirect_range == 0) {
			// Since size > 0, there must be a valid block run in this stream
			int32 last = 0;
			for (; last < NUM_DIRECT_BLOCKS - 1; last++)
				if (data.direct[last + 1].IsZero())
					break;

			group = data.direct[last].AllocationGroup();
			start = data.direct[last].Start() + data.direct[last].Length();
		}
	} else if (inode->IsContainer() || inode->IsSymLink()) {
		// directory and symbolic link data will go in the same allocation
		// group as the inode is in but after the inode data
		start = inode->BlockRun().Start();
	} else {
		// file data will start in the next allocation group
		group = inode->BlockRun().AllocationGroup() + 1;
	}

	return AllocateBlocks(transaction, group, start, numBlocks, minimum, run);
}


status_t
BlockAllocator::Free(Transaction& transaction, block_run run)
{
	RecursiveLocker lock(fLock);

	int32 group = run.AllocationGroup();
	uint16 start = run.Start();
	uint16 length = run.Length();

	FUNCTION_START(("group = %ld, start = %u, length = %u\n", group, start,
		length));
	T(Free(run));

	// We don't use Volume::IsValidBlockRun() here, because this check can be
	// stricter about the group size (the last group may have a different
	// length)
	if (group < 0 || group >= fNumGroups
		|| start > fGroups[group].NumBits()
		|| uint32(start + length) > fGroups[group].NumBits()
		|| length == 0) {
		FATAL(("tried to free an invalid block_run (%d, %u, %u)\n", (int)group,
			start, length));
		DEBUGGER(("tried to free invalid block_run"));
		return B_BAD_VALUE;
	}
	// check if someone tries to free reserved areas at the beginning of the
	// drive
	if (group == 0
		&& start < uint32(fVolume->Log().Start() + fVolume->Log().Length())) {
		FATAL(("tried to free a reserved block_run (%d, %u, %u)\n", (int)group,
			start, length));
		DEBUGGER(("tried to free reserved block"));
		return B_BAD_VALUE;
	}
#ifdef DEBUG
	if (CheckBlockRun(run) != B_OK)
		return B_BAD_DATA;
#endif

	CHECK_ALLOCATION_GROUP(group);

	if (fGroups[group].Free(transaction, start, length) != B_OK)
		RETURN_ERROR(B_IO_ERROR);

	CHECK_ALLOCATION_GROUP(group);

#ifdef DEBUG
	if (CheckBlockRun(run, NULL, false) != B_OK) {
		DEBUGGER(("CheckBlockRun() reports allocated blocks (which were just "
			"freed)\n"));
	}
#endif

	fVolume->SuperBlock().used_blocks =
		HOST_ENDIAN_TO_BFS_INT64(fVolume->UsedBlocks() - run.Length());
	return B_OK;
}


size_t
BlockAllocator::BitmapSize() const
{
	return fVolume->BlockSize() * fNumBlocks;
}


#ifdef DEBUG_FRAGMENTER
void
BlockAllocator::Fragment()
{
	AllocationBlock cached(fVolume);
	RecursiveLocker lock(fLock);

	// only leave 4 block holes
	static const uint32 kMask = 0x0f0f0f0f;
	uint32 valuesPerBlock = fVolume->BlockSize() / 4;

	for (int32 i = 0; i < fNumGroups; i++) {
		AllocationGroup& group = fGroups[i];

		for (uint32 block = 0; block < group.NumBlocks(); block++) {
			Transaction transaction(fVolume, 0);

			if (cached.SetToWritable(transaction, group, block) != B_OK)
				return;

			for (int32 index = 0; index < valuesPerBlock; index++) {
				cached.Block(index) |= HOST_ENDIAN_TO_BFS_INT32(kMask);
			}

			transaction.Done();
		}
	}
}
#endif	// DEBUG_FRAGMENTER


#ifdef DEBUG_ALLOCATION_GROUPS
void
BlockAllocator::_CheckGroup(int32 groupIndex) const
{
	AllocationBlock cached(fVolume);
	ASSERT_LOCKED_RECURSIVE(&fLock);

	AllocationGroup& group = fGroups[groupIndex];

	int32 currentStart = 0, currentLength = 0;
	int32 firstFree = -1;
	int32 largestStart = -1;
	int32 largestLength = 0;
	int32 currentBit = 0;

	for (uint32 block = 0; block < group.NumBlocks(); block++) {
		if (cached.SetTo(group, block) < B_OK) {
			panic("setting group block %d failed\n", (int)block);
			return;
		}

		for (uint32 bit = 0; bit < cached.NumBlockBits(); bit++) {
			if (!cached.IsUsed(bit)) {
				if (firstFree < 0) {
					firstFree = currentBit;
					if (!group.fLargestValid) {
						if (firstFree >= 0 && firstFree < group.fFirstFree) {
							// mostly harmless but noteworthy
							dprintf("group %d first free too late: should be "
								"%d, is %d\n", (int)groupIndex, (int)firstFree,
								(int)group.fFirstFree);
						}
						return;
					}
				}

				if (currentLength == 0) {
					// start new range
					currentStart = currentBit;
				}
				currentLength++;
			} else if (currentLength) {
				// end of a range
				if (currentLength > largestLength) {
					largestStart = currentStart;
					largestLength = currentLength;
				}
				currentLength = 0;
			}
			currentBit++;
		}
	}

	if (currentLength > largestLength) {
		largestStart = currentStart;
		largestLength = currentLength;
	}

	if (firstFree >= 0 && firstFree < group.fFirstFree) {
		// mostly harmless but noteworthy
		dprintf("group %d first free too late: should be %d, is %d\n",
			(int)groupIndex, (int)firstFree, (int)group.fFirstFree);
	}
	if (group.fLargestValid
		&& (largestStart != group.fLargestStart
			|| largestLength != group.fLargestLength)) {
		panic("bfs %p: group %d largest differs: %d.%d, checked %d.%d.\n",
			fVolume, (int)groupIndex, (int)group.fLargestStart,
			(int)group.fLargestLength, (int)largestStart, (int)largestLength);
	}
}
#endif	// DEBUG_ALLOCATION_GROUPS


//	#pragma mark - Bitmap validity checking

// TODO: implement new FS checking API
// Functions to check the validity of the bitmap - they are used from
// the "checkfs" command (since this does even a bit more than that, maybe
// we should move it some place else?)


bool
BlockAllocator::_IsValidCheckControl(const check_control* control)
{
	if (control == NULL
		|| control->magic != BFS_IOCTL_CHECK_MAGIC) {
		FATAL(("invalid check_control (%p)!\n", control));
		return false;
	}

	return true;
}


status_t
BlockAllocator::StartChecking(const check_control* control)
{
	if (!_IsValidCheckControl(control))
		return B_BAD_VALUE;

	fVolume->GetJournal(0)->Lock(NULL, true);
		// Lock the volume's journal

	recursive_lock_lock(&fLock);

	size_t size = BitmapSize();
	fCheckBitmap = (uint32*)malloc(size);
	if (fCheckBitmap == NULL) {
		recursive_lock_unlock(&fLock);
		fVolume->GetJournal(0)->Unlock(NULL, true);
		return B_NO_MEMORY;
	}

	fCheckCookie = new(std::nothrow) check_cookie();
	if (fCheckCookie == NULL) {
		free(fCheckBitmap);
		fCheckBitmap = NULL;
		recursive_lock_unlock(&fLock);
		fVolume->GetJournal(0)->Unlock(NULL, true);

		return B_NO_MEMORY;
	}

	memcpy(&fCheckCookie->control, control, sizeof(check_control));
	memset(&fCheckCookie->control.stats, 0, sizeof(control->stats));

	// initialize bitmap
	memset(fCheckBitmap, 0, size);
	for (int32 block = fVolume->Log().Start() + fVolume->Log().Length();
			block-- > 0;) {
		_SetCheckBitmapAt(block);
	}

	fCheckCookie->pass = BFS_CHECK_PASS_BITMAP;
	fCheckCookie->stack.Push(fVolume->Root());
	fCheckCookie->stack.Push(fVolume->Indices());
	fCheckCookie->iterator = NULL;
	fCheckCookie->control.stats.block_size = fVolume->BlockSize();

	// Put removed vnodes on the stack -- they are not reachable by traversing
	// the file system anymore.
	InodeList::Iterator iterator = fVolume->RemovedInodes().GetIterator();
	while (Inode* inode = iterator.Next()) {
		fCheckCookie->stack.Push(inode->BlockRun());
	}

	// TODO: check reserved area in bitmap!

	return B_OK;
}


status_t
BlockAllocator::StopChecking(check_control* control)
{
	if (fCheckCookie == NULL)
		return B_NO_INIT;

	if (fCheckCookie->iterator != NULL) {
		delete fCheckCookie->iterator;
		fCheckCookie->iterator = NULL;

		// the current directory inode is still locked in memory
		put_vnode(fVolume->FSVolume(), fVolume->ToVnode(fCheckCookie->current));
	}

	if (fVolume->IsReadOnly()) {
		// We can't fix errors on this volume
		fCheckCookie->control.flags = 0;
	}

	if (fCheckCookie->control.status != B_ENTRY_NOT_FOUND)
		FATAL(("BlockAllocator::CheckNextNode() didn't run through\n"));

	switch (fCheckCookie->pass) {
		case BFS_CHECK_PASS_BITMAP:
			// if CheckNextNode() managed to run all the way through, we
			// can fix any damage to the bitmap
			if (fCheckCookie->control.status == B_ENTRY_NOT_FOUND)
				_WriteBackCheckBitmap();
			break;

		case BFS_CHECK_PASS_INDEX:
			_FreeIndices();
			break;
	}

	fVolume->SetCheckingThread(-1);

	if (control != NULL)
		user_memcpy(control, &fCheckCookie->control, sizeof(check_control));

	free(fCheckBitmap);
	fCheckBitmap = NULL;
	delete fCheckCookie;
	fCheckCookie = NULL;
	recursive_lock_unlock(&fLock);
	fVolume->GetJournal(0)->Unlock(NULL, true);

	return B_OK;
}


status_t
BlockAllocator::CheckNextNode(check_control* control)
{
	if (fCheckCookie == NULL)
		return B_NO_INIT;

	fVolume->SetCheckingThread(find_thread(NULL));

	// Make sure the user control is copied on exit
	class CopyControlOnExit {
	public:
		CopyControlOnExit(check_control* source, check_control* userTarget)
			:
			fSource(source),
			fTarget(userTarget)
		{
		}

		~CopyControlOnExit()
		{
			if (fTarget != NULL)
				user_memcpy(fTarget, fSource, sizeof(check_control));
		}

	private:
		check_control*	fSource;
		check_control*	fTarget;
	} copyControl(&fCheckCookie->control, control);

	while (true) {
		if (fCheckCookie->iterator == NULL) {
			if (!fCheckCookie->stack.Pop(&fCheckCookie->current)) {
				// No more runs on the stack, we might be finished!
				if (fCheckCookie->pass == BFS_CHECK_PASS_BITMAP
					&& !fCheckCookie->indices.IsEmpty()) {
					// Start second pass to repair indices
					_WriteBackCheckBitmap();

					fCheckCookie->pass = BFS_CHECK_PASS_INDEX;
					fCheckCookie->control.pass = BFS_CHECK_PASS_INDEX;

					status_t status = _PrepareIndices();
					if (status != B_OK) {
						fCheckCookie->control.status = status;
						return status;
					}

					fCheckCookie->stack.Push(fVolume->Root());
					continue;
				}

				fCheckCookie->control.status = B_ENTRY_NOT_FOUND;
				return B_ENTRY_NOT_FOUND;
			}

			Vnode vnode(fVolume, fCheckCookie->current);
			Inode* inode;
			if (vnode.Get(&inode) != B_OK) {
				FATAL(("check: Could not open inode at %" B_PRIdOFF "\n",
					fVolume->ToBlock(fCheckCookie->current)));
				continue;
			}

			fCheckCookie->control.inode = inode->ID();
			fCheckCookie->control.mode = inode->Mode();

			if (!inode->IsContainer()) {
				// Check file
				fCheckCookie->control.errors = 0;
				fCheckCookie->control.status = CheckInode(inode, NULL);

				if (inode->GetName(fCheckCookie->control.name) < B_OK)
					strcpy(fCheckCookie->control.name, "(node has no name)");

				return B_OK;
			}

			// Check directory
			BPlusTree* tree = inode->Tree();
			if (tree == NULL) {
				FATAL(("check: could not open b+tree from inode at %" B_PRIdOFF
					"\n", fVolume->ToBlock(fCheckCookie->current)));
				continue;
			}

			fCheckCookie->parent = inode;
			fCheckCookie->parent_mode = inode->Mode();

			// get iterator for the next directory
			fCheckCookie->iterator = new(std::nothrow) TreeIterator(tree);
			if (fCheckCookie->iterator == NULL)
				RETURN_ERROR(B_NO_MEMORY);

			// the inode must stay locked in memory until the iterator is freed
			vnode.Keep();

			// check the inode of the directory
			fCheckCookie->control.errors = 0;
			fCheckCookie->control.status = CheckInode(inode, NULL);

			if (inode->GetName(fCheckCookie->control.name) != B_OK)
				strcpy(fCheckCookie->control.name, "(dir has no name)");

			return B_OK;
		}

		char name[B_FILE_NAME_LENGTH];
		uint16 length;
		ino_t id;

		status_t status = fCheckCookie->iterator->GetNextEntry(name, &length,
			B_FILE_NAME_LENGTH, &id);
		if (status != B_OK) {
			// we no longer need this iterator
			delete fCheckCookie->iterator;
			fCheckCookie->iterator = NULL;

			// unlock the directory's inode from memory
			put_vnode(fVolume->FSVolume(),
				fVolume->ToVnode(fCheckCookie->current));

			if (status == B_ENTRY_NOT_FOUND) {
				// We iterated over all entries already, just go on to the next
				continue;
			}

			// Iterating over the B+tree failed - we let the checkfs run
			// fail completely, as we would delete all files we cannot
			// access.
			// TODO: maybe have a force parameter that actually does that.
			// TODO: we also need to be able to repair broken B+trees!
			return status;
		}

		// ignore "." and ".." entries
		if (!strcmp(name, ".") || !strcmp(name, ".."))
			continue;

		// fill in the control data as soon as we have them
		strlcpy(fCheckCookie->control.name, name, B_FILE_NAME_LENGTH);
		fCheckCookie->control.inode = id;
		fCheckCookie->control.errors = 0;

		Vnode vnode(fVolume, id);
		Inode* inode;
		if (vnode.Get(&inode) != B_OK) {
			FATAL(("Could not open inode ID %" B_PRIdINO "!\n", id));
			fCheckCookie->control.errors |= BFS_COULD_NOT_OPEN;

			if ((fCheckCookie->control.flags & BFS_REMOVE_INVALID) != 0) {
				status = _RemoveInvalidNode(fCheckCookie->parent,
					fCheckCookie->iterator->Tree(), NULL, name);
			} else
				status = B_ERROR;

			fCheckCookie->control.status = status;
			return B_OK;
		}

		// check if the inode's name is the same as in the b+tree
		if (fCheckCookie->pass == BFS_CHECK_PASS_BITMAP
			&& inode->IsRegularNode()) {
			RecursiveLocker locker(inode->SmallDataLock());
			NodeGetter node(fVolume, inode);

			const char* localName = inode->Name(node.Node());
			if (localName == NULL || strcmp(localName, name)) {
				fCheckCookie->control.errors |= BFS_NAMES_DONT_MATCH;
				FATAL(("Names differ: tree \"%s\", inode \"%s\"\n", name,
					localName));

				if ((fCheckCookie->control.flags & BFS_FIX_NAME_MISMATCHES)
						!= 0) {
					// Rename the inode
					Transaction transaction(fVolume, inode->BlockNumber());

					// Note, this may need extra blocks, but the inode will
					// only be checked afterwards, so that it won't be lost
					status = inode->SetName(transaction, name);
					if (status == B_OK)
						status = inode->WriteBack(transaction);
					if (status == B_OK)
						status = transaction.Done();
					if (status != B_OK) {
						fCheckCookie->control.status = status;
						return B_OK;
					}
				}
			}
		}

		fCheckCookie->control.mode = inode->Mode();

		// Check for the correct mode of the node (if the mode of the
		// file doesn't fit its parent, there is a serious problem)
		if (fCheckCookie->pass == BFS_CHECK_PASS_BITMAP
			&& (((fCheckCookie->parent_mode & S_ATTR_DIR) != 0
					&& !inode->IsAttribute())
				|| ((fCheckCookie->parent_mode & S_INDEX_DIR) != 0
					&& !inode->IsIndex())
				|| (is_directory(fCheckCookie->parent_mode)
					&& !inode->IsRegularNode()))) {
			FATAL(("inode at %" B_PRIdOFF " is of wrong type: %o (parent "
				"%o at %" B_PRIdOFF ")!\n", inode->BlockNumber(),
				inode->Mode(), fCheckCookie->parent_mode,
				fCheckCookie->parent->BlockNumber()));

			// if we are allowed to fix errors, we should remove the file
			if ((fCheckCookie->control.flags & BFS_REMOVE_WRONG_TYPES) != 0
				&& (fCheckCookie->control.flags & BFS_FIX_BITMAP_ERRORS)
						!= 0) {
				status = _RemoveInvalidNode(fCheckCookie->parent, NULL,
					inode, name);
			} else
				status = B_ERROR;

			fCheckCookie->control.errors |= BFS_WRONG_TYPE;
			fCheckCookie->control.status = status;
			return B_OK;
		}

		// push the directory on the stack so that it will be scanned later
		if (inode->IsContainer() && !inode->IsIndex())
			fCheckCookie->stack.Push(inode->BlockRun());
		else {
			// check it now
			fCheckCookie->control.status = CheckInode(inode, name);
			return B_OK;
		}
	}
	// is never reached
}


status_t
BlockAllocator::_RemoveInvalidNode(Inode* parent, BPlusTree* tree, Inode* inode,
	const char* name)
{
	// It's safe to start a transaction, because Inode::Remove()
	// won't touch the block bitmap (which we hold the lock for)
	// if we set the INODE_DONT_FREE_SPACE flag - since we fix
	// the bitmap anyway.
	Transaction transaction(fVolume, parent->BlockNumber());
	status_t status;

	if (inode != NULL) {
		inode->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_DONT_FREE_SPACE);

		status = parent->Remove(transaction, name, NULL, false, true);
	} else {
		parent->WriteLockInTransaction(transaction);

		// does the file even exist?
		off_t id;
		status = tree->Find((uint8*)name, (uint16)strlen(name), &id);
		if (status == B_OK)
			status = tree->Remove(transaction, name, id);
	}

	if (status == B_OK) {
		entry_cache_remove(fVolume->ID(), parent->ID(), name);
		transaction.Done();
	}

	return status;
}


bool
BlockAllocator::_CheckBitmapIsUsedAt(off_t block) const
{
	size_t size = BitmapSize();
	uint32 index = block / 32;	// 32bit resolution
	if (index > size / 4)
		return false;

	return BFS_ENDIAN_TO_HOST_INT32(fCheckBitmap[index])
		& (1UL << (block & 0x1f));
}


void
BlockAllocator::_SetCheckBitmapAt(off_t block)
{
	size_t size = BitmapSize();
	uint32 index = block / 32;	// 32bit resolution
	if (index > size / 4)
		return;

	fCheckBitmap[index] |= HOST_ENDIAN_TO_BFS_INT32(1UL << (block & 0x1f));
}


status_t
BlockAllocator::_WriteBackCheckBitmap()
{
	if (fVolume->IsReadOnly())
		return B_OK;

	// calculate the number of used blocks in the check bitmap
	size_t size = BitmapSize();
	off_t usedBlocks = 0LL;

	// TODO: update the allocation groups used blocks info
	for (uint32 i = size >> 2; i-- > 0;) {
		uint32 compare = 1;
		// Count the number of bits set
		for (int16 j = 0; j < 32; j++, compare <<= 1) {
			if ((compare & fCheckBitmap[i]) != 0)
				usedBlocks++;
		}
	}

	fCheckCookie->control.stats.freed = fVolume->UsedBlocks() - usedBlocks
		+ fCheckCookie->control.stats.missing;
	if (fCheckCookie->control.stats.freed < 0)
		fCheckCookie->control.stats.freed = 0;

	// Should we fix errors? Were there any errors we can fix?
	if ((fCheckCookie->control.flags & BFS_FIX_BITMAP_ERRORS) != 0
		&& (fCheckCookie->control.stats.freed != 0
			|| fCheckCookie->control.stats.missing != 0)) {
		// If so, write the check bitmap back over the original one,
		// and use transactions here to play safe - we even use several
		// transactions, so that we don't blow the maximum log size
		// on large disks, since we don't need to make this atomic.
#if 0
		// prints the blocks that differ
		off_t block = 0;
		for (int32 i = 0; i < fNumGroups; i++) {
			AllocationBlock cached(fVolume);
			for (uint32 j = 0; j < fGroups[i].NumBlocks(); j++) {
				cached.SetTo(fGroups[i], j);
				for (uint32 k = 0; k < cached.NumBlockBits(); k++) {
					if (cached.IsUsed(k) != _CheckBitmapIsUsedAt(block)) {
						dprintf("differ block %lld (should be %d)\n", block,
							_CheckBitmapIsUsedAt(block));
					}
					block++;
				}
			}
		}
#endif

		fVolume->SuperBlock().used_blocks
			= HOST_ENDIAN_TO_BFS_INT64(usedBlocks);

		size_t blockSize = fVolume->BlockSize();

		for (uint32 i = 0; i < fNumBlocks; i += 512) {
			Transaction transaction(fVolume, 1 + i);

			uint32 blocksToWrite = 512;
			if (blocksToWrite + i > fNumBlocks)
				blocksToWrite = fNumBlocks - i;

			status_t status = transaction.WriteBlocks(1 + i,
				(uint8*)fCheckBitmap + i * blockSize, blocksToWrite);
			if (status < B_OK) {
				FATAL(("error writing bitmap: %s\n", strerror(status)));
				return status;
			}
			transaction.Done();
		}
	}

	return B_OK;
}
1691
1692
1693/*!	Checks whether or not the specified block range is allocated or not,
1694	depending on the \a allocated argument.
1695*/
1696status_t
1697BlockAllocator::CheckBlocks(off_t start, off_t length, bool allocated)
1698{
1699	if (start < 0 || start + length > fVolume->NumBlocks())
1700		return B_BAD_VALUE;
1701
1702	int32 group = start >> fVolume->AllocationGroupShift();
1703	uint32 bitmapBlock = start / (fVolume->BlockSize() << 3);
1704	uint32 blockOffset = start % (fVolume->BlockSize() << 3);
1705
1706	uint32 groupBlock = bitmapBlock % fBlocksPerGroup;
1707
1708	AllocationBlock cached(fVolume);
1709
1710	while (groupBlock < fGroups[group].NumBlocks() && length > 0) {
1711		if (cached.SetTo(fGroups[group], groupBlock) != B_OK)
1712			RETURN_ERROR(B_IO_ERROR);
1713
1714		for (; blockOffset < cached.NumBlockBits() && length > 0;
1715				blockOffset++, length--) {
1716			if (cached.IsUsed(blockOffset) != allocated) {
1717				RETURN_ERROR(B_BAD_DATA);
1718			}
1719		}
1720
1721		blockOffset = 0;
1722
1723		if (++groupBlock >= fGroups[group].NumBlocks()) {
1724			groupBlock = 0;
1725			group++;
1726		}
1727	}
1728
1729	return B_OK;
1730}
1731
1732
1733status_t
1734BlockAllocator::CheckBlockRun(block_run run, const char* type, bool allocated)
1735{
1736	if (run.AllocationGroup() < 0 || run.AllocationGroup() >= fNumGroups
1737		|| run.Start() > fGroups[run.AllocationGroup()].fNumBits
1738		|| uint32(run.Start() + run.Length())
1739				> fGroups[run.AllocationGroup()].fNumBits
1740		|| run.length == 0) {
1741		PRINT(("%s: block_run(%ld, %u, %u) is invalid!\n", type,
1742			run.AllocationGroup(), run.Start(), run.Length()));
1743		if (fCheckCookie == NULL)
1744			return B_BAD_DATA;
1745
1746		fCheckCookie->control.errors |= BFS_INVALID_BLOCK_RUN;
1747		return B_OK;
1748	}
1749
1750	uint32 bitsPerBlock = fVolume->BlockSize() << 3;
1751	uint32 block = run.Start() / bitsPerBlock;
1752	uint32 pos = run.Start() % bitsPerBlock;
1753	int32 length = 0;
1754	off_t firstMissing = -1, firstSet = -1;
1755	off_t firstGroupBlock
1756		= (off_t)run.AllocationGroup() << fVolume->AllocationGroupShift();
1757
1758	AllocationBlock cached(fVolume);
1759
1760	for (; block < fBlocksPerGroup && length < run.Length(); block++, pos = 0) {
1761		if (cached.SetTo(fGroups[run.AllocationGroup()], block) < B_OK)
1762			RETURN_ERROR(B_IO_ERROR);
1763
1764		if (pos >= cached.NumBlockBits()) {
1765			// something very strange has happened...
1766			RETURN_ERROR(B_ERROR);
1767		}
1768
1769		while (length < run.Length() && pos < cached.NumBlockBits()) {
1770			if (cached.IsUsed(pos) != allocated) {
1771				if (fCheckCookie == NULL) {
1772					PRINT(("%s: block_run(%ld, %u, %u) is only partially "
1773						"allocated (pos = %ld, length = %ld)!\n", type,
1774						run.AllocationGroup(), run.Start(), run.Length(),
1775						pos, length));
1776					return B_BAD_DATA;
1777				}
1778				if (firstMissing == -1) {
1779					firstMissing = firstGroupBlock + pos + block * bitsPerBlock;
1780					fCheckCookie->control.errors |= BFS_MISSING_BLOCKS;
1781				}
1782				fCheckCookie->control.stats.missing++;
1783			} else if (firstMissing != -1) {
1784				PRINT(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are "
1785					"%sallocated!\n", type, run.AllocationGroup(), run.Start(),
1786					run.Length(), firstMissing,
1787					firstGroupBlock + pos + block * bitsPerBlock - 1,
1788					allocated ? "not " : ""));
1789				firstMissing = -1;
1790			}
1791
1792			if (fCheckBitmap != NULL) {
1793				// Set the block in the check bitmap as well, but have a look
1794				// if it is already allocated first
1795				uint32 offset = pos + block * bitsPerBlock;
1796				if (_CheckBitmapIsUsedAt(firstGroupBlock + offset)) {
1797					if (firstSet == -1) {
1798						firstSet = firstGroupBlock + offset;
1799						fCheckCookie->control.errors |= BFS_BLOCKS_ALREADY_SET;
1800						dprintf("block %" B_PRIdOFF " is already set!!!\n",
1801							firstGroupBlock + offset);
1802					}
1803					fCheckCookie->control.stats.already_set++;
1804				} else {
1805					if (firstSet != -1) {
1806						FATAL(("%s: block_run(%d, %u, %u): blocks %" B_PRIdOFF
1807							" - %" B_PRIdOFF " are already set!\n", type,
1808							(int)run.AllocationGroup(), run.Start(),
1809							run.Length(), firstSet,
1810							firstGroupBlock + offset - 1));
1811						firstSet = -1;
1812					}
1813					_SetCheckBitmapAt(firstGroupBlock + offset);
1814				}
1815			}
1816			length++;
1817			pos++;
1818		}
1819
1820		if (block + 1 >= fBlocksPerGroup || length >= run.Length()) {
1821			if (firstMissing != -1) {
1822				PRINT(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are not "
1823					"allocated!\n", type, run.AllocationGroup(), run.Start(),
1824					run.Length(), firstMissing,
1825					firstGroupBlock + pos + block * bitsPerBlock - 1));
1826			}
1827			if (firstSet != -1) {
1828				FATAL(("%s: block_run(%d, %u, %u): blocks %" B_PRIdOFF " - %"
1829					B_PRIdOFF " are already set!\n", type,
1830					(int)run.AllocationGroup(), run.Start(), run.Length(),
1831					firstSet,
1832					firstGroupBlock + pos + block * bitsPerBlock - 1));
1833			}
1834		}
1835	}
1836
1837	return B_OK;
1838}
1839
1840
1841status_t
1842BlockAllocator::CheckInode(Inode* inode, const char* name)
1843{
1844	if (fCheckCookie != NULL && fCheckBitmap == NULL)
1845		return B_NO_INIT;
1846	if (inode == NULL)
1847		return B_BAD_VALUE;
1848
1849	switch (fCheckCookie->pass) {
1850		case BFS_CHECK_PASS_BITMAP:
1851		{
1852			status_t status = _CheckInodeBlocks(inode, name);
1853			if (status != B_OK)
1854				return status;
1855
1856			// Check the B+tree as well
1857			if (inode->IsContainer()) {
1858				bool repairErrors
1859					= (fCheckCookie->control.flags & BFS_FIX_BPLUSTREES) != 0;
1860				bool errorsFound = false;
1861				status = inode->Tree()->Validate(repairErrors, errorsFound);
1862				if (errorsFound) {
1863					fCheckCookie->control.errors |= BFS_INVALID_BPLUSTREE;
1864					if (inode->IsIndex() && name != NULL && repairErrors) {
1865						// We completely rebuild corrupt indices
1866						check_index* index = new(std::nothrow) check_index;
1867						if (index == NULL)
1868							return B_NO_MEMORY;
1869
1870						strlcpy(index->name, name, sizeof(index->name));
1871						index->run = inode->BlockRun();
1872						fCheckCookie->indices.Push(index);
1873					}
1874				}
1875			}
1876
1877			return status;
1878		}
1879
1880		case BFS_CHECK_PASS_INDEX:
1881			return _AddInodeToIndex(inode);
1882	}
1883
1884	return B_OK;
1885}
1886
1887
status_t
BlockAllocator::_CheckInodeBlocks(Inode* inode, const char* name)
{
	status_t status = CheckBlockRun(inode->BlockRun(), "inode");
	if (status != B_OK)
		return status;

	// If the inode has an attribute directory, push it on the stack
	if (!inode->Attributes().IsZero())
		fCheckCookie->stack.Push(inode->Attributes());

	if (inode->IsSymLink() && (inode->Flags() & INODE_LONG_SYMLINK) == 0) {
		// Short symlinks store their target in the inode itself and do
		// not have a data stream to check
		if (strlen(inode->Node().short_symlink) >= SHORT_SYMLINK_NAME_LENGTH)
			return B_BAD_DATA;

		return B_OK;
	}

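	// Walk the data stream; the direct, indirect, and double indirect
	// ranges are checked one after the other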
	data_stream* data = &inode->Node().data;

	// check the direct range

	if (data->max_direct_range) {
		for (int32 i = 0; i < NUM_DIRECT_BLOCKS; i++) {
			if (data->direct[i].IsZero())
				break;

			status = CheckBlockRun(data->direct[i], "direct");
			if (status < B_OK)
				return status;

			fCheckCookie->control.stats.direct_block_runs++;
			fCheckCookie->control.stats.blocks_in_direct
				+= data->direct[i].Length();
		}
	}

	CachedBlock cached(fVolume);

	// check the indirect range

	if (data->max_indirect_range) {
		status = CheckBlockRun(data->indirect, "indirect");
		if (status < B_OK)
			return status;

		off_t block = fVolume->ToBlock(data->indirect);

		for (int32 i = 0; i < data->indirect.Length(); i++) {
			block_run* runs = (block_run*)cached.SetTo(block + i);
			if (runs == NULL)
				RETURN_ERROR(B_IO_ERROR);

			int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run);
			int32 index = 0;
			for (; index < runsPerBlock; index++) {
				if (runs[index].IsZero())
					break;

				status = CheckBlockRun(runs[index], "indirect->run");
				if (status < B_OK)
					return status;

				fCheckCookie->control.stats.indirect_block_runs++;
				fCheckCookie->control.stats.blocks_in_indirect
					+= runs[index].Length();
			}
			fCheckCookie->control.stats.indirect_array_blocks++;

			if (index < runsPerBlock)
				break;
		}
	}

	// check the double indirect range

	if (data->max_double_indirect_range) {
		status = CheckBlockRun(data->double_indirect, "double indirect");
		if (status != B_OK)
			return status;

		int32 runsPerBlock = runs_per_block(fVolume->BlockSize());
		int32 runsPerArray = runsPerBlock * data->double_indirect.Length();

		CachedBlock cachedDirect(fVolume);

		for (int32 indirectIndex = 0; indirectIndex < runsPerArray;
				indirectIndex++) {
			// get the indirect array block
			block_run* array = (block_run*)cached.SetTo(
				fVolume->ToBlock(data->double_indirect)
					+ indirectIndex / runsPerBlock);
			if (array == NULL)
				return B_IO_ERROR;

			block_run indirect = array[indirectIndex % runsPerBlock];
			// are we finished yet?
			if (indirect.IsZero())
				return B_OK;

			status = CheckBlockRun(indirect, "double indirect->runs");
			if (status != B_OK)
				return status;

			int32 maxIndex
				= ((uint32)indirect.Length() << fVolume->BlockShift())
					/ sizeof(block_run);

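			// "index" counts the block_runs of the whole indirect run;
			// index / runsPerBlock selects the block to cache, and
			// index % runsPerBlock the run within that block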
			for (int32 index = 0; index < maxIndex; ) {
				block_run* runs = (block_run*)cachedDirect.SetTo(
					fVolume->ToBlock(indirect) + index / runsPerBlock);
				if (runs == NULL)
					return B_IO_ERROR;

				do {
					// are we finished yet?
					if (runs[index % runsPerBlock].IsZero())
						return B_OK;

					status = CheckBlockRun(runs[index % runsPerBlock],
						"double indirect->runs->run");
					if (status != B_OK)
						return status;

					fCheckCookie->control.stats.double_indirect_block_runs++;
					fCheckCookie->control.stats.blocks_in_double_indirect
						+= runs[index % runsPerBlock].Length();
				} while ((++index % runsPerBlock) != 0);
			}

			fCheckCookie->control.stats.double_indirect_array_blocks++;
		}
	}

	return B_OK;
}


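/*!	Opens all indices that have been scheduled for rebuilding, and
	empties their trees. The vnodes are kept in memory (via Vnode::Keep())
	until _FreeIndices() is called. Returns B_ENTRY_NOT_FOUND if there is
	nothing to rebuild.
*/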
status_t
BlockAllocator::_PrepareIndices()
{
	int32 count = 0;

	for (int32 i = 0; i < fCheckCookie->indices.CountItems(); i++) {
		check_index* index = fCheckCookie->indices.Array()[i];
		Vnode vnode(fVolume, index->run);
		Inode* inode;
		status_t status = vnode.Get(&inode);
		if (status != B_OK) {
			FATAL(("check: Could not open index at %" B_PRIdOFF "\n",
				fVolume->ToBlock(index->run)));
			return status;
		}

		BPlusTree* tree = inode->Tree();
		if (tree == NULL) {
			// TODO: We can't yet repair those
			continue;
		}

		status = tree->MakeEmpty();
		if (status != B_OK)
			return status;

		index->inode = inode;
		vnode.Keep();
		count++;
	}

	return count == 0 ? B_ENTRY_NOT_FOUND : B_OK;
}


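/*!	Releases the vnodes of all indices opened by _PrepareIndices(), and
	empties the index list of the check cookie.
*/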
void
BlockAllocator::_FreeIndices()
{
	for (int32 i = 0; i < fCheckCookie->indices.CountItems(); i++) {
		check_index* index = fCheckCookie->indices.Array()[i];
		if (index->inode != NULL) {
			put_vnode(fVolume->FSVolume(),
				fVolume->ToVnode(index->inode->BlockRun()));
		}
	}
	fCheckCookie->indices.MakeEmpty();
}


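/*!	Re-adds \a inode to all indices that are being rebuilt; all index
	updates are grouped into a single transaction.
*/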
status_t
BlockAllocator::_AddInodeToIndex(Inode* inode)
{
	Transaction transaction(fVolume, inode->BlockNumber());

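	// Insert the inode into each index it belongs to, using the key that
	// the respective index is defined on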
	for (int32 i = 0; i < fCheckCookie->indices.CountItems(); i++) {
		check_index* index = fCheckCookie->indices.Array()[i];
		if (index->inode == NULL)
			continue;

		index->inode->WriteLockInTransaction(transaction);

		BPlusTree* tree = index->inode->Tree();
		if (tree == NULL)
			return B_ERROR;

		status_t status = B_OK;

		if (!strcmp(index->name, "name")) {
			if (inode->InNameIndex()) {
				char name[B_FILE_NAME_LENGTH];
				if (inode->GetName(name, B_FILE_NAME_LENGTH) != B_OK)
					return B_ERROR;

				status = tree->Insert(transaction, name, inode->ID());
			}
		} else if (!strcmp(index->name, "last_modified")) {
			if (inode->InLastModifiedIndex()) {
				status = tree->Insert(transaction, inode->OldLastModified(),
					inode->ID());
			}
		} else if (!strcmp(index->name, "size")) {
			if (inode->InSizeIndex())
				status = tree->Insert(transaction, inode->Size(), inode->ID());
		} else {
			uint8 key[BPLUSTREE_MAX_KEY_LENGTH];
			size_t keyLength = BPLUSTREE_MAX_KEY_LENGTH;
			if (inode->ReadAttribute(index->name, B_ANY_TYPE, 0, key,
					&keyLength) == B_OK) {
				status = tree->Insert(transaction, key, keyLength, inode->ID());
			}
		}

		if (status != B_OK)
			return status;
	}

	return transaction.Done();
}


//	#pragma mark - debugger commands


#ifdef BFS_DEBUGGER_COMMANDS

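/*!	Dumps the state of all allocation groups, or only of the group
	specified by \a index if it is not -1.
*/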
void
BlockAllocator::Dump(int32 index)
{
	kprintf("allocation groups: %" B_PRId32 " (base %p)\n", fNumGroups,
		fGroups);
	kprintf("blocks per group: %" B_PRId32 "\n", fBlocksPerGroup);

	for (int32 i = 0; i < fNumGroups; i++) {
		if (index != -1 && i != index)
			continue;

		AllocationGroup& group = fGroups[i];

		kprintf("[%3" B_PRId32 "] num bits:       %" B_PRIu32 "  (%p)\n", i,
			group.NumBits(), &group);
		kprintf("      num blocks:     %" B_PRIu32 "\n", group.NumBlocks());
		kprintf("      start:          %" B_PRId32 "\n", group.Start());
		kprintf("      first free:     %" B_PRId32 "\n", group.fFirstFree);
		kprintf("      largest start:  %" B_PRId32 "%s\n", group.fLargestStart,
			group.fLargestValid ? "" : "  (invalid)");
		kprintf("      largest length: %" B_PRId32 "\n", group.fLargestLength);
		kprintf("      free bits:      %" B_PRId32 "\n", group.fFreeBits);
	}
}


#if BFS_TRACING
static char kTraceBuffer[256];


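/*!	Debugger command that dumps all allocation and free trace entries
	that touch the given block; useful to find overlapping actions.
*/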
int
dump_block_allocator_blocks(int argc, char** argv)
{
	if (argc != 3 || !strcmp(argv[1], "--help")) {
		kprintf("usage: %s <ptr-to-volume> <block>\n", argv[0]);
		return 0;
	}

	Volume* volume = (Volume*)parse_expression(argv[1]);
	off_t block = parse_expression(argv[2]);

	// iterate over all tracing entries to find overlapping actions

	using namespace BFSBlockTracing;

	LazyTraceOutput out(kTraceBuffer, sizeof(kTraceBuffer), 0);
	TraceEntryIterator iterator;
	while (TraceEntry* _entry = iterator.Next()) {
		if (Allocate* entry = dynamic_cast<Allocate*>(_entry)) {
			off_t first = volume->ToBlock(entry->Run());
			off_t last = first - 1 + entry->Run().Length();
			if (block >= first && block <= last) {
				out.Clear();
				const char* dump = out.DumpEntry(entry);
				kprintf("%5ld. %s\n", iterator.Index(), dump);
			}
		} else if (Free* entry = dynamic_cast<Free*>(_entry)) {
			off_t first = volume->ToBlock(entry->Run());
			off_t last = first - 1 + entry->Run().Length();
			if (block >= first && block <= last) {
				out.Clear();
				const char* dump = out.DumpEntry(entry);
				kprintf("%5ld. %s\n", iterator.Index(), dump);
			}
		}
	}

	return 0;
}
#endif


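/*!	Debugger command that dumps the allocation groups of a volume; an
	optional group index limits the output to a single group.
*/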
int
dump_block_allocator(int argc, char** argv)
{
	int32 group = -1;
	if (argc == 3) {
		group = parse_expression(argv[2]);
		argc--;
	}

	if (argc != 2 || !strcmp(argv[1], "--help")) {
		kprintf("usage: %s <ptr-to-volume> [group]\n", argv[0]);
		return 0;
	}

	Volume* volume = (Volume*)parse_expression(argv[1]);
	BlockAllocator& allocator = volume->Allocator();

	allocator.Dump(group);
	return 0;
}

#endif	// BFS_DEBUGGER_COMMANDS