1/*
 * Copyright 2001-2019, Axel Dörfler, axeld@pinc-software.de.
3 * This file may be used under the terms of the MIT License.
4 */
5
6
7//! superblock, mounting, etc.
8
9
10#include "Attribute.h"
11#include "CheckVisitor.h"
12#include "Debug.h"
13#include "file_systems/DeviceOpener.h"
14#include "Inode.h"
15#include "Journal.h"
16#include "Query.h"
17#include "Volume.h"
18
19
20static const int32 kDesiredAllocationGroups = 56;
	// This is the number of allocation groups we try to create on
	// newly initialized disks.
	// That's only relevant for smaller disks, though, since any
	// of today's disk sizes already reach the maximum length
	// of an allocation group (65536 blocks).
	// This setting seems to produce appropriate numbers for smaller
	// disks, though (i.e. you can create a 400 MB file on a 1 GB disk
	// without the need for double indirect blocks).
30
31
32//	#pragma mark -
33
34
35bool
36disk_super_block::IsValid() const
37{
38	if (Magic1() != (int32)SUPER_BLOCK_MAGIC1
39		|| Magic2() != (int32)SUPER_BLOCK_MAGIC2
40		|| Magic3() != (int32)SUPER_BLOCK_MAGIC3
41		|| (int32)block_size != inode_size
42		|| ByteOrder() != SUPER_BLOCK_FS_LENDIAN
43		|| (1UL << BlockShift()) != BlockSize()
44		|| AllocationGroups() < 1
45		|| AllocationGroupShift() < 1
46		|| BlocksPerAllocationGroup() < 1
47		|| NumBlocks() < 10
48		|| AllocationGroups() != divide_roundup(NumBlocks(),
49			1L << AllocationGroupShift()))
50		return false;
51
52	return true;
53}
54
55
56void
57disk_super_block::Initialize(const char* diskName, off_t numBlocks,
58	uint32 blockSize)
59{
60	memset(this, 0, sizeof(disk_super_block));
61
62	magic1 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC1);
63	magic2 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC2);
64	magic3 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC3);
65	fs_byte_order = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_FS_LENDIAN);
66	flags = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_DISK_CLEAN);
67
68	strlcpy(name, diskName, sizeof(name));
69
70	int32 blockShift = 9;
71	while ((1UL << blockShift) < blockSize) {
72		blockShift++;
73	}
74
75	block_size = inode_size = HOST_ENDIAN_TO_BFS_INT32(blockSize);
76	block_shift = HOST_ENDIAN_TO_BFS_INT32(blockShift);
77
78	num_blocks = HOST_ENDIAN_TO_BFS_INT64(numBlocks);
79	used_blocks = 0;
80
81	// Get the minimum ag_shift (that's determined by the block size)
82
83	int32 bitsPerBlock = blockSize << 3;
84	off_t bitmapBlocks = (numBlocks + bitsPerBlock - 1) / bitsPerBlock;
85	int32 blocksPerGroup = 1;
86	int32 groupShift = 13;
87
88	for (int32 i = 8192; i < bitsPerBlock; i *= 2) {
89		groupShift++;
90	}
91
92	// Many allocation groups help applying allocation policies, but if
93	// they are too small, we will need to many block_runs to cover large
94	// files (see above to get an explanation of the kDesiredAllocationGroups
95	// constant).
96
97	int32 numGroups;
98
99	while (true) {
100		numGroups = (bitmapBlocks + blocksPerGroup - 1) / blocksPerGroup;
101		if (numGroups > kDesiredAllocationGroups) {
102			if (groupShift == 16)
103				break;
104
105			groupShift++;
106			blocksPerGroup *= 2;
107		} else
108			break;
109	}
110
111	num_ags = HOST_ENDIAN_TO_BFS_INT32(numGroups);
112	blocks_per_ag = HOST_ENDIAN_TO_BFS_INT32(blocksPerGroup);
113	ag_shift = HOST_ENDIAN_TO_BFS_INT32(groupShift);
114}
115
116
117//	#pragma mark -
118
119
120Volume::Volume(fs_volume* volume)
121	:
122	fVolume(volume),
123	fBlockAllocator(this),
124	fRootNode(NULL),
125	fIndicesNode(NULL),
126	fDirtyCachedBlocks(0),
127	fFlags(0),
128	fCheckingThread(-1),
129	fCheckVisitor(NULL)
130{
131	mutex_init(&fLock, "bfs volume");
132	mutex_init(&fQueryLock, "bfs queries");
133}
134
135
136Volume::~Volume()
137{
138	mutex_destroy(&fQueryLock);
139	mutex_destroy(&fLock);
140}
141
142
143bool
144Volume::IsValidSuperBlock() const
145{
146	return fSuperBlock.IsValid();
147}
148
149
150/*!	Checks whether the given block number may be the location of an inode block.
151*/
152bool
153Volume::IsValidInodeBlock(off_t block) const
154{
155	return block > fSuperBlock.LogEnd() && block < NumBlocks();
156}
157
158
159void
160Volume::Panic()
161{
162	FATAL(("Disk corrupted... switch to read-only mode!\n"));
163	fFlags |= VOLUME_READ_ONLY;
164#if KDEBUG
165	kernel_debugger("BFS panics!");
166#endif
167}
168
169
170status_t
171Volume::Mount(const char* deviceName, uint32 flags)
172{
173	// TODO: validate the FS in write mode as well!
174#if (B_HOST_IS_LENDIAN && defined(BFS_BIG_ENDIAN_ONLY)) \
175	|| (B_HOST_IS_BENDIAN && defined(BFS_LITTLE_ENDIAN_ONLY))
176	// in big endian mode, we only mount read-only for now
177	flags |= B_MOUNT_READ_ONLY;
178#endif
179
180	DeviceOpener opener(deviceName, (flags & B_MOUNT_READ_ONLY) != 0
181		? O_RDONLY : O_RDWR);
182	fDevice = opener.Device();
183	if (fDevice < B_OK)
184		RETURN_ERROR(fDevice);
185
186	if (opener.IsReadOnly())
187		fFlags |= VOLUME_READ_ONLY;
188
189	// read the superblock
190	if (Identify(fDevice, &fSuperBlock) != B_OK) {
191		FATAL(("invalid superblock!\n"));
192		return B_BAD_VALUE;
193	}
194
195	// initialize short hands to the superblock (to save byte swapping)
196	fBlockSize = fSuperBlock.BlockSize();
197	fBlockShift = fSuperBlock.BlockShift();
198	fAllocationGroupShift = fSuperBlock.AllocationGroupShift();
199
200	// check if the device size is large enough to hold the file system
201	off_t diskSize;
202	if (opener.GetSize(&diskSize, &fDeviceBlockSize) != B_OK)
203		RETURN_ERROR(B_ERROR);
204	if (diskSize < (NumBlocks() << BlockShift())) {
205		FATAL(("Disk size (%" B_PRIdOFF " bytes) < file system size (%"
206			B_PRIdOFF " bytes)!\n", diskSize, NumBlocks() << BlockShift()));
207		RETURN_ERROR(B_BAD_VALUE);
208	}
209
210	// set the current log pointers, so that journaling will work correctly
211	fLogStart = fSuperBlock.LogStart();
212	fLogEnd = fSuperBlock.LogEnd();
213
214	if ((fBlockCache = opener.InitCache(NumBlocks(), fBlockSize)) == NULL)
215		return B_ERROR;
216
217	fJournal = new(std::nothrow) Journal(this);
218	if (fJournal == NULL)
219		return B_NO_MEMORY;
220
221	status_t status = fJournal->InitCheck();
222	if (status < B_OK) {
223		FATAL(("could not initialize journal: %s!\n", strerror(status)));
224		return status;
225	}
226
227	// replaying the log is the first thing we will do on this disk
228	status = fJournal->ReplayLog();
229	if (status != B_OK) {
230		FATAL(("Replaying log failed, data may be corrupted, volume "
231			"read-only.\n"));
232		fFlags |= VOLUME_READ_ONLY;
233			// TODO: if this is the boot volume, Bootscript will assume this
234			// is a CD...
235			// TODO: it would be nice to have a user visible alert instead
236			// of letting him just find this in the syslog.
237	}
238
239	status = fBlockAllocator.Initialize();
240	if (status != B_OK) {
241		FATAL(("could not initialize block bitmap allocator!\n"));
242		return status;
243	}
244
245	fRootNode = new(std::nothrow) Inode(this, ToVnode(Root()));
246	if (fRootNode != NULL && fRootNode->InitCheck() == B_OK) {
247		status = publish_vnode(fVolume, ToVnode(Root()), (void*)fRootNode,
248			&gBFSVnodeOps, fRootNode->Mode(), 0);
249		if (status == B_OK) {
250			// try to get indices root dir
251
252			if (!Indices().IsZero()) {
253				fIndicesNode = new(std::nothrow) Inode(this,
254					ToVnode(Indices()));
255			}
256
257			if (fIndicesNode == NULL
258				|| fIndicesNode->InitCheck() < B_OK
259				|| !fIndicesNode->IsContainer()) {
260				INFORM(("bfs: volume doesn't have indices!\n"));
261
262				if (fIndicesNode) {
263					// if this is the case, the index root node is gone bad,
264					// and BFS switch to read-only mode
265					fFlags |= VOLUME_READ_ONLY;
266					delete fIndicesNode;
267					fIndicesNode = NULL;
268				}
269			} else {
270				// we don't use the vnode layer to access the indices node
271			}
272		} else {
273			FATAL(("could not create root node: publish_vnode() failed!\n"));
274			delete fRootNode;
275			return status;
276		}
277	} else {
278		status = B_BAD_VALUE;
279		FATAL(("could not create root node!\n"));
280
281		// We need to wait for the block allocator to finish
282		fBlockAllocator.Uninitialize();
283		return status;
284	}
285
286	// all went fine
287	opener.Keep();
288	return B_OK;
289}
290
291
292status_t
293Volume::Unmount()
294{
295	put_vnode(fVolume, ToVnode(Root()));
296
297	fBlockAllocator.Uninitialize();
298
299	// This will also flush the log & all blocks to disk
300	delete fJournal;
301	fJournal = NULL;
302
303	delete fIndicesNode;
304
305	block_cache_delete(fBlockCache, !IsReadOnly());
306	close(fDevice);
307
308	return B_OK;
309}
310
311
312status_t
313Volume::Sync()
314{
315	return fJournal->FlushLogAndBlocks();
316}
317
318
319status_t
320Volume::ValidateBlockRun(block_run run)
321{
322	if (run.AllocationGroup() < 0
323		|| run.AllocationGroup() > (int32)AllocationGroups()
324		|| run.Start() > (1UL << AllocationGroupShift())
325		|| run.length == 0
326		|| uint32(run.Length() + run.Start())
327				> (1UL << AllocationGroupShift())) {
328		Panic();
329		FATAL(("*** invalid run(%d,%d,%d)\n", (int)run.AllocationGroup(),
330			run.Start(), run.Length()));
331		return B_BAD_DATA;
332	}
333	return B_OK;
334}
335
336
337block_run
338Volume::ToBlockRun(off_t block) const
339{
340	block_run run;
341	run.allocation_group = HOST_ENDIAN_TO_BFS_INT32(
342		block >> AllocationGroupShift());
343	run.start = HOST_ENDIAN_TO_BFS_INT16(
344		block & ((1LL << AllocationGroupShift()) - 1));
345	run.length = HOST_ENDIAN_TO_BFS_INT16(1);
346	return run;
347}
348
349
350status_t
351Volume::CreateIndicesRoot(Transaction& transaction)
352{
353	off_t id;
354	status_t status = Inode::Create(transaction, NULL, NULL,
355		S_INDEX_DIR | S_STR_INDEX | S_DIRECTORY | 0700, 0, 0, NULL, &id,
356		&fIndicesNode, NULL, BFS_DO_NOT_PUBLISH_VNODE);
357	if (status < B_OK)
358		RETURN_ERROR(status);
359
360	fSuperBlock.indices = ToBlockRun(id);
361	return WriteSuperBlock();
362}
363
364
365status_t
366Volume::CreateVolumeID(Transaction& transaction)
367{
368	Attribute attr(fRootNode);
369	status_t status;
370	attr_cookie* cookie;
371	status = attr.Create("be:volume_id", B_UINT64_TYPE, O_RDWR, &cookie);
372	if (status == B_OK) {
373		static bool seeded = false;
374		if (!seeded) {
375			// seed the random number generator for the be:volume_id attribute.
376			srand(time(NULL));
377			seeded = true;
378		}
379		uint64_t id;
380		size_t length = sizeof(id);
381		id = ((uint64_t)rand() << 32) | rand();
382		attr.Write(transaction, cookie, 0, (uint8_t *)&id, &length, NULL);
383	}
384	return status;
385}
386
387
388
389status_t
390Volume::AllocateForInode(Transaction& transaction, const Inode* parent,
391	mode_t type, block_run& run)
392{
393	return fBlockAllocator.AllocateForInode(transaction, &parent->BlockRun(),
394		type, run);
395}
396
397
398status_t
399Volume::WriteSuperBlock()
400{
401	if (write_pos(fDevice, 512, &fSuperBlock, sizeof(disk_super_block))
402			!= sizeof(disk_super_block))
403		return B_IO_ERROR;
404
405	return B_OK;
406}
407
408
409void
410Volume::UpdateLiveQueries(Inode* inode, const char* attribute, int32 type,
411	const uint8* oldKey, size_t oldLength, const uint8* newKey,
412	size_t newLength)
413{
414	MutexLocker _(fQueryLock);
415
416	SinglyLinkedList<Query>::Iterator iterator = fQueries.GetIterator();
417	while (iterator.HasNext()) {
418		Query* query = iterator.Next();
419		query->LiveUpdate(inode, attribute, type, oldKey, oldLength, newKey,
420			newLength);
421	}
422}
423
424
425void
426Volume::UpdateLiveQueriesRenameMove(Inode* inode, ino_t oldDirectoryID,
427	const char* oldName, ino_t newDirectoryID, const char* newName)
428{
429	MutexLocker _(fQueryLock);
430
431	size_t oldLength = strlen(oldName);
432	size_t newLength = strlen(newName);
433
434	SinglyLinkedList<Query>::Iterator iterator = fQueries.GetIterator();
435	while (iterator.HasNext()) {
436		Query* query = iterator.Next();
437		query->LiveUpdateRenameMove(inode, oldDirectoryID, oldName, oldLength,
438			newDirectoryID, newName, newLength);
439	}
440}
441
442
443/*!	Checks if there is a live query whose results depend on the presence
444	or value of the specified attribute.
445	Don't use it if you already have all the data together to evaluate
446	the queries - it wouldn't safe you anything in this case.
447*/
448bool
449Volume::CheckForLiveQuery(const char* attribute)
450{
451	// TODO: check for a live query that depends on the specified attribute
452	return true;
453}
454
455
456void
457Volume::AddQuery(Query* query)
458{
459	MutexLocker _(fQueryLock);
460	fQueries.Add(query);
461}
462
463
464void
465Volume::RemoveQuery(Query* query)
466{
467	MutexLocker _(fQueryLock);
468	fQueries.Remove(query);
469}
470
471
472status_t
473Volume::CreateCheckVisitor()
474{
475	if (fCheckVisitor != NULL)
476		return B_BUSY;
477
478	fCheckVisitor = new(std::nothrow) ::CheckVisitor(this);
479	if (fCheckVisitor == NULL)
480		return B_NO_MEMORY;
481
482	return B_OK;
483}
484
485
486void
487Volume::DeleteCheckVisitor()
488{
489	delete fCheckVisitor;
490	fCheckVisitor = NULL;
491}
492
493
494//	#pragma mark - Disk scanning and initialization
495
496
497/*static*/ status_t
498Volume::CheckSuperBlock(const uint8* data, uint32* _offset)
499{
500	disk_super_block* superBlock = (disk_super_block*)(data + 512);
501	if (superBlock->IsValid()) {
502		if (_offset != NULL)
503			*_offset = 512;
504		return B_OK;
505	}
506
507#ifndef BFS_LITTLE_ENDIAN_ONLY
508	// For PPC, the superblock might be located at offset 0
509	superBlock = (disk_super_block*)data;
510	if (superBlock->IsValid()) {
511		if (_offset != NULL)
512			*_offset = 0;
513		return B_OK;
514	}
515#endif
516
517	return B_BAD_VALUE;
518}
519
520
521/*static*/ status_t
522Volume::Identify(int fd, disk_super_block* superBlock)
523{
524	uint8 buffer[1024];
525	if (read_pos(fd, 0, buffer, sizeof(buffer)) != sizeof(buffer))
526		return B_IO_ERROR;
527
528	uint32 offset;
529	if (CheckSuperBlock(buffer, &offset) != B_OK)
530		return B_BAD_VALUE;
531
532	memcpy(superBlock, buffer + offset, sizeof(disk_super_block));
533	return B_OK;
534}
535
536
537status_t
538Volume::Initialize(int fd, const char* name, uint32 blockSize,
539	uint32 flags)
540{
541	// although there is no really good reason for it, we won't
542	// accept '/' in disk names (mkbfs does this, too - and since
543	// Tracker names mounted volumes like their name)
544	if (strchr(name, '/') != NULL)
545		return B_BAD_VALUE;
546
547	if (blockSize != 1024 && blockSize != 2048 && blockSize != 4096
548		&& blockSize != 8192)
549		return B_BAD_VALUE;
550
551	DeviceOpener opener(fd, O_RDWR);
552	if (opener.Device() < B_OK)
553		return B_BAD_VALUE;
554
555	if (opener.IsReadOnly())
556		return B_READ_ONLY_DEVICE;
557
558	fDevice = opener.Device();
559
560	uint32 deviceBlockSize;
561	off_t deviceSize;
562	if (opener.GetSize(&deviceSize, &deviceBlockSize) < B_OK)
563		return B_ERROR;
564
565	off_t numBlocks = deviceSize / blockSize;
566
567	// create valid superblock
568
569	fSuperBlock.Initialize(name, numBlocks, blockSize);
570
571	// initialize short hands to the superblock (to save byte swapping)
572	fBlockSize = fSuperBlock.BlockSize();
573	fBlockShift = fSuperBlock.BlockShift();
574	fAllocationGroupShift = fSuperBlock.AllocationGroupShift();
575
576	// determine log size depending on the size of the volume
577	off_t logSize = 2048;
578	if (numBlocks <= 20480)
579		logSize = 512;
580	if (deviceSize > 1LL * 1024 * 1024 * 1024)
581		logSize = 4096;
582
583	// since the allocator has not been initialized yet, we
584	// cannot use BlockAllocator::BitmapSize() here
585	off_t bitmapBlocks = (numBlocks + blockSize * 8 - 1) / (blockSize * 8);
586
587	fSuperBlock.log_blocks = ToBlockRun(bitmapBlocks + 1);
588	fSuperBlock.log_blocks.length = HOST_ENDIAN_TO_BFS_INT16(logSize);
589	fSuperBlock.log_start = fSuperBlock.log_end = HOST_ENDIAN_TO_BFS_INT64(
590		ToBlock(Log()));
591
592	// set the current log pointers, so that journaling will work correctly
593	fLogStart = fSuperBlock.LogStart();
594	fLogEnd = fSuperBlock.LogEnd();
595
596	if (!IsValidSuperBlock())
597		RETURN_ERROR(B_ERROR);
598
599	if ((fBlockCache = opener.InitCache(NumBlocks(), fBlockSize)) == NULL)
600		return B_ERROR;
601
602	fJournal = new(std::nothrow) Journal(this);
603	if (fJournal == NULL || fJournal->InitCheck() < B_OK)
604		RETURN_ERROR(B_ERROR);
605
606	// ready to write data to disk
607
608	Transaction transaction(this, 0);
609
610	if (fBlockAllocator.InitializeAndClearBitmap(transaction) < B_OK)
611		RETURN_ERROR(B_ERROR);
612
613	off_t id;
614	status_t status = Inode::Create(transaction, NULL, NULL,
615		S_DIRECTORY | 0755, 0, 0, NULL, &id, &fRootNode);
616	if (status < B_OK)
617		RETURN_ERROR(status);
618
619	fSuperBlock.root_dir = ToBlockRun(id);
620
621	if ((flags & VOLUME_NO_INDICES) == 0) {
622		// The indices root directory will be created automatically
623		// when the standard indices are created (or any other).
624		Index index(this);
625		status = index.Create(transaction, "name", B_STRING_TYPE);
626		if (status < B_OK)
627			return status;
628
629		status = index.Create(transaction, "BEOS:APP_SIG", B_STRING_TYPE);
630		if (status < B_OK)
631			return status;
632
633		status = index.Create(transaction, "last_modified", B_INT64_TYPE);
634		if (status < B_OK)
635			return status;
636
637		status = index.Create(transaction, "size", B_INT64_TYPE);
638		if (status < B_OK)
639			return status;
640	}
641
642	status = CreateVolumeID(transaction);
643	if (status < B_OK)
644		return status;
645
646	status = _EraseUnusedBootBlock();
647	if (status < B_OK)
648		return status;
649
650	status = WriteSuperBlock();
651	if (status < B_OK)
652		return status;
653
654	status = transaction.Done();
655	if (status < B_OK)
656		return status;
657
658	Sync();
659	opener.RemoveCache(true);
660	return B_OK;
661}
662
663
664/*!	Erase the first boot block, as we don't use it and there
665 *	might be leftovers from other file systems. This can cause
666 *	confusion for identifying the partition if not erased.
667 */
668status_t
669Volume::_EraseUnusedBootBlock()
670{
671	const int32 blockSize = 512;
672	const char emptySector[blockSize] = { 0 };
673	// Erase boot block if any
674	if (write_pos(fDevice, 0, emptySector, blockSize) != blockSize)
675		return B_IO_ERROR;
676	// Erase ext2 superblock if any
677	if (write_pos(fDevice, 1024, emptySector, blockSize) != blockSize)
678		return B_IO_ERROR;
679
680	return B_OK;
681}
682