/*
 * Copyright 2001-2008, Axel Dörfler, axeld@pinc-software.de. All Rights Reserved.
 * This file may be used under the terms of the MIT License.
 */

//! superblock, mounting, etc.


#include "Debug.h"
#include "Volume.h"
#include "Journal.h"
#include "Inode.h"
#include "Query.h"

#include <util/kernel_cpp.h>
#include <KernelExport.h>
#include <Drivers.h>

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>


static const int32 kDesiredAllocationGroups = 56;
	// This is the number of allocation groups that will be tried
	// to be given for newly initialized disks.
	// That's only relevant for smaller disks, though, since any
	// of today's disk sizes already reach the maximum length
	// of an allocation group (65536 blocks).
	// It seems to create appropriate numbers for smaller disks
	// with this setting, though (i.e. you can create a 400 MB
	// file on a 1 GB disk without the need for double indirect
	// blocks).
35 36 37class DeviceOpener { 38 public: 39 DeviceOpener(const char *device, int mode); 40 ~DeviceOpener(); 41 42 int Open(const char *device, int mode); 43 status_t InitCache(off_t numBlocks); 44 void RemoveCache(int mode); 45 46 void Keep(); 47 48 int Device() const { return fDevice; } 49 50 status_t GetSize(off_t *_size, uint32 *_blockSize = NULL); 51 52 private: 53 int fDevice; 54 bool fCached; 55}; 56 57 58DeviceOpener::DeviceOpener(const char *device, int mode) 59 : 60 fCached(false) 61{ 62 Open(device, mode); 63} 64 65 66DeviceOpener::~DeviceOpener() 67{ 68 if (fDevice >= B_OK) { 69 close(fDevice); 70 if (fCached) 71 remove_cached_device_blocks(fDevice, NO_WRITES); 72 } 73} 74 75 76int 77DeviceOpener::Open(const char *device, int mode) 78{ 79 fDevice = open(device, mode); 80 return fDevice; 81} 82 83 84status_t 85DeviceOpener::InitCache(off_t numBlocks) 86{ 87 if (init_cache_for_device(fDevice, numBlocks) == B_OK) { 88 fCached = true; 89 return B_OK; 90 } 91 92 return B_ERROR; 93} 94 95 96void 97DeviceOpener::RemoveCache(int mode) 98{ 99 if (!fCached) 100 return; 101 102 remove_cached_device_blocks(fDevice, mode); 103 fCached = false; 104} 105 106 107void 108DeviceOpener::Keep() 109{ 110 fDevice = -1; 111} 112 113 114/** Returns the size of the device in bytes. It uses B_GET_GEOMETRY 115 * to compute the size, or fstat() if that failed. 
116 */ 117 118status_t 119DeviceOpener::GetSize(off_t *_size, uint32 *_blockSize) 120{ 121 device_geometry geometry; 122 if (ioctl(fDevice, B_GET_GEOMETRY, &geometry) < 0) { 123 // maybe it's just a file 124 struct stat stat; 125 if (fstat(fDevice, &stat) < 0) 126 return B_ERROR; 127 128 if (_size) 129 *_size = stat.st_size; 130 if (_blockSize) // that shouldn't cause us any problems 131 *_blockSize = 512; 132 133 return B_OK; 134 } 135 136 if (_size) { 137 *_size = 1LL * geometry.head_count * geometry.cylinder_count 138 * geometry.sectors_per_track * geometry.bytes_per_sector; 139 } 140 if (_blockSize) 141 *_blockSize = geometry.bytes_per_sector; 142 143 return B_OK; 144} 145 146 147// #pragma mark - 148 149 150bool 151disk_super_block::IsValid() 152{ 153 if (Magic1() != (int32)SUPER_BLOCK_MAGIC1 154 || Magic2() != (int32)SUPER_BLOCK_MAGIC2 155 || Magic3() != (int32)SUPER_BLOCK_MAGIC3 156 || (int32)block_size != inode_size 157 || ByteOrder() != SUPER_BLOCK_FS_LENDIAN 158 || (1UL << BlockShift()) != BlockSize() 159 || AllocationGroups() < 1 160 || AllocationGroupShift() < 1 161 || BlocksPerAllocationGroup() < 1 162 || NumBlocks() < 10 163 || AllocationGroups() != divide_roundup(NumBlocks(), 164 1L << AllocationGroupShift())) 165 return false; 166 167 return true; 168} 169 170 171void 172disk_super_block::Initialize(const char *diskName, off_t numBlocks, uint32 blockSize) 173{ 174 memset(this, 0, sizeof(disk_super_block)); 175 176 magic1 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC1); 177 magic2 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC2); 178 magic3 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC3); 179 fs_byte_order = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_FS_LENDIAN); 180 flags = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_DISK_CLEAN); 181 182 strlcpy(name, diskName, sizeof(name)); 183 184 int32 blockShift = 9; 185 while ((1UL << blockShift) < blockSize) { 186 blockShift++; 187 } 188 189 block_size = inode_size = HOST_ENDIAN_TO_BFS_INT32(blockSize); 190 block_shift = 
HOST_ENDIAN_TO_BFS_INT32(blockShift); 191 192 num_blocks = HOST_ENDIAN_TO_BFS_INT64(numBlocks); 193 used_blocks = 0; 194 195 // Get the minimum ag_shift (that's determined by the block size) 196 197 int32 bitsPerBlock = blockSize << 3; 198 off_t bitmapBlocks = (numBlocks + bitsPerBlock - 1) / bitsPerBlock; 199 int32 blocksPerGroup = 1; 200 int32 groupShift = 13; 201 202 for (int32 i = 8192; i < bitsPerBlock; i *= 2) { 203 groupShift++; 204 } 205 206 // Many allocation groups help applying allocation policies, but if 207 // they are too small, we will need to many block_runs to cover large 208 // files (see above to get an explanation of the kDesiredAllocationGroups 209 // constant). 210 211 int32 numGroups; 212 213 while (true) { 214 numGroups = (bitmapBlocks + blocksPerGroup - 1) / blocksPerGroup; 215 if (numGroups > kDesiredAllocationGroups) { 216 if (groupShift == 16) 217 break; 218 219 groupShift++; 220 blocksPerGroup *= 2; 221 } else 222 break; 223 } 224 225 num_ags = HOST_ENDIAN_TO_BFS_INT32(numGroups); 226 blocks_per_ag = HOST_ENDIAN_TO_BFS_INT32(blocksPerGroup); 227 ag_shift = HOST_ENDIAN_TO_BFS_INT32(groupShift); 228} 229 230 231// #pragma mark - 232 233 234Volume::Volume(dev_t id) 235 : 236 fID(id), 237 fBlockAllocator(this), 238 fLock("bfs volume"), 239 fRootNode(NULL), 240 fIndicesNode(NULL), 241 fDirtyCachedBlocks(0), 242 fUniqueID(0), 243 fFlags(0) 244{ 245} 246 247 248Volume::~Volume() 249{ 250} 251 252 253bool 254Volume::IsValidSuperBlock() 255{ 256 return fSuperBlock.IsValid(); 257} 258 259 260void 261Volume::Panic() 262{ 263 FATAL(("we have to panic... switch to read-only mode!\n")); 264 fFlags |= VOLUME_READ_ONLY; 265#ifdef USER 266 debugger("BFS panics!"); 267#elif defined(DEBUG) 268 kernel_debugger("BFS panics!"); 269#endif 270} 271 272 273status_t 274Volume::Mount(const char *deviceName, uint32 flags) 275{ 276 if (flags & B_MOUNT_READ_ONLY) 277 fFlags |= VOLUME_READ_ONLY; 278 279 // ToDo: validate the FS in write mode as well! 
280#if (B_HOST_IS_LENDIAN && defined(BFS_BIG_ENDIAN_ONLY)) \ 281 || (B_HOST_IS_BENDIAN && defined(BFS_LITTLE_ENDIAN_ONLY)) 282 // in big endian mode, we only mount read-only for now 283 flags |= B_MOUNT_READ_ONLY; 284#endif 285 286 DeviceOpener opener(deviceName, flags & B_MOUNT_READ_ONLY ? O_RDONLY : O_RDWR); 287 288 // if we couldn't open the device, try read-only (don't rely on a specific error code) 289 if (opener.Device() < B_OK && (flags & B_MOUNT_READ_ONLY) == 0) { 290 opener.Open(deviceName, O_RDONLY); 291 fFlags |= VOLUME_READ_ONLY; 292 } 293 294 fDevice = opener.Device(); 295 if (fDevice < B_OK) 296 RETURN_ERROR(fDevice); 297 298 // check if it's a regular file, and if so, disable the cache for the 299 // underlaying file system 300 struct stat stat; 301 if (fstat(fDevice, &stat) < 0) 302 RETURN_ERROR(B_ERROR); 303 304#ifndef NO_FILE_UNCACHED_IO 305 if (stat.st_mode & S_FILE && ioctl(fDevice, IOCTL_FILE_UNCACHED_IO, NULL) < 0) { 306 // mount read-only if the cache couldn't be disabled 307# ifdef DEBUG 308 FATAL(("couldn't disable cache for image file - system may dead-lock!\n")); 309# else 310 FATAL(("couldn't disable cache for image file!\n")); 311 Panic(); 312# endif 313 } 314#endif 315 316 // read the superblock 317 if (Identify(fDevice, &fSuperBlock) != B_OK) { 318 FATAL(("invalid superblock!\n")); 319 return B_BAD_VALUE; 320 } 321 322 // initialize short hands to the superblock (to save byte swapping) 323 fBlockSize = fSuperBlock.BlockSize(); 324 fBlockShift = fSuperBlock.BlockShift(); 325 fAllocationGroupShift = fSuperBlock.AllocationGroupShift(); 326 327 // check if the device size is large enough to hold the file system 328 off_t diskSize; 329 if (opener.GetSize(&diskSize) < B_OK) 330 RETURN_ERROR(B_ERROR); 331 if (diskSize < (NumBlocks() << BlockShift())) 332 RETURN_ERROR(B_BAD_VALUE); 333 334 // set the current log pointers, so that journaling will work correctly 335 fLogStart = fSuperBlock.LogStart(); 336 fLogEnd = fSuperBlock.LogEnd(); 337 338 
if (opener.InitCache(NumBlocks()) != B_OK) 339 return B_ERROR; 340 341 fJournal = new Journal(this); 342 // replaying the log is the first thing we will do on this disk 343 if (fJournal && fJournal->InitCheck() < B_OK 344 || fBlockAllocator.Initialize() < B_OK) { 345 // ToDo: improve error reporting for a bad journal 346 FATAL(("could not initialize journal/block bitmap allocator!\n")); 347 return B_NO_MEMORY; 348 } 349 350 status_t status = B_OK; 351 352 fRootNode = new Inode(this, ToVnode(Root())); 353 if (fRootNode && fRootNode->InitCheck() == B_OK) { 354 status = new_vnode(fID, ToVnode(Root()), (void *)fRootNode); 355 if (status == B_OK) { 356 // try to get indices root dir 357 358 // question: why doesn't get_vnode() work here?? 359 // answer: we have not yet backpropagated the pointer to the 360 // volume in bfs_mount(), so bfs_read_vnode() can't get it. 361 // But it's not needed to do that anyway. 362 363 if (!Indices().IsZero()) 364 fIndicesNode = new Inode(this, ToVnode(Indices())); 365 366 if (fIndicesNode == NULL 367 || fIndicesNode->InitCheck() < B_OK 368 || !fIndicesNode->IsContainer()) { 369 INFORM(("bfs: volume doesn't have indices!\n")); 370 371 if (fIndicesNode) { 372 // if this is the case, the index root node is gone bad, and 373 // BFS switch to read-only mode 374 fFlags |= VOLUME_READ_ONLY; 375 delete fIndicesNode; 376 fIndicesNode = NULL; 377 } 378 } 379 380 // all went fine 381 opener.Keep(); 382 return B_OK; 383 } else 384 FATAL(("could not create root node: new_vnode() failed!\n")); 385 386 delete fRootNode; 387 } else { 388 status = B_BAD_VALUE; 389 FATAL(("could not create root node!\n")); 390 } 391 392 return status; 393} 394 395 396status_t 397Volume::Unmount() 398{ 399 // This will also flush the log & all blocks to disk 400 delete fJournal; 401 fJournal = NULL; 402 403 delete fIndicesNode; 404 405 remove_cached_device_blocks(fDevice, IsReadOnly() ? 
NO_WRITES : ALLOW_WRITES); 406 close(fDevice); 407 408 return B_OK; 409} 410 411 412status_t 413Volume::Sync() 414{ 415 return fJournal->FlushLogAndBlocks(); 416} 417 418 419status_t 420Volume::ValidateBlockRun(block_run run) 421{ 422 if (run.AllocationGroup() < 0 || run.AllocationGroup() > (int32)AllocationGroups() 423 || run.Start() > (1UL << AllocationGroupShift()) 424 || run.length == 0 425 || uint32(run.Length() + run.Start()) > (1UL << AllocationGroupShift())) { 426 Panic(); 427 FATAL(("*** invalid run(%ld,%d,%d)\n", run.AllocationGroup(), run.Start(), run.Length())); 428 return B_BAD_DATA; 429 } 430 return B_OK; 431} 432 433 434block_run 435Volume::ToBlockRun(off_t block) const 436{ 437 block_run run; 438 run.allocation_group = HOST_ENDIAN_TO_BFS_INT32(block >> AllocationGroupShift()); 439 run.start = HOST_ENDIAN_TO_BFS_INT16(block & ((1LL << AllocationGroupShift()) - 1)); 440 run.length = HOST_ENDIAN_TO_BFS_INT16(1); 441 return run; 442} 443 444 445status_t 446Volume::CreateIndicesRoot(Transaction *transaction) 447{ 448 off_t id; 449 status_t status = Inode::Create(transaction, NULL, NULL, 450 S_INDEX_DIR | S_STR_INDEX | S_DIRECTORY | 0700, 0, 0, &id, &fIndicesNode); 451 if (status < B_OK) 452 RETURN_ERROR(status); 453 454 fSuperBlock.indices = ToBlockRun(id); 455 return WriteSuperBlock(); 456} 457 458 459status_t 460Volume::AllocateForInode(Transaction *transaction, const Inode *parent, mode_t type, block_run &run) 461{ 462 return fBlockAllocator.AllocateForInode(transaction, &parent->BlockRun(), type, run); 463} 464 465 466status_t 467Volume::WriteSuperBlock() 468{ 469 if (write_pos(fDevice, 512, &fSuperBlock, sizeof(disk_super_block)) != sizeof(disk_super_block)) 470 return B_IO_ERROR; 471 472 return B_OK; 473} 474 475 476void 477Volume::UpdateLiveQueries(Inode *inode, const char *attribute, int32 type, const uint8 *oldKey, 478 size_t oldLength, const uint8 *newKey, size_t newLength) 479{ 480 if (fQueryLock.Lock() < B_OK) 481 return; 482 483 Query *query 
= NULL; 484 while ((query = fQueries.Next(query)) != NULL) 485 query->LiveUpdate(inode, attribute, type, oldKey, oldLength, newKey, newLength); 486 487 fQueryLock.Unlock(); 488} 489 490 491/** Checks if there is a live query whose results depend on the presence 492 * or value of the specified attribute. 493 * Don't use it if you already have all the data together to evaluate 494 * the queries - it wouldn't safe you anything in this case. 495 */ 496 497bool 498Volume::CheckForLiveQuery(const char *attribute) 499{ 500 // ToDo: check for a live query that depends on the specified attribute 501 return true; 502} 503 504 505void 506Volume::AddQuery(Query *query) 507{ 508 if (fQueryLock.Lock() < B_OK) 509 return; 510 511 fQueries.Add(query); 512 513 fQueryLock.Unlock(); 514} 515 516 517void 518Volume::RemoveQuery(Query *query) 519{ 520 if (fQueryLock.Lock() < B_OK) 521 return; 522 523 fQueries.Remove(query); 524 525 fQueryLock.Unlock(); 526} 527 528 529// #pragma mark - 530// Disk scanning and initialization 531 532 533status_t 534Volume::Identify(int fd, disk_super_block *superBlock) 535{ 536 char buffer[1024]; 537 if (read_pos(fd, 0, buffer, sizeof(buffer)) != sizeof(buffer)) 538 return B_IO_ERROR; 539 540 // Note: that does work only for x86, for PowerPC, the superblock 541 // may be located at offset 0! 
542 memcpy(superBlock, buffer + 512, sizeof(disk_super_block)); 543 if (!superBlock->IsValid()) { 544#ifndef BFS_LITTLE_ENDIAN_ONLY 545 memcpy(superBlock, buffer, sizeof(disk_super_block)); 546 if (!superBlock->IsValid()) 547 return B_BAD_VALUE; 548#else 549 return B_BAD_VALUE; 550#endif 551 } 552 553 return B_OK; 554} 555 556 557#ifdef USER 558extern "C" void kill_device_vnodes(dev_t id); 559 // This call is only available in the userland fs_shell 560 561status_t 562Volume::Initialize(const char *device, const char *name, uint32 blockSize, uint32 flags) 563{ 564 // although there is no really good reason for it, we won't 565 // accept '/' in disk names (mkbfs does this, too - and since 566 // Tracker names mounted volumes like their name) 567 if (strchr(name, '/') != NULL) 568 return B_BAD_VALUE; 569 570 if (blockSize != 1024 && blockSize != 2048 && blockSize != 4096 && blockSize != 8192) 571 return B_BAD_VALUE; 572 573 DeviceOpener opener(device, O_RDWR); 574 if (opener.Device() < B_OK) 575 return B_BAD_VALUE; 576 577 fDevice = opener.Device(); 578 579 uint32 deviceBlockSize; 580 off_t deviceSize; 581 if (opener.GetSize(&deviceSize, &deviceBlockSize) < B_OK) 582 return B_ERROR; 583 584 off_t numBlocks = deviceSize / blockSize; 585 586 // create valid superblock 587 588 fSuperBlock.Initialize(name, numBlocks, blockSize); 589 590 // initialize short hands to the superblock (to save byte swapping) 591 fBlockSize = fSuperBlock.BlockSize(); 592 fBlockShift = fSuperBlock.BlockShift(); 593 fAllocationGroupShift = fSuperBlock.AllocationGroupShift(); 594 595 // since the allocator has not been initialized yet, we 596 // cannot use BlockAllocator::BitmapSize() here 597 fSuperBlock.log_blocks = ToBlockRun(AllocationGroups() 598 * fSuperBlock.BlocksPerAllocationGroup() + 1); 599 fSuperBlock.log_blocks.length = HOST_ENDIAN_TO_BFS_INT16(2048); 600 // ToDo: set the log size depending on the disk size 601 fSuperBlock.log_start = fSuperBlock.log_end = 
HOST_ENDIAN_TO_BFS_INT64(ToBlock(Log())); 602 603 // set the current log pointers, so that journaling will work correctly 604 fLogStart = fSuperBlock.LogStart(); 605 fLogEnd = fSuperBlock.LogEnd(); 606 607 if (!IsValidSuperBlock()) 608 RETURN_ERROR(B_ERROR); 609 610 if (opener.InitCache(numBlocks) != B_OK) 611 return B_ERROR; 612 613 fJournal = new Journal(this); 614 if (fJournal == NULL || fJournal->InitCheck() < B_OK) 615 RETURN_ERROR(B_ERROR); 616 617 // ready to write data to disk 618 619 Transaction transaction(this, 0); 620 621 if (fBlockAllocator.InitializeAndClearBitmap(transaction) < B_OK) 622 RETURN_ERROR(B_ERROR); 623 624 off_t id; 625 status_t status = Inode::Create(&transaction, NULL, NULL, 626 S_DIRECTORY | 0755, 0, 0, &id, &fRootNode); 627 if (status < B_OK) 628 RETURN_ERROR(status); 629 630 fSuperBlock.root_dir = ToBlockRun(id); 631 632 if ((flags & VOLUME_NO_INDICES) == 0) { 633 // The indices root directory will be created automatically 634 // when the standard indices are created (or any other). 635 Index index(this); 636 status = index.Create(&transaction, "name", B_STRING_TYPE); 637 if (status < B_OK) 638 return status; 639 640 status = index.Create(&transaction, "last_modified", B_INT64_TYPE); 641 if (status < B_OK) 642 return status; 643 644 status = index.Create(&transaction, "size", B_INT64_TYPE); 645 if (status < B_OK) 646 return status; 647 } 648 649 WriteSuperBlock(); 650 transaction.Done(); 651 652 put_vnode(ID(), fRootNode->ID()); 653 if (fIndicesNode != NULL) 654 put_vnode(ID(), fIndicesNode->ID()); 655 656 kill_device_vnodes(ID()); 657 // This call is only available in the userland fs_shell 658 659 Sync(); 660 opener.RemoveCache(ALLOW_WRITES); 661 return B_OK; 662} 663#endif 664