/* * Copyright 2001-2008, Axel Dörfler, axeld@pinc-software.de * This file may be used under the terms of the MIT License. */ //! inode access functions #include "Debug.h" #include "Inode.h" #include "BPlusTree.h" #include "Stream.h" #include "Index.h" #include #include #include class InodeAllocator { public: InodeAllocator(Transaction *transaction); ~InodeAllocator(); status_t New(block_run *parentRun, mode_t mode, block_run &run, Inode **_inode); status_t CreateTree(); status_t Keep(); private: Transaction *fTransaction; block_run fRun; Inode *fInode; }; InodeAllocator::InodeAllocator(Transaction *transaction) : fTransaction(transaction), fInode(NULL) { } InodeAllocator::~InodeAllocator() { if (fTransaction != NULL) { if (fInode != NULL) { fInode->Node()->flags &= ~HOST_ENDIAN_TO_BFS_INT32(INODE_IN_USE | INODE_NOT_READY); // this unblocks any pending bfs_read_vnode() calls fInode->Free(fTransaction); } else fTransaction->GetVolume()->Free(fTransaction, fRun); } delete fInode; } status_t InodeAllocator::New(block_run *parentRun, mode_t mode, block_run &run, Inode **_inode) { Volume *volume = fTransaction->GetVolume(); status_t status = volume->AllocateForInode(fTransaction, parentRun, mode, fRun); if (status < B_OK) { // don't free the space in the destructor, because // the allocation failed fTransaction = NULL; RETURN_ERROR(status); } run = fRun; fInode = new Inode(volume, volume->ToVnode(run), true); if (fInode == NULL) RETURN_ERROR(B_NO_MEMORY); // initialize the on-disk bfs_inode structure bfs_inode *node = fInode->Node(); node->magic1 = HOST_ENDIAN_TO_BFS_INT32(INODE_MAGIC1); node->inode_num = run; node->mode = HOST_ENDIAN_TO_BFS_INT32(mode); node->flags = HOST_ENDIAN_TO_BFS_INT32(INODE_IN_USE | INODE_NOT_READY); // INODE_NOT_READY prevents the inode from being opened - it is // cleared in InodeAllocator::Keep() node->etc = (uint32)fInode; // this is temporarily set along INODE_NOT_READY and lets bfs_read_vnode() // find the associated Inode object node->create_time = HOST_ENDIAN_TO_BFS_INT64((bigtime_t)time(NULL) << INODE_TIME_SHIFT); node->last_modified_time = HOST_ENDIAN_TO_BFS_INT64(node->create_time | (volume->GetUniqueID() & INODE_TIME_MASK)); // we use Volume::GetUniqueID() to avoid having too many duplicates in the // last_modified index node->inode_size = HOST_ENDIAN_TO_BFS_INT32(volume->InodeSize()); *_inode = fInode; return B_OK; } status_t InodeAllocator::CreateTree() { Volume *volume = fTransaction->GetVolume(); // force S_STR_INDEX to be set, if no type is set if ((fInode->Mode() & S_INDEX_TYPES) == 0) fInode->Node()->mode |= HOST_ENDIAN_TO_BFS_INT32(S_STR_INDEX); BPlusTree *tree = fInode->fTree = new BPlusTree(fTransaction, fInode); if (tree == NULL || tree->InitCheck() < B_OK) return B_ERROR; if (fInode->IsRegularNode()) { if (tree->Insert(fTransaction, ".", fInode->ID()) < B_OK || tree->Insert(fTransaction, "..", volume->ToVnode(fInode->Parent())) < B_OK) return B_ERROR; } return B_OK; } status_t InodeAllocator::Keep() { ASSERT(fInode != NULL && fTransaction != NULL); fInode->Node()->flags &= ~HOST_ENDIAN_TO_BFS_INT32(INODE_NOT_READY); status_t status = fInode->WriteBack(fTransaction); fTransaction = NULL; fInode = NULL; return status; } // #pragma mark - status_t bfs_inode::InitCheck(Volume *volume) { if (Flags() & INODE_NOT_READY) { // the other fields may not yet contain valid values return B_BUSY; } if (Flags() & INODE_DELETED) return B_NOT_ALLOWED; if (Magic1() != INODE_MAGIC1 || !(Flags() & INODE_IN_USE) || inode_num.Length() != 1 // matches inode size? || (uint32)InodeSize() != volume->InodeSize() // parent resides on disk? || parent.AllocationGroup() > int32(volume->AllocationGroups()) || parent.AllocationGroup() < 0 || parent.Start() > (1L << volume->AllocationGroupShift()) || parent.Length() != 1 // attributes, too? || attributes.AllocationGroup() > int32(volume->AllocationGroups()) || attributes.AllocationGroup() < 0 || attributes.Start() > (1L << volume->AllocationGroupShift())) RETURN_ERROR(B_BAD_DATA); // ToDo: Add some tests to check the integrity of the other stuff here, // especially for the data_stream! return B_OK; } // #pragma mark - Inode::Inode(Volume *volume, vnode_id id, bool empty, uint8 reenter) : CachedBlock(volume, volume->VnodeToBlock(id), empty), fTree(NULL), fLock() { PRINT(("Inode::Inode(%p, %Ld, %s, %s) @ %p\n", volume, id, empty ? "empty" : "not-empty", reenter ? "reenter":"not-reenter", this)); Initialize(); } Inode::Inode(CachedBlock *cached) : CachedBlock(cached), fTree(NULL), fLock() { PRINT(("Inode::Inode(%p) @ %p\n", cached, this)); Initialize(); } Inode::~Inode() { PRINT(("Inode::~Inode() @ %p\n", this)); delete fTree; } void Inode::Initialize() { char lockName[32]; sprintf(lockName, "bfs inode %ld.%d", BlockRun().AllocationGroup(), BlockRun().Start()); fLock.Initialize(lockName); Node()->flags &= HOST_ENDIAN_TO_BFS_INT32(INODE_PERMANENT_FLAGS); // these two will help to maintain the indices fOldSize = Size(); fOldLastModified = LastModified(); if (IsContainer()) fTree = new BPlusTree(this); fCache = NULL; } status_t Inode::InitCheck(bool checkNode) { if (!Node()) RETURN_ERROR(B_IO_ERROR); // test inode magic and flags if (checkNode) { status_t status = Node()->InitCheck(fVolume); if (status == B_BUSY) return B_BUSY; if (status < B_OK) { FATAL(("inode at block %Ld corrupt!\n", fBlockNumber)); RETURN_ERROR(B_BAD_DATA); } } if (IsContainer()) { // inodes that have a if (fTree == NULL) RETURN_ERROR(B_NO_MEMORY); status_t status = fTree->InitCheck(); if (status < B_OK) { FATAL(("inode tree at block %Ld corrupt!\n", fBlockNumber)); RETURN_ERROR(B_BAD_DATA); } } // it's more important to know that the inode is corrupt // so we check for the lock not until here return fLock.InitCheck(); } status_t Inode::CheckPermissions(int accessMode) const { uid_t user = geteuid(); gid_t group = getegid(); // you never have write access to a read-only volume if (accessMode & W_OK && fVolume->IsReadOnly()) return B_READ_ONLY_DEVICE; // root users always have full access (but they can't execute anything) if (user == 0 && !((accessMode & X_OK) && (Mode() & S_IXUSR) == 0)) return B_OK; // shift mode bits, to check directly against accessMode mode_t mode = Mode(); if (user == (uid_t)Node()->UserID()) mode >>= 6; else if (group == (gid_t)Node()->GroupID()) mode >>= 3; if (accessMode & ~(mode & S_IRWXO)) return B_NOT_ALLOWED; return B_OK; } // #pragma mark - void Inode::AddIterator(AttributeIterator *iterator) { if (fSmallDataLock.Lock() < B_OK) return; fIterators.Add(iterator); fSmallDataLock.Unlock(); } void Inode::RemoveIterator(AttributeIterator *iterator) { if (fSmallDataLock.Lock() < B_OK) return; fIterators.Remove(iterator); fSmallDataLock.Unlock(); } /** Tries to free up "bytes" space in the small_data section by moving * attributes to real files. Used for system attributes like the name. * You need to hold the fSmallDataLock when you call this method */ status_t Inode::MakeSpaceForSmallData(Transaction *transaction, const char *name, int32 bytes) { ASSERT(fSmallDataLock.IsLocked()); while (bytes > 0) { small_data *item = Node()->SmallDataStart(), *max = NULL; int32 index = 0, maxIndex = 0; for (; !item->IsLast(Node()); item = item->Next(), index++) { // should not remove those if (*item->Name() == FILE_NAME_NAME || !strcmp(name, item->Name())) continue; if (max == NULL || max->Size() < item->Size()) { maxIndex = index; max = item; } // remove the first one large enough to free the needed amount of bytes if (bytes < (int32)item->Size()) break; } if (item->IsLast(Node()) || (int32)item->Size() < bytes) return B_ERROR; bytes -= max->Size(); // Move the attribute to a real attribute file // Luckily, this doesn't cause any index updates Inode *attribute; status_t status = CreateAttribute(transaction, item->Name(), item->Type(), &attribute); if (status < B_OK) RETURN_ERROR(status); size_t length = item->DataSize(); status = attribute->WriteAt(transaction, 0, item->Data(), &length); ReleaseAttribute(attribute); if (status < B_OK) { Vnode vnode(fVolume,Attributes()); Inode *attributes; if (vnode.Get(&attributes) < B_OK || attributes->Remove(transaction, name) < B_OK) { FATAL(("Could not remove newly created attribute!\n")); } RETURN_ERROR(status); } RemoveSmallData(max, maxIndex); } return B_OK; } /** Private function which removes the given attribute from the small_data * section. * You need to hold the fSmallDataLock when you call this method */ status_t Inode::RemoveSmallData(small_data *item, int32 index) { ASSERT(fSmallDataLock.IsLocked()); small_data *next = item->Next(); if (!next->IsLast(Node())) { // find the last attribute small_data *last = next; while (!last->IsLast(Node())) last = last->Next(); int32 size = (uint8 *)last - (uint8 *)next; if (size < 0 || size > (uint8 *)Node() + fVolume->BlockSize() - (uint8 *)next) return B_BAD_DATA; memmove(item, next, size); // Move the "last" one to its new location and // correctly terminate the small_data section last = (small_data *)((uint8 *)last - ((uint8 *)next - (uint8 *)item)); memset(last, 0, (uint8 *)Node() + fVolume->BlockSize() - (uint8 *)last); } else memset(item, 0, item->Size()); // update all current iterators AttributeIterator *iterator = NULL; while ((iterator = fIterators.Next(iterator)) != NULL) iterator->Update(index, -1); return B_OK; } /** Removes the given attribute from the small_data section. * Note that you need to write back the inode yourself after having called * that method. */ status_t Inode::RemoveSmallData(Transaction *transaction, const char *name) { if (name == NULL) return B_BAD_VALUE; SimpleLocker locker(fSmallDataLock); // search for the small_data item small_data *item = Node()->SmallDataStart(); int32 index = 0; while (!item->IsLast(Node()) && strcmp(item->Name(), name)) { item = item->Next(); index++; } if (item->IsLast(Node())) return B_ENTRY_NOT_FOUND; return RemoveSmallData(item, index); } /** Try to place the given attribute in the small_data section - if the * new attribute is too big to fit in that section, it returns B_DEVICE_FULL. * In that case, the attribute should be written to a real attribute file; * if the attribute was already part of the small_data section, but the new * one wouldn't fit, the old one is automatically removed from the small_data * section. * Note that you need to write back the inode yourself after having called that * method - it's a bad API decision that it needs a transaction but enforces you * to write back the inode all by yourself, but it's just more efficient in most * cases... */ status_t Inode::AddSmallData(Transaction *transaction, const char *name, uint32 type, const uint8 *data, size_t length, bool force) { if (name == NULL || data == NULL || type == 0) return B_BAD_VALUE; // reject any requests that can't fit into the small_data section uint32 nameLength = strlen(name); uint32 spaceNeeded = sizeof(small_data) + nameLength + 3 + length + 1; if (spaceNeeded > fVolume->InodeSize() - sizeof(bfs_inode)) return B_DEVICE_FULL; SimpleLocker locker(fSmallDataLock); small_data *item = Node()->SmallDataStart(); int32 index = 0; while (!item->IsLast(Node()) && strcmp(item->Name(), name)) { item = item->Next(); index++; } // is the attribute already in the small_data section? // then just replace the data part of that one if (!item->IsLast(Node())) { // find last attribute small_data *last = item; while (!last->IsLast(Node())) last = last->Next(); // try to change the attributes value if (item->data_size > length || force || ((uint8 *)last + length - item->DataSize()) <= ((uint8 *)Node() + fVolume->InodeSize())) { // make room for the new attribute if needed (and we are forced to do so) if (force && ((uint8 *)last + length - item->DataSize()) > ((uint8 *)Node() + fVolume->InodeSize())) { // We also take the free space at the end of the small_data section // into account, and request only what's really needed uint32 needed = length - item->DataSize() - (uint32)((uint8 *)Node() + fVolume->InodeSize() - (uint8 *)last); if (MakeSpaceForSmallData(transaction, name, needed) < B_OK) return B_ERROR; // reset our pointers item = Node()->SmallDataStart(); index = 0; while (!item->IsLast(Node()) && strcmp(item->Name(), name)) { item = item->Next(); index++; } last = item; while (!last->IsLast(Node())) last = last->Next(); } // Normally, we can just overwrite the attribute data as the size // is specified by the type and does not change that often if (length != item->DataSize()) { // move the attributes after the current one small_data *next = item->Next(); if (!next->IsLast(Node())) memmove((uint8 *)item + spaceNeeded, next, (uint8 *)last - (uint8 *)next); // Move the "last" one to its new location and // correctly terminate the small_data section last = (small_data *)((uint8 *)last - ((uint8 *)next - ((uint8 *)item + spaceNeeded))); if ((uint8 *)last < (uint8 *)Node() + fVolume->BlockSize()) memset(last, 0, (uint8 *)Node() + fVolume->BlockSize() - (uint8 *)last); item->data_size = HOST_ENDIAN_TO_BFS_INT16(length); } item->type = HOST_ENDIAN_TO_BFS_INT32(type); memcpy(item->Data(), data, length); item->Data()[length] = '\0'; return B_OK; } // Could not replace the old attribute, so remove it to let // let the calling function create an attribute file for it if (RemoveSmallData(item, index) < B_OK) return B_ERROR; return B_DEVICE_FULL; } // try to add the new attribute! if ((uint8 *)item + spaceNeeded > (uint8 *)Node() + fVolume->InodeSize()) { // there is not enough space for it! if (!force) return B_DEVICE_FULL; // make room for the new attribute if (MakeSpaceForSmallData(transaction, name, spaceNeeded) < B_OK) return B_ERROR; // get new last item! item = Node()->SmallDataStart(); index = 0; while (!item->IsLast(Node())) { item = item->Next(); index++; } } memset(item, 0, spaceNeeded); item->type = HOST_ENDIAN_TO_BFS_INT32(type); item->name_size = HOST_ENDIAN_TO_BFS_INT16(nameLength); item->data_size = HOST_ENDIAN_TO_BFS_INT16(length); strcpy(item->Name(), name); memcpy(item->Data(), data, length); // correctly terminate the small_data section item = item->Next(); if (!item->IsLast(Node())) memset(item, 0, (uint8 *)Node() + fVolume->InodeSize() - (uint8 *)item); // update all current iterators AttributeIterator *iterator = NULL; while ((iterator = fIterators.Next(iterator)) != NULL) iterator->Update(index, 1); return B_OK; } /** Iterates through the small_data section of an inode. * To start at the beginning of this section, you let smallData * point to NULL, like: * small_data *data = NULL; * while (inode->GetNextSmallData(&data) { ... } * * This function is reentrant and doesn't allocate any memory; * you can safely stop calling it at any point (you don't need * to iterate through the whole list). * You need to hold the fSmallDataLock when you call this method */ status_t Inode::GetNextSmallData(small_data **_smallData) const { if (!Node()) RETURN_ERROR(B_ERROR); ASSERT(fSmallDataLock.IsLocked()); small_data *data = *_smallData; // begin from the start? if (data == NULL) data = Node()->SmallDataStart(); else data = data->Next(); // is already last item? if (data->IsLast(Node())) return B_ENTRY_NOT_FOUND; *_smallData = data; return B_OK; } /** Finds the attribute "name" in the small data section, and * returns a pointer to it (or NULL if it doesn't exist). * You need to hold the fSmallDataLock when you call this method */ small_data * Inode::FindSmallData(const char *name) const { ASSERT(fSmallDataLock.IsLocked()); small_data *smallData = NULL; while (GetNextSmallData(&smallData) == B_OK) { if (!strcmp(smallData->Name(), name)) return smallData; } return NULL; } /** Returns a pointer to the node's name if present in the small data * section, NULL otherwise. * You need to hold the fSmallDataLock when you call this method */ const char * Inode::Name() const { ASSERT(fSmallDataLock.IsLocked()); small_data *smallData = NULL; while (GetNextSmallData(&smallData) == B_OK) { if (*smallData->Name() == FILE_NAME_NAME && smallData->NameSize() == FILE_NAME_NAME_LENGTH) return (const char *)smallData->Data(); } return NULL; } /** Copies the node's name into the provided buffer. * The buffer must be B_FILE_NAME_LENGTH bytes large. */ status_t Inode::GetName(char *buffer) const { SimpleLocker locker(fSmallDataLock); const char *name = Name(); if (name == NULL) return B_ENTRY_NOT_FOUND; strlcpy(buffer, name, B_FILE_NAME_LENGTH); return B_OK; } /** Changes or set the name of a file: in the inode small_data section only, it * doesn't change it in the parent directory's b+tree. * Note that you need to write back the inode yourself after having called * that method. It suffers from the same API decision as AddSmallData() does * (and for the same reason). */ status_t Inode::SetName(Transaction *transaction, const char *name) { if (name == NULL || *name == '\0') return B_BAD_VALUE; const char nameTag[2] = {FILE_NAME_NAME, 0}; return AddSmallData(transaction, nameTag, FILE_NAME_TYPE, (uint8 *)name, strlen(name), true); } /** Reads data from the specified attribute. * This is a high-level attribute function that understands attributes * in the small_data section as well as real attribute files. */ status_t Inode::ReadAttribute(const char *name, int32 type, off_t pos, uint8 *buffer, size_t *_length) { if (pos < 0) pos = 0; // search in the small_data section (which has to be locked first) { SimpleLocker locker(fSmallDataLock); small_data *smallData = FindSmallData(name); if (smallData != NULL) { size_t length = *_length; if (pos >= smallData->data_size) { *_length = 0; return B_OK; } if (length + pos > smallData->DataSize()) length = smallData->DataSize() - pos; memcpy(buffer, smallData->Data() + pos, length); *_length = length; return B_OK; } } // search in the attribute directory Inode *attribute; status_t status = GetAttribute(name, &attribute); if (status == B_OK) { if (attribute->Lock().Lock() == B_OK) { status = attribute->ReadAt(pos, (uint8 *)buffer, _length); attribute->Lock().Unlock(); } else status = B_ERROR; ReleaseAttribute(attribute); } RETURN_ERROR(status); } /** Writes data to the specified attribute. * This is a high-level attribute function that understands attributes * in the small_data section as well as real attribute files. */ status_t Inode::WriteAttribute(Transaction *transaction, const char *name, int32 type, off_t pos, const uint8 *buffer, size_t *_length) { // needed to maintain the index uint8 oldBuffer[BPLUSTREE_MAX_KEY_LENGTH], *oldData = NULL; size_t oldLength = 0; // ToDo: we actually depend on that the contents of "buffer" are constant. // If they get changed during the write (hey, user programs), we may mess // up our index trees! Index index(fVolume); index.SetTo(name); Inode *attribute = NULL; status_t status = B_OK; if (GetAttribute(name, &attribute) < B_OK) { // save the old attribute data fSmallDataLock.Lock(); small_data *smallData = FindSmallData(name); if (smallData != NULL) { oldLength = smallData->DataSize(); if (oldLength > 0) { if (oldLength > BPLUSTREE_MAX_KEY_LENGTH) oldLength = BPLUSTREE_MAX_KEY_LENGTH; memcpy(oldData = oldBuffer, smallData->Data(), oldLength); } } fSmallDataLock.Unlock(); // if the attribute doesn't exist yet (as a file), try to put it in the // small_data section first - if that fails (due to insufficent space), // create a real attribute file status = AddSmallData(transaction, name, type, buffer, *_length); if (status == B_DEVICE_FULL) { status = CreateAttribute(transaction, name, type, &attribute); if (status < B_OK) RETURN_ERROR(status); } else if (status == B_OK) status = WriteBack(transaction); } if (attribute != NULL) { if (attribute->Lock().LockWrite() == B_OK) { // save the old attribute data (if this fails, oldLength will reflect it) if (fVolume->CheckForLiveQuery(name) && attribute->Size() > 0) { oldLength = BPLUSTREE_MAX_KEY_LENGTH; if (attribute->ReadAt(0, oldBuffer, &oldLength) == B_OK) oldData = oldBuffer; } // ToDo: check if the data fits in the inode now and delete the attribute file if so status = attribute->WriteAt(transaction, pos, buffer, _length); if (status == B_OK) { // The attribute type might have been changed - we need to adopt // the new one attribute->Node()->type = HOST_ENDIAN_TO_BFS_INT32(type); status = attribute->WriteBack(transaction); } attribute->Lock().UnlockWrite(); } else status = B_ERROR; ReleaseAttribute(attribute); } // ToDo: find a better way than this "pos" thing (the begin of the old key // must be copied to the start of the new one for a comparison) if (status == B_OK && pos == 0) { // index only the first BPLUSTREE_MAX_KEY_LENGTH bytes uint16 length = *_length; if (length > BPLUSTREE_MAX_KEY_LENGTH) length = BPLUSTREE_MAX_KEY_LENGTH; // Update index. Note, Index::Update() may be called even if initializing // the index failed - it will just update the live queries in this case if (pos < length || pos < oldLength) index.Update(transaction, name, type, oldData, oldLength, buffer, length, this); } return status; } /** Removes the specified attribute from the inode. * This is a high-level attribute function that understands attributes * in the small_data section as well as real attribute files. */ status_t Inode::RemoveAttribute(Transaction *transaction, const char *name) { Index index(fVolume); bool hasIndex = index.SetTo(name) == B_OK; // update index for attributes in the small_data section { fSmallDataLock.Lock(); small_data *smallData = FindSmallData(name); if (smallData != NULL) { uint32 length = smallData->DataSize(); if (length > BPLUSTREE_MAX_KEY_LENGTH) length = BPLUSTREE_MAX_KEY_LENGTH; index.Update(transaction, name, smallData->Type(), smallData->Data(), length, NULL, 0, this); } fSmallDataLock.Unlock(); } status_t status = RemoveSmallData(transaction, name); if (status == B_OK) { status = WriteBack(transaction); } else if (status == B_ENTRY_NOT_FOUND && !Attributes().IsZero()) { // remove the attribute file if it exists Vnode vnode(fVolume, Attributes()); Inode *attributes; if ((status = vnode.Get(&attributes)) < B_OK) return status; // update index Inode *attribute; if ((hasIndex || fVolume->CheckForLiveQuery(name)) && GetAttribute(name, &attribute) == B_OK) { uint8 data[BPLUSTREE_MAX_KEY_LENGTH]; size_t length = BPLUSTREE_MAX_KEY_LENGTH; if (attribute->ReadAt(0, data, &length) == B_OK) index.Update(transaction, name, attribute->Type(), data, length, NULL, 0, this); ReleaseAttribute(attribute); } if ((status = attributes->Remove(transaction, name)) < B_OK) return status; if (attributes->IsEmpty()) { // remove attribute directory (don't fail if that can't be done) if (remove_vnode(fVolume->ID(), attributes->ID()) == B_OK) { // update the inode, so that no one will ever doubt it's deleted :-) attributes->Node()->flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_DELETED); if (attributes->WriteBack(transaction) == B_OK) { Attributes().SetTo(0, 0, 0); WriteBack(transaction); } else unremove_vnode(fVolume->ID(), attributes->ID()); } } } return status; } status_t Inode::GetAttribute(const char *name, Inode **_attribute) { // does this inode even have attributes? if (Attributes().IsZero()) return B_ENTRY_NOT_FOUND; Vnode vnode(fVolume, Attributes()); Inode *attributes; if (vnode.Get(&attributes) < B_OK) { FATAL(("get_vnode() failed in Inode::GetAttribute(name = \"%s\")\n", name)); return B_ERROR; } BPlusTree *tree; status_t status = attributes->GetTree(&tree); if (status == B_OK) { vnode_id id; if ((status = tree->Find((uint8 *)name, (uint16)strlen(name), &id)) == B_OK) { Vnode vnode(fVolume, id); Inode *inode; // Check if the attribute is really an attribute if (vnode.Get(&inode) < B_OK || !inode->IsAttribute()) return B_ERROR; *_attribute = inode; vnode.Keep(); return B_OK; } } return status; } void Inode::ReleaseAttribute(Inode *attribute) { if (attribute == NULL) return; put_vnode(fVolume->ID(), attribute->ID()); } status_t Inode::CreateAttribute(Transaction *transaction, const char *name, uint32 type, Inode **attribute) { // do we need to create the attribute directory first? if (Attributes().IsZero()) { status_t status = Inode::Create(transaction, this, NULL, S_ATTR_DIR | S_DIRECTORY | 0666, 0, 0, NULL); if (status < B_OK) RETURN_ERROR(status); } Vnode vnode(fVolume, Attributes()); Inode *attributes; if (vnode.Get(&attributes) < B_OK) return B_ERROR; // Inode::Create() locks the inode for us return Inode::Create(transaction, attributes, name, S_ATTR | S_FILE | 0666, 0, type, NULL, attribute); } // #pragma mark - /** Gives the caller direct access to the b+tree for a given directory. * The tree is no longer created on demand, but when the inode is first * created. That will report any potential errors upfront, saves locking, * and should work as good (though a bit slower). */ status_t Inode::GetTree(BPlusTree **tree) { if (fTree) { *tree = fTree; return B_OK; } RETURN_ERROR(B_BAD_VALUE); } bool Inode::IsEmpty() { BPlusTree *tree; status_t status = GetTree(&tree); if (status < B_OK) return status; TreeIterator iterator(tree); // index and attribute directories are really empty when they are // empty - directories for standard files always contain ".", and // "..", so we need to ignore those two uint32 count = 0; char name[BPLUSTREE_MAX_KEY_LENGTH]; uint16 length; vnode_id id; while (iterator.GetNextEntry(name, &length, B_FILE_NAME_LENGTH, &id) == B_OK) { if (Mode() & (S_ATTR_DIR | S_INDEX_DIR)) return false; if (++count > 2 || strcmp(".", name) && strcmp("..", name)) return false; } return true; } /** Finds the block_run where "pos" is located in the data_stream of * the inode. * If successful, "offset" will then be set to the file offset * of the block_run returned; so "pos - offset" is for the block_run * what "pos" is for the whole stream. * The caller has to make sure that "pos" is inside the stream. */ status_t Inode::FindBlockRun(off_t pos, block_run &run, off_t &offset) { // The BPlusTree class will call this function, we'll provide // standard cached access only from here return ((Stream *)this)->FindBlockRun(pos, run, offset); } status_t Inode::ReadAt(off_t pos, uint8 *buffer, size_t *_length) { // call the right ReadAt() method, depending on the inode flags if (Flags() & INODE_NO_CACHE) return ((Stream *)this)->ReadAt(pos, buffer, _length); if (Flags() & INODE_LOGGED) return ((Stream *)this)->ReadAt(pos, buffer, _length); return ((Stream *)this)->ReadAt(pos, buffer, _length); } status_t Inode::WriteAt(Transaction *transaction, off_t pos, const uint8 *buffer, size_t *_length) { // call the right WriteAt() method, depending on the inode flags // update the last modification time in memory, it will be written // back to the inode, and the index when the file is closed // ToDo: should update the internal last modified time only at this point! Node()->last_modified_time = (bigtime_t)time(NULL) << INODE_TIME_SHIFT; if (Flags() & INODE_NO_CACHE) return ((Stream *)this)->WriteAt(transaction, pos, buffer, _length); if (Flags() & INODE_LOGGED) return ((Stream *)this)->WriteAt(transaction, pos, buffer, _length); return ((Stream *)this)->WriteAt(transaction, pos, buffer, _length); } /** Fills the gap between the old file size and the new file size * with zeros. * It's more or less a copy of Inode::WriteAt() but it can handle * length differences of more than just 4 GB, and it never uses * the log, even if the INODE_LOGGED flag is set. */ status_t Inode::FillGapWithZeros(off_t pos, off_t newSize) { // ToDo: we currently do anything here, same as original BFS! //if (pos >= newSize) return B_OK; block_run run; off_t offset; if (FindBlockRun(pos, run, offset) < B_OK) RETURN_ERROR(B_BAD_VALUE); off_t length = newSize - pos; uint32 bytesWritten = 0; uint32 blockSize = fVolume->BlockSize(); uint32 blockShift = fVolume->BlockShift(); uint8 *block; // the first block_run we write could not be aligned to the block_size boundary // (write partial block at the beginning) // pos % block_size == (pos - offset) % block_size, offset % block_size == 0 if (pos % blockSize != 0) { run.start += (pos - offset) / blockSize; run.length -= (pos - offset) / blockSize; CachedBlock cached(fVolume,run); if ((block = cached.Block()) == NULL) RETURN_ERROR(B_BAD_VALUE); bytesWritten = blockSize - (pos % blockSize); if (length < bytesWritten) bytesWritten = length; memset(block + (pos % blockSize), 0, bytesWritten); if (fVolume->WriteBlocks(cached.BlockNumber(), block, 1) < B_OK) RETURN_ERROR(B_IO_ERROR); pos += bytesWritten; length -= bytesWritten; if (length == 0) return B_OK; if (FindBlockRun(pos, run, offset) < B_OK) RETURN_ERROR(B_BAD_VALUE); } while (length > 0) { // offset is the offset to the current pos in the block_run run.start = HOST_ENDIAN_TO_BFS_INT16(run.Start() + ((pos - offset) >> blockShift)); run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() - ((pos - offset) >> blockShift)); CachedBlock cached(fVolume); off_t blockNumber = fVolume->ToBlock(run); for (int32 i = 0; i < run.Length(); i++) { if ((block = cached.SetTo(blockNumber + i, true)) == NULL) RETURN_ERROR(B_IO_ERROR); if (fVolume->WriteBlocks(cached.BlockNumber(), block, 1) < B_OK) RETURN_ERROR(B_IO_ERROR); } int32 bytes = run.Length() << blockShift; length -= bytes; bytesWritten += bytes; // since we don't respect a last partial block, length can be lower if (length <= 0) break; pos += bytes; if (FindBlockRun(pos, run, offset) < B_OK) RETURN_ERROR(B_BAD_VALUE); } return B_OK; } /** Allocates NUM_ARRAY_BLOCKS blocks, and clears their contents. Growing * the indirect and double indirect range uses this method. * The allocated block_run is saved in "run" */ status_t Inode::AllocateBlockArray(Transaction *transaction, block_run &run) { if (!run.IsZero()) return B_BAD_VALUE; status_t status = fVolume->Allocate(transaction, this, NUM_ARRAY_BLOCKS, run, NUM_ARRAY_BLOCKS); if (status < B_OK) return status; // make sure those blocks are empty CachedBlock cached(fVolume); off_t block = fVolume->ToBlock(run); for (int32 i = 0; i < run.Length(); i++) { block_run *runs = (block_run *)cached.SetTo(block + i, true); if (runs == NULL) return B_IO_ERROR; if (cached.WriteBack(transaction) < B_OK) return B_IO_ERROR; } return B_OK; } status_t Inode::GrowStream(Transaction *transaction, off_t size) { data_stream *data = &Node()->data; // is the data stream already large enough to hold the new size? // (can be the case with preallocated blocks) if (size < data->MaxDirectRange() || size < data->MaxIndirectRange() || size < data->MaxDoubleIndirectRange()) { data->size = HOST_ENDIAN_TO_BFS_INT64(size); return B_OK; } // how many bytes are still needed? (unused ranges are always zero) uint16 minimum = 1; off_t bytes; if (data->Size() < data->MaxDoubleIndirectRange()) { bytes = size - data->MaxDoubleIndirectRange(); // the double indirect range can only handle multiple of NUM_ARRAY_BLOCKS minimum = NUM_ARRAY_BLOCKS; } else if (data->Size() < data->MaxIndirectRange()) bytes = size - data->MaxIndirectRange(); else if (data->Size() < data->MaxDirectRange()) bytes = size - data->MaxDirectRange(); else bytes = size - data->Size(); // do we have enough free blocks on the disk? off_t blocksRequested = (bytes + fVolume->BlockSize() - 1) >> fVolume->BlockShift(); if (blocksRequested > fVolume->FreeBlocks()) return B_DEVICE_FULL; off_t blocksNeeded = blocksRequested; // because of preallocations and partial allocations, the number of // blocks we need to allocate may be different from the one we request // from the block allocator // Should we preallocate some blocks (currently, always 64k)? // Attributes, attribute directories, and long symlinks usually won't get that big, // and should stay close to the inode - preallocating could be counterproductive. // Also, if free disk space is tight, we probably don't want to do this as well. if (!IsAttribute() && !IsAttributeDirectory() && !IsSymLink() && blocksRequested < (65536 >> fVolume->BlockShift()) && fVolume->FreeBlocks() > 128) blocksRequested = 65536 >> fVolume->BlockShift(); while (blocksNeeded > 0) { // the requested blocks do not need to be returned with a // single allocation, so we need to iterate until we have // enough blocks allocated block_run run; status_t status = fVolume->Allocate(transaction, this, blocksRequested, run, minimum); if (status < B_OK) return status; // okay, we have the needed blocks, so just distribute them to the // different ranges of the stream (direct, indirect & double indirect) // ToDo: if anything goes wrong here, we probably want to free the // blocks that couldn't be distributed into the stream! blocksNeeded -= run.Length(); // don't preallocate if the first allocation was already too small blocksRequested = blocksNeeded; if (minimum > 1) { // make sure that "blocks" is a multiple of minimum blocksRequested = (blocksRequested + minimum - 1) & ~(minimum - 1); } // Direct block range if (data->Size() <= data->MaxDirectRange()) { // let's try to put them into the direct block range int32 free = 0; for (; free < NUM_DIRECT_BLOCKS; free++) if (data->direct[free].IsZero()) break; if (free < NUM_DIRECT_BLOCKS) { // can we merge the last allocated run with the new one? int32 last = free - 1; if (free > 0 && data->direct[last].MergeableWith(run)) data->direct[last].length = HOST_ENDIAN_TO_BFS_INT16(data->direct[last].Length() + run.Length()); else data->direct[free] = run; data->max_direct_range = HOST_ENDIAN_TO_BFS_INT64(data->MaxDirectRange() + run.Length() * fVolume->BlockSize()); data->size = HOST_ENDIAN_TO_BFS_INT64(blocksNeeded > 0 ? data->max_direct_range : size); continue; } } // Indirect block range if (data->Size() <= data->MaxIndirectRange() || !data->MaxIndirectRange()) { CachedBlock cached(fVolume); block_run *runs = NULL; uint32 free = 0; off_t block; // if there is no indirect block yet, create one if (data->indirect.IsZero()) { status = AllocateBlockArray(transaction, data->indirect); if (status < B_OK) return status; data->max_indirect_range = HOST_ENDIAN_TO_BFS_INT64(data->MaxDirectRange()); // insert the block_run in the first block runs = (block_run *)cached.SetTo(data->indirect); } else { uint32 numberOfRuns = fVolume->BlockSize() / sizeof(block_run); block = fVolume->ToBlock(data->indirect); // search first empty entry int32 i = 0; for (; i < data->indirect.Length(); i++) { if ((runs = (block_run *)cached.SetTo(block + i)) == NULL) return B_IO_ERROR; for (free = 0; free < numberOfRuns; free++) if (runs[free].IsZero()) break; if (free < numberOfRuns) break; } if (i == data->indirect.Length()) runs = NULL; } if (runs != NULL) { // try to insert the run to the last one - note that this doesn't // take block borders into account, so it could be further optimized int32 last = free - 1; if (free > 0 && runs[last].MergeableWith(run)) runs[last].length = HOST_ENDIAN_TO_BFS_INT16(runs[last].Length() + run.Length()); else runs[free] = run; data->max_indirect_range = HOST_ENDIAN_TO_BFS_INT64(data->MaxIndirectRange() + (run.Length() << fVolume->BlockShift())); data->size = HOST_ENDIAN_TO_BFS_INT64(blocksNeeded > 0 ? data->MaxIndirectRange() : size); cached.WriteBack(transaction); continue; } } // Double indirect block range if (data->Size() <= data->MaxDoubleIndirectRange() || !data->max_double_indirect_range) { while ((run.Length() % NUM_ARRAY_BLOCKS) != 0) { // The number of allocated blocks isn't a multiple of NUM_ARRAY_BLOCKS, // so we have to change this. This can happen the first time the stream // grows into the double indirect range. // First, free the remaining blocks that don't fit into a multiple // of NUM_ARRAY_BLOCKS int32 rest = run.Length() % NUM_ARRAY_BLOCKS; run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() - rest); status = fVolume->Free(transaction, block_run::Run(run.AllocationGroup(), run.Start() + run.Length(), rest)); if (status < B_OK) return status; blocksNeeded += rest; blocksRequested = (blocksNeeded + NUM_ARRAY_BLOCKS - 1) & ~(NUM_ARRAY_BLOCKS - 1); minimum = NUM_ARRAY_BLOCKS; // we make sure here that we have at minimum NUM_ARRAY_BLOCKS allocated, // so if the allocation succeeds, we don't run into an endless loop // Are there any blocks left in the run? If not, allocate a new one if (run.length == 0) continue; } // if there is no double indirect block yet, create one if (data->double_indirect.IsZero()) { status = AllocateBlockArray(transaction, data->double_indirect); if (status < B_OK) return status; data->max_double_indirect_range = data->max_indirect_range; } // calculate the index where to insert the new blocks int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run); int32 indirectSize = ((1L << INDIRECT_BLOCKS_SHIFT) << fVolume->BlockShift()) * runsPerBlock; int32 directSize = NUM_ARRAY_BLOCKS << fVolume->BlockShift(); int32 runsPerArray = runsPerBlock << ARRAY_BLOCKS_SHIFT; off_t start = data->MaxDoubleIndirectRange() - data->MaxIndirectRange(); int32 indirectIndex = start / indirectSize; int32 index = start / directSize; // distribute the blocks to the array and allocate // new array blocks when needed CachedBlock cached(fVolume); CachedBlock cachedDirect(fVolume); block_run *array = NULL; uint32 runLength = run.Length(); // ToDo: the following code is commented - it could be used to // preallocate all needed block arrays to see in advance if the // allocation will succeed. // I will probably remove it later, because it's no perfect solution // either: if the allocation was broken up before (blocksNeeded != 0), // it doesn't guarantee anything. // And since failing in this case is not that common, it doesn't have // to be optimized in that way. // Anyway, I wanted to have it in CVS - all those lines, and they will // be removed soon :-) /* // allocate new block arrays if needed off_t block = -1; for (int32 i = 0;i < needed;i++) { // get the block to insert the run into block = fVolume->ToBlock(data->double_indirect) + i + indirectIndex / runsPerBlock; if (cached.BlockNumber() != block) array = (block_run *)cached.SetTo(block); if (array == NULL) return B_ERROR; status = AllocateBlockArray(transaction, array[i + indirectIndex % runsPerBlock]); if (status < B_OK) return status; } */ while (run.length != 0) { // get the indirect array block if (array == NULL) { if (cached.Block() != NULL && cached.WriteBack(transaction) < B_OK) return B_IO_ERROR; array = (block_run *)cached.SetTo(fVolume->ToBlock(data->double_indirect) + indirectIndex / runsPerBlock); if (array == NULL) return B_IO_ERROR; } do { // do we need a new array block? if (array[indirectIndex % runsPerBlock].IsZero()) { status = AllocateBlockArray(transaction, array[indirectIndex % runsPerBlock]); if (status < B_OK) return status; } block_run *runs = (block_run *)cachedDirect.SetTo( fVolume->ToBlock(array[indirectIndex % runsPerBlock]) + index / runsPerBlock); if (runs == NULL) return B_IO_ERROR; do { // insert the block_run into the array runs[index % runsPerBlock] = run; runs[index % runsPerBlock].length = HOST_ENDIAN_TO_BFS_INT16(NUM_ARRAY_BLOCKS); // alter the remaining block_run run.start = HOST_ENDIAN_TO_BFS_INT16(run.Start() + NUM_ARRAY_BLOCKS); run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() - NUM_ARRAY_BLOCKS); } while ((++index % runsPerBlock) != 0 && run.length); if (cachedDirect.WriteBack(transaction) < B_OK) return B_IO_ERROR; } while ((index % runsPerArray) != 0 && run.length); if (++indirectIndex % runsPerBlock == 0) { array = NULL; index = 0; } } data->max_double_indirect_range = HOST_ENDIAN_TO_BFS_INT64(data->MaxDoubleIndirectRange() + (runLength << fVolume->BlockShift())); data->size = blocksNeeded > 0 ? HOST_ENDIAN_TO_BFS_INT64(data->max_double_indirect_range) : size; continue; } RETURN_ERROR(EFBIG); } // update the size of the data stream data->size = HOST_ENDIAN_TO_BFS_INT64(size); return B_OK; } status_t Inode::FreeStaticStreamArray(Transaction *transaction, int32 level, block_run run, off_t size, off_t offset, off_t &max) { int32 indirectSize = 0; if (level == 0) indirectSize = (1L << (INDIRECT_BLOCKS_SHIFT + fVolume->BlockShift())) * (fVolume->BlockSize() / sizeof(block_run)); else if (level == 1) indirectSize = 4 << fVolume->BlockShift(); off_t start; if (size > offset) start = size - offset; else start = 0; int32 index = start / indirectSize; int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run); CachedBlock cached(fVolume); off_t blockNumber = fVolume->ToBlock(run); // set the file offset to the current block run offset += (off_t)index * indirectSize; for (int32 i = index / runsPerBlock; i < run.Length(); i++) { block_run *array = (block_run *)cached.SetTo(blockNumber + i); if (array == NULL) RETURN_ERROR(B_ERROR); for (index = index % runsPerBlock; index < runsPerBlock; index++) { if (array[index].IsZero()) { // we also want to break out of the outer loop i = run.Length(); break; } status_t status = B_OK; if (level == 0) status = FreeStaticStreamArray(transaction, 1, array[index], size, offset, max); else if (offset >= size) status = fVolume->Free(transaction, array[index]); else max = HOST_ENDIAN_TO_BFS_INT64(offset + indirectSize); if (status < B_OK) RETURN_ERROR(status); if (offset >= size) array[index].SetTo(0, 0, 0); offset += indirectSize; } index = 0; cached.WriteBack(transaction); } return B_OK; } /** Frees all block_runs in the array which come after the specified size. * It also trims the last block_run that contain the size. * "offset" and "max" are maintained until the last block_run that doesn't * have to be freed - after this, the values won't be correct anymore, but * will still assure correct function for all subsequent calls. * "max" is considered to be in file system byte order. */ status_t Inode::FreeStreamArray(Transaction *transaction, block_run *array, uint32 arrayLength, off_t size, off_t &offset, off_t &max) { off_t newOffset = offset; uint32 i = 0; for (; i < arrayLength; i++, offset = newOffset) { if (array[i].IsZero()) break; newOffset += (off_t)array[i].Length() << fVolume->BlockShift(); if (newOffset <= size) continue; block_run run = array[i]; // determine the block_run to be freed if (newOffset > size && offset < size) { // free partial block_run (and update the original block_run) run.start = array[i].start + ((size - offset) >> fVolume->BlockShift()) + 1; array[i].length = HOST_ENDIAN_TO_BFS_INT16(run.Start() - array[i].Start()); run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() - array[i].Length()); if (run.length == 0) continue; // update maximum range max = HOST_ENDIAN_TO_BFS_INT64(offset + ((off_t)array[i].Length() << fVolume->BlockShift())); } else { // free the whole block_run array[i].SetTo(0, 0, 0); if ((off_t)BFS_ENDIAN_TO_HOST_INT64(max) > offset) max = HOST_ENDIAN_TO_BFS_INT64(offset); } if (fVolume->Free(transaction, run) < B_OK) return B_IO_ERROR; } return B_OK; } status_t Inode::ShrinkStream(Transaction *transaction, off_t size) { data_stream *data = &Node()->data; if (data->MaxDoubleIndirectRange() > size) { off_t *maxDoubleIndirect = &data->max_double_indirect_range; // gcc 4 work-around: "error: cannot bind packed field // 'data->data_stream::max_double_indirect_range' to 'off_t&'" FreeStaticStreamArray(transaction, 0, data->double_indirect, size, data->MaxIndirectRange(), *maxDoubleIndirect); if (size <= data->MaxIndirectRange()) { fVolume->Free(transaction, data->double_indirect); data->double_indirect.SetTo(0, 0, 0); data->max_double_indirect_range = 0; } } if (data->MaxIndirectRange() > size) { CachedBlock cached(fVolume); off_t block = fVolume->ToBlock(data->indirect); off_t offset = data->MaxDirectRange(); for (int32 i = 0; i < data->indirect.Length(); i++) { block_run *array = (block_run *)cached.SetTo(block + i); if (array == NULL) break; off_t *maxIndirect = &data->max_indirect_range; // gcc 4 work-around: "error: cannot bind packed field // 'data->data_stream::max_indirect_range' to 'off_t&'" if (FreeStreamArray(transaction, array, fVolume->BlockSize() / sizeof(block_run), size, offset, *maxIndirect) == B_OK) cached.WriteBack(transaction); } if (data->max_direct_range == data->max_indirect_range) { fVolume->Free(transaction, data->indirect); data->indirect.SetTo(0, 0, 0); data->max_indirect_range = 0; } } if (data->MaxDirectRange() > size) { off_t offset = 0; off_t *maxDirect = &data->max_direct_range; // gcc 4 work-around: "error: cannot bind packed field // 'data->data_stream::max_indirect_range' to 'off_t&'" FreeStreamArray(transaction, data->direct, NUM_DIRECT_BLOCKS, size, offset, *maxDirect); } data->size = HOST_ENDIAN_TO_BFS_INT64(size); return B_OK; } status_t Inode::SetFileSize(Transaction *transaction, off_t size) { if (size < 0 // uncached files can't be resized (Stream::WriteAt() specifically // denies growing uncached files because of efficiency, so it had to be // adapted if this ever changes [which will probably happen in OpenBeOS]). || Flags() & INODE_NO_CACHE) return B_BAD_VALUE; off_t oldSize = Size(); if (size == oldSize) return B_OK; // should the data stream grow or shrink? status_t status; if (size > oldSize) { status = GrowStream(transaction, size); if (status < B_OK) { // if the growing of the stream fails, the whole operation // fails, so we should shrink the stream to its former size ShrinkStream(transaction, oldSize); } } else status = ShrinkStream(transaction, size); if (status < B_OK) return status; return WriteBack(transaction); } status_t Inode::Append(Transaction *transaction, off_t bytes) { return SetFileSize(transaction, Size() + bytes); } status_t Inode::Trim(Transaction *transaction) { status_t status = ShrinkStream(transaction, Size()); if (status < B_OK) return status; return WriteBack(transaction); } status_t Inode::Free(Transaction *transaction) { FUNCTION(); // Perhaps there should be an implementation of Inode::ShrinkStream() that // just frees the data_stream, but doesn't change the inode (since it is // freed anyway) - that would make an undelete command possible status_t status = SetFileSize(transaction, 0); if (status < B_OK) return status; // Free all attributes, and remove their indices { // We have to limit the scope of AttributeIterator, so that its // destructor is not called after the inode is deleted AttributeIterator iterator(this); char name[B_FILE_NAME_LENGTH]; uint32 type; size_t length; vnode_id id; while ((status = iterator.GetNext(name, &length, &type, &id)) == B_OK) RemoveAttribute(transaction, name); } if (WriteBack(transaction) < B_OK) return B_ERROR; return fVolume->Free(transaction, BlockRun()); } status_t Inode::Sync() { // We may also want to flush the attribute's data stream to // disk here... (do we?) data_stream *data = &Node()->data; status_t status; // flush direct range for (int32 i = 0; i < NUM_DIRECT_BLOCKS; i++) { if (data->direct[i].IsZero()) return B_OK; status = flush_blocks(fVolume->Device(), fVolume->ToBlock(data->direct[i]), data->direct[i].Length()); if (status != B_OK) return status; } // flush indirect range if (data->max_indirect_range == 0) return B_OK; CachedBlock cached(fVolume); off_t block = fVolume->ToBlock(data->indirect); int32 count = fVolume->BlockSize() / sizeof(block_run); for (int32 j = 0; j < data->indirect.Length(); j++) { block_run *runs = (block_run *)cached.SetTo(block + j); if (runs == NULL) break; for (int32 i = 0; i < count; i++) { if (runs[i].IsZero()) return B_OK; status = flush_blocks(fVolume->Device(), fVolume->ToBlock(runs[i]), runs[i].Length()); if (status != B_OK) return status; } } // flush double indirect range if (data->max_double_indirect_range == 0) return B_OK; off_t indirectBlock = fVolume->ToBlock(data->double_indirect); for (int32 l = 0; l < data->double_indirect.Length(); l++) { block_run *indirectRuns = (block_run *)cached.SetTo(indirectBlock + l); if (indirectRuns == NULL) return B_FILE_ERROR; CachedBlock directCached(fVolume); for (int32 k = 0; k < count; k++) { if (indirectRuns[k].IsZero()) return B_OK; block = fVolume->ToBlock(indirectRuns[k]); for (int32 j = 0; j < indirectRuns[k].Length(); j++) { block_run *runs = (block_run *)directCached.SetTo(block + j); if (runs == NULL) return B_FILE_ERROR; for (int32 i = 0; i < count; i++) { if (runs[i].IsZero()) return B_OK; // ToDo: combine single block_runs to bigger ones when // they are adjacent status = flush_blocks(fVolume->Device(), fVolume->ToBlock(runs[i]), runs[i].Length()); if (status != B_OK) return status; } } } } return B_OK; } status_t Inode::Remove(Transaction *transaction, const char *name, off_t *_id, bool isDirectory) { BPlusTree *tree; if (GetTree(&tree) != B_OK) RETURN_ERROR(B_BAD_VALUE); RecursiveLocker locker(fVolume->Lock()); // does the file even exist? off_t id; if (tree->Find((uint8 *)name, (uint16)strlen(name), &id) < B_OK) return B_ENTRY_NOT_FOUND; if (_id) *_id = id; Vnode vnode(fVolume, id); Inode *inode; status_t status = vnode.Get(&inode); if (status < B_OK) { REPORT_ERROR(status); return B_ENTRY_NOT_FOUND; } // You can't unlink a mounted image or the VM file while it is being used - while // this is not really necessary, it copies the behaviour of the original BFS // and let you and me feel a little bit safer if (inode->Flags() & INODE_NO_CACHE) return B_NOT_ALLOWED; // Inode::IsContainer() is true also for indices (furthermore, the S_IFDIR // bit is set for indices in BFS, not for attribute directories) - but you // should really be able to do whatever you want with your indices // without having to remove all files first :) if (!inode->IsIndex()) { // if it's not of the correct type, don't delete it! if (inode->IsContainer() != isDirectory) return isDirectory ? B_NOT_A_DIRECTORY : B_IS_A_DIRECTORY; // only delete empty directories if (isDirectory && !inode->IsEmpty()) return B_DIRECTORY_NOT_EMPTY; } // remove_vnode() allows the inode to be accessed until the last put_vnode() if (remove_vnode(fVolume->ID(), id) != B_OK) return B_ERROR; if (tree->Remove(transaction, name, id) < B_OK) { unremove_vnode(fVolume->ID(), id); RETURN_ERROR(B_ERROR); } #ifdef DEBUG if (tree->Find((uint8 *)name, (uint16)strlen(name), &id) == B_OK) { DIE(("deleted entry still there")); } #endif // update the inode, so that no one will ever doubt it's deleted :-) inode->Node()->flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_DELETED); inode->Node()->flags &= ~HOST_ENDIAN_TO_BFS_INT32(INODE_IN_USE); // In balance to the Inode::Create() method, the main indices // are updated here (name, size, & last_modified) Index index(fVolume); if (inode->IsRegularNode()) { index.RemoveName(transaction, name, inode); // If removing from the index fails, it is not regarded as a // fatal error and will not be reported back! // Deleted inodes won't be visible in queries anyway. } if ((inode->Mode() & (S_FILE | S_SYMLINK)) != 0) { if (inode->IsFile()) index.RemoveSize(transaction, inode); index.RemoveLastModified(transaction, inode); } if (inode->WriteBack(transaction) < B_OK) return B_ERROR; return B_OK; } /** Creates the inode with the specified parent directory, and automatically * adds the created inode to that parent directory. If an attribute directory * is created, it will also automatically added to the parent inode as such. * However, the indices root node, and the regular root node won't be added * to the superblock. * It will also create the initial B+tree for the inode if it's a directory * of any kind. * If the "_id" or "_inode" variable is given and non-NULL to store the inode's * ID, the inode stays locked - you have to call put_vnode() if you don't use it * anymore. */ status_t Inode::Create(Transaction *transaction, Inode *parent, const char *name, int32 mode, int omode, uint32 type, off_t *_id, Inode **_inode) { FUNCTION(); block_run parentRun = parent ? parent->BlockRun() : block_run::Run(0, 0, 0); Volume *volume = transaction->GetVolume(); BPlusTree *tree = NULL; RecursiveLocker locker(volume->Lock()); // ToDo: it would be nicer to only lock the parent directory, if possible // (but that lock will already be held during any B+tree action) if (parent && (mode & S_ATTR_DIR) == 0 && parent->IsContainer()) { // check if the file already exists in the directory if (parent->GetTree(&tree) != B_OK) RETURN_ERROR(B_BAD_VALUE); // does the file already exist? off_t offset; if (tree->Find((uint8 *)name, (uint16)strlen(name), &offset) == B_OK) { // return if the file should be a directory or opened in exclusive mode if (mode & S_DIRECTORY || omode & O_EXCL) return B_FILE_EXISTS; Vnode vnode(volume, offset); Inode *inode; status_t status = vnode.Get(&inode); if (status < B_OK) { REPORT_ERROR(status); return B_ENTRY_NOT_FOUND; } // if it's a directory, bail out! if (inode->IsDirectory()) return B_IS_A_DIRECTORY; // if it is a mounted device or the VM file, we don't allow to delete it // while it is open and in use if (inode->Flags() & INODE_NO_CACHE) return B_NOT_ALLOWED; // if omode & O_TRUNC, truncate the existing file if (omode & O_TRUNC) { WriteLocked locked(inode->Lock()); status_t status = inode->SetFileSize(transaction, 0); if (status < B_OK) return status; } if (_id) *_id = offset; if (_inode) *_inode = inode; // only keep the vnode in memory if the _id or _inode pointer is provided if (_id != NULL || _inode != NULL) vnode.Keep(); return B_OK; } } else if (parent && (mode & S_ATTR_DIR) == 0) return B_BAD_VALUE; // allocate space for the new inode InodeAllocator allocator(transaction); block_run run; Inode *inode; status_t status = allocator.New(&parentRun, mode, run, &inode); if (status < B_OK) return status; // Initialize the parts of the bfs_inode structure that // InodeAllocator::New() hasn't touched yet bfs_inode *node = inode->Node(); if (parent == NULL) { // we set the parent to itself in this case // (only happens for the root and indices node) node->parent = run; } else node->parent = parentRun; node->uid = HOST_ENDIAN_TO_BFS_INT32(geteuid()); node->gid = HOST_ENDIAN_TO_BFS_INT32(parent ? parent->Node()->gid : getegid()); // the group ID is inherited from the parent, if available node->type = HOST_ENDIAN_TO_BFS_INT32(type); // only add the name to regular files, directories, or symlinks // don't add it to attributes, or indices if (tree && inode->IsRegularNode() && inode->SetName(transaction, name) < B_OK) return B_ERROR; // Initialize b+tree if it's a directory (and add "." & ".." if it's // a standard directory for files - not for attributes or indices) if (inode->IsContainer()) { status = allocator.CreateTree(); if (status < B_OK) return status; } // Add a link to the inode from the parent, depending on its type // (the INODE_NOT_READY flag is set, so it is safe to make the inode // accessable to the file system here) if (tree) { status = tree->Insert(transaction, name, inode->ID()); } else if (parent && (mode & S_ATTR_DIR) != 0) { parent->Attributes() = run; status = parent->WriteBack(transaction); } // Note, we only care if the inode could be made accessable for the // two cases above; the root node or the indices root node must // handle this case on their own (or other cases where "parent" is // NULL) if (status < B_OK) RETURN_ERROR(status); // Update the main indices (name, size & last_modified) // (live queries might want to access us after this) Index index(volume); if (inode->IsRegularNode() && name != NULL) { // the name index only contains regular files // (but not the root node where name == NULL) status = index.InsertName(transaction, name, inode); if (status < B_OK && status != B_BAD_INDEX) { // We have to remove the node from the parent at this point, // because the InodeAllocator destructor can't handle this // case (and if it fails, we can't do anything about it...) if (tree) tree->Remove(transaction, name, inode->ID()); else if (parent != NULL && (mode & S_ATTR_DIR) != 0) parent->Node()->attributes.SetTo(0, 0, 0); return status; } } inode->UpdateOldLastModified(); // The "size" & "last_modified" indices don't contain directories if (inode->IsFile() || inode->IsSymLink()) { // if adding to these indices fails, the inode creation will not be harmed; // they are considered less important than the "name" index if (inode->IsFile()) index.InsertSize(transaction, inode); index.InsertLastModified(transaction, inode); } // Everything worked well until this point, we have a fully // initialized inode, and we want to keep it allocator.Keep(); // We hold the volume lock to make sure that bfs_read_vnode() // won't succeed in the meantime (between the call right // above and below)! if ((status = new_vnode(volume->ID(), inode->ID(), inode)) != B_OK) { // this is a really fatal error, and we can't recover from that // The only exception is that the volume is being initialized. if (volume->ID() >= 0) { FATAL(("new_vnode() failed with: %s\n", strerror(status))); DIE(("new_vnode() failed for inode!")); } } if (_id != NULL) *_id = inode->ID(); if (_inode != NULL) *_inode = inode; // if either _id or _inode is passed, we will keep the inode locked if (_id == NULL && _inode == NULL) put_vnode(volume->ID(), inode->ID()); return B_OK; } // #pragma mark - AttributeIterator::AttributeIterator(Inode *inode) : fCurrentSmallData(0), fInode(inode), fAttributes(NULL), fIterator(NULL), fBuffer(NULL) { inode->AddIterator(this); } AttributeIterator::~AttributeIterator() { if (fAttributes) put_vnode(fAttributes->GetVolume()->ID(), fAttributes->ID()); delete fIterator; fInode->RemoveIterator(this); } status_t AttributeIterator::Rewind() { fCurrentSmallData = 0; if (fIterator != NULL) fIterator->Rewind(); return B_OK; } status_t AttributeIterator::GetNext(char *name, size_t *_length, uint32 *_type, vnode_id *_id) { // read attributes out of the small data section if (fCurrentSmallData >= 0) { small_data *item = fInode->Node()->SmallDataStart(); fInode->SmallDataLock().Lock(); int32 i = 0; for (;;item = item->Next()) { if (item->IsLast(fInode->Node())) break; if (item->NameSize() == FILE_NAME_NAME_LENGTH && *item->Name() == FILE_NAME_NAME) continue; if (i++ == fCurrentSmallData) break; } if (!item->IsLast(fInode->Node())) { strncpy(name, item->Name(), B_FILE_NAME_LENGTH); *_type = item->Type(); *_length = item->NameSize(); *_id = (vnode_id)fCurrentSmallData; fCurrentSmallData = i; } else { // stop traversing the small_data section fCurrentSmallData = -1; } fInode->SmallDataLock().Unlock(); if (fCurrentSmallData != -1) return B_OK; } // read attributes out of the attribute directory if (fInode->Attributes().IsZero()) return B_ENTRY_NOT_FOUND; Volume *volume = fInode->GetVolume(); // if you haven't yet access to the attributes directory, get it if (fAttributes == NULL) { #ifdef UNSAFE_GET_VNODE RecursiveLocker locker(volume->Lock()); #endif if (get_vnode(volume->ID(), volume->ToVnode(fInode->Attributes()), (void **)&fAttributes) != 0 || fAttributes == NULL) { FATAL(("get_vnode() failed in AttributeIterator::GetNext(vnode_id = %Ld,name = \"%s\")\n",fInode->ID(),name)); return B_ENTRY_NOT_FOUND; } BPlusTree *tree; if (fAttributes->GetTree(&tree) < B_OK || (fIterator = new TreeIterator(tree)) == NULL) { FATAL(("could not get tree in AttributeIterator::GetNext(vnode_id = %Ld,name = \"%s\")\n",fInode->ID(),name)); return B_ENTRY_NOT_FOUND; } } uint16 length; vnode_id id; status_t status = fIterator->GetNextEntry(name, &length, B_FILE_NAME_LENGTH, &id); if (status < B_OK) return status; Vnode vnode(volume,id); Inode *attribute; if ((status = vnode.Get(&attribute)) == B_OK) { *_type = attribute->Type(); *_length = attribute->Size(); *_id = id; } return status; } void AttributeIterator::Update(uint16 index, int8 change) { // fCurrentSmallData points already to the next item if (index < fCurrentSmallData) fCurrentSmallData += change; }