1/*
2 * Copyright 2001-2008, Axel Dörfler, axeld@pinc-software.de
3 * This file may be used under the terms of the MIT License.
4 */
5
6//! inode access functions
7
8
9#include "Debug.h"
10#include "Inode.h"
11#include "BPlusTree.h"
12#include "Stream.h"
13#include "Index.h"
14
15#include <util/kernel_cpp.h>
16
17#include <string.h>
18#include <stdio.h>
19
20
21class InodeAllocator {
22	public:
23		InodeAllocator(Transaction *transaction);
24		~InodeAllocator();
25
26		status_t New(block_run *parentRun, mode_t mode, block_run &run, Inode **_inode);
27		status_t CreateTree();
28		status_t Keep();
29
30	private:
31		Transaction *fTransaction;
32		block_run fRun;
33		Inode *fInode;
34};
35
36
37InodeAllocator::InodeAllocator(Transaction *transaction)
38	:
39	fTransaction(transaction),
40	fInode(NULL)
41{
42}
43
44
45InodeAllocator::~InodeAllocator()
46{
47	if (fTransaction != NULL) {
48		if (fInode != NULL) {
49			fInode->Node()->flags &= ~HOST_ENDIAN_TO_BFS_INT32(INODE_IN_USE | INODE_NOT_READY);
50				// this unblocks any pending bfs_read_vnode() calls
51			fInode->Free(fTransaction);
52		} else
53			fTransaction->GetVolume()->Free(fTransaction, fRun);
54	}
55
56	delete fInode;
57}
58
59
60status_t
61InodeAllocator::New(block_run *parentRun, mode_t mode, block_run &run, Inode **_inode)
62{
63	Volume *volume = fTransaction->GetVolume();
64
65	status_t status = volume->AllocateForInode(fTransaction, parentRun, mode, fRun);
66	if (status < B_OK) {
67		// don't free the space in the destructor, because
68		// the allocation failed
69		fTransaction = NULL;
70		RETURN_ERROR(status);
71	}
72
73	run = fRun;
74	fInode = new Inode(volume, volume->ToVnode(run), true);
75	if (fInode == NULL)
76		RETURN_ERROR(B_NO_MEMORY);
77
78	// initialize the on-disk bfs_inode structure
79
80	bfs_inode *node = fInode->Node();
81
82	node->magic1 = HOST_ENDIAN_TO_BFS_INT32(INODE_MAGIC1);
83	node->inode_num = run;
84	node->mode = HOST_ENDIAN_TO_BFS_INT32(mode);
85	node->flags = HOST_ENDIAN_TO_BFS_INT32(INODE_IN_USE | INODE_NOT_READY);
86		// INODE_NOT_READY prevents the inode from being opened - it is
87		// cleared in InodeAllocator::Keep()
88	node->etc = (uint32)fInode;
89		// this is temporarily set along INODE_NOT_READY and lets bfs_read_vnode()
90		// find the associated Inode object
91
92	node->create_time = HOST_ENDIAN_TO_BFS_INT64((bigtime_t)time(NULL) << INODE_TIME_SHIFT);
93	node->last_modified_time = HOST_ENDIAN_TO_BFS_INT64(node->create_time
94		| (volume->GetUniqueID() & INODE_TIME_MASK));
95		// we use Volume::GetUniqueID() to avoid having too many duplicates in the
96		// last_modified index
97
98	node->inode_size = HOST_ENDIAN_TO_BFS_INT32(volume->InodeSize());
99
100	*_inode = fInode;
101	return B_OK;
102}
103
104
105status_t
106InodeAllocator::CreateTree()
107{
108	Volume *volume = fTransaction->GetVolume();
109
110	// force S_STR_INDEX to be set, if no type is set
111	if ((fInode->Mode() & S_INDEX_TYPES) == 0)
112		fInode->Node()->mode |= HOST_ENDIAN_TO_BFS_INT32(S_STR_INDEX);
113
114	BPlusTree *tree = fInode->fTree = new BPlusTree(fTransaction, fInode);
115	if (tree == NULL || tree->InitCheck() < B_OK)
116		return B_ERROR;
117
118	if (fInode->IsRegularNode()) {
119		if (tree->Insert(fTransaction, ".", fInode->ID()) < B_OK
120			|| tree->Insert(fTransaction, "..", volume->ToVnode(fInode->Parent())) < B_OK)
121			return B_ERROR;
122	}
123	return B_OK;
124}
125
126
127status_t
128InodeAllocator::Keep()
129{
130	ASSERT(fInode != NULL && fTransaction != NULL);
131
132	fInode->Node()->flags &= ~HOST_ENDIAN_TO_BFS_INT32(INODE_NOT_READY);
133	status_t status = fInode->WriteBack(fTransaction);
134
135	fTransaction = NULL;
136	fInode = NULL;
137
138	return status;
139}
140
141
142//	#pragma mark -
143
144
145status_t
146bfs_inode::InitCheck(Volume *volume)
147{
148	if (Flags() & INODE_NOT_READY) {
149		// the other fields may not yet contain valid values
150		return B_BUSY;
151	}
152	if (Flags() & INODE_DELETED)
153		return B_NOT_ALLOWED;
154
155	if (Magic1() != INODE_MAGIC1
156		|| !(Flags() & INODE_IN_USE)
157		|| inode_num.Length() != 1
158		// matches inode size?
159		|| (uint32)InodeSize() != volume->InodeSize()
160		// parent resides on disk?
161		|| parent.AllocationGroup() > int32(volume->AllocationGroups())
162		|| parent.AllocationGroup() < 0
163		|| parent.Start() > (1L << volume->AllocationGroupShift())
164		|| parent.Length() != 1
165		// attributes, too?
166		|| attributes.AllocationGroup() > int32(volume->AllocationGroups())
167		|| attributes.AllocationGroup() < 0
168		|| attributes.Start() > (1L << volume->AllocationGroupShift()))
169		RETURN_ERROR(B_BAD_DATA);
170
171	// ToDo: Add some tests to check the integrity of the other stuff here,
172	// especially for the data_stream!
173
174	return B_OK;
175}
176
177
178//	#pragma mark -
179
180
181Inode::Inode(Volume *volume, vnode_id id, bool empty, uint8 reenter)
182	: CachedBlock(volume, volume->VnodeToBlock(id), empty),
183	fTree(NULL),
184	fLock()
185{
186	PRINT(("Inode::Inode(%p, %Ld, %s, %s) @ %p\n",
187		volume, id, empty ? "empty" : "not-empty", reenter ? "reenter":"not-reenter", this));
188
189	Initialize();
190}
191
192
193Inode::Inode(CachedBlock *cached)
194	: CachedBlock(cached),
195	fTree(NULL),
196	fLock()
197{
198	PRINT(("Inode::Inode(%p) @ %p\n", cached, this));
199
200	Initialize();
201}
202
203
204Inode::~Inode()
205{
206	PRINT(("Inode::~Inode() @ %p\n", this));
207
208	delete fTree;
209}
210
211
212void
213Inode::Initialize()
214{
215	char lockName[32];
216	sprintf(lockName, "bfs inode %ld.%d", BlockRun().AllocationGroup(), BlockRun().Start());
217	fLock.Initialize(lockName);
218
219	Node()->flags &= HOST_ENDIAN_TO_BFS_INT32(INODE_PERMANENT_FLAGS);
220
221	// these two will help to maintain the indices
222	fOldSize = Size();
223	fOldLastModified = LastModified();
224
225	if (IsContainer())
226		fTree = new BPlusTree(this);
227
228	fCache = NULL;
229}
230
231
232status_t
233Inode::InitCheck(bool checkNode)
234{
235	if (!Node())
236		RETURN_ERROR(B_IO_ERROR);
237
238	// test inode magic and flags
239	if (checkNode) {
240		status_t status = Node()->InitCheck(fVolume);
241		if (status == B_BUSY)
242			return B_BUSY;
243
244		if (status < B_OK) {
245			FATAL(("inode at block %Ld corrupt!\n", fBlockNumber));
246			RETURN_ERROR(B_BAD_DATA);
247		}
248	}
249
250	if (IsContainer()) {
251		// inodes that have a
252		if (fTree == NULL)
253			RETURN_ERROR(B_NO_MEMORY);
254
255		status_t status = fTree->InitCheck();
256		if (status < B_OK) {
257			FATAL(("inode tree at block %Ld corrupt!\n", fBlockNumber));
258			RETURN_ERROR(B_BAD_DATA);
259		}
260	}
261
262	// it's more important to know that the inode is corrupt
263	// so we check for the lock not until here
264	return fLock.InitCheck();
265}
266
267
268status_t
269Inode::CheckPermissions(int accessMode) const
270{
271	uid_t user = geteuid();
272	gid_t group = getegid();
273
274	// you never have write access to a read-only volume
275	if (accessMode & W_OK && fVolume->IsReadOnly())
276		return B_READ_ONLY_DEVICE;
277
278	// root users always have full access (but they can't execute anything)
279	if (user == 0 && !((accessMode & X_OK) && (Mode() & S_IXUSR) == 0))
280		return B_OK;
281
282	// shift mode bits, to check directly against accessMode
283	mode_t mode = Mode();
284	if (user == (uid_t)Node()->UserID())
285		mode >>= 6;
286	else if (group == (gid_t)Node()->GroupID())
287		mode >>= 3;
288
289	if (accessMode & ~(mode & S_IRWXO))
290		return B_NOT_ALLOWED;
291
292	return B_OK;
293}
294
295
296//	#pragma mark -
297
298
299void
300Inode::AddIterator(AttributeIterator *iterator)
301{
302	if (fSmallDataLock.Lock() < B_OK)
303		return;
304
305	fIterators.Add(iterator);
306
307	fSmallDataLock.Unlock();
308}
309
310
311void
312Inode::RemoveIterator(AttributeIterator *iterator)
313{
314	if (fSmallDataLock.Lock() < B_OK)
315		return;
316
317	fIterators.Remove(iterator);
318
319	fSmallDataLock.Unlock();
320}
321
322
323/**	Tries to free up "bytes" space in the small_data section by moving
324 *	attributes to real files. Used for system attributes like the name.
325 *	You need to hold the fSmallDataLock when you call this method
326 */
327
328status_t
329Inode::MakeSpaceForSmallData(Transaction *transaction, const char *name, int32 bytes)
330{
331	ASSERT(fSmallDataLock.IsLocked());
332
333	while (bytes > 0) {
334		small_data *item = Node()->SmallDataStart(), *max = NULL;
335		int32 index = 0, maxIndex = 0;
336		for (; !item->IsLast(Node()); item = item->Next(), index++) {
337			// should not remove those
338			if (*item->Name() == FILE_NAME_NAME || !strcmp(name, item->Name()))
339				continue;
340
341			if (max == NULL || max->Size() < item->Size()) {
342				maxIndex = index;
343				max = item;
344			}
345
346			// remove the first one large enough to free the needed amount of bytes
347			if (bytes < (int32)item->Size())
348				break;
349		}
350
351		if (item->IsLast(Node()) || (int32)item->Size() < bytes)
352			return B_ERROR;
353
354		bytes -= max->Size();
355
356		// Move the attribute to a real attribute file
357		// Luckily, this doesn't cause any index updates
358
359		Inode *attribute;
360		status_t status = CreateAttribute(transaction, item->Name(), item->Type(), &attribute);
361		if (status < B_OK)
362			RETURN_ERROR(status);
363
364		size_t length = item->DataSize();
365		status = attribute->WriteAt(transaction, 0, item->Data(), &length);
366
367		ReleaseAttribute(attribute);
368
369		if (status < B_OK) {
370			Vnode vnode(fVolume,Attributes());
371			Inode *attributes;
372			if (vnode.Get(&attributes) < B_OK
373				|| attributes->Remove(transaction, name) < B_OK) {
374				FATAL(("Could not remove newly created attribute!\n"));
375			}
376
377			RETURN_ERROR(status);
378		}
379
380		RemoveSmallData(max, maxIndex);
381	}
382	return B_OK;
383}
384
385
386/**	Private function which removes the given attribute from the small_data
387 *	section.
388 *	You need to hold the fSmallDataLock when you call this method
389 */
390
391status_t
392Inode::RemoveSmallData(small_data *item, int32 index)
393{
394	ASSERT(fSmallDataLock.IsLocked());
395
396	small_data *next = item->Next();
397	if (!next->IsLast(Node())) {
398		// find the last attribute
399		small_data *last = next;
400		while (!last->IsLast(Node()))
401			last = last->Next();
402
403		int32 size = (uint8 *)last - (uint8 *)next;
404		if (size < 0 || size > (uint8 *)Node() + fVolume->BlockSize() - (uint8 *)next)
405			return B_BAD_DATA;
406
407		memmove(item, next, size);
408
409		// Move the "last" one to its new location and
410		// correctly terminate the small_data section
411		last = (small_data *)((uint8 *)last - ((uint8 *)next - (uint8 *)item));
412		memset(last, 0, (uint8 *)Node() + fVolume->BlockSize() - (uint8 *)last);
413	} else
414		memset(item, 0, item->Size());
415
416	// update all current iterators
417	AttributeIterator *iterator = NULL;
418	while ((iterator = fIterators.Next(iterator)) != NULL)
419		iterator->Update(index, -1);
420
421	return B_OK;
422}
423
424
425/**	Removes the given attribute from the small_data section.
426 *	Note that you need to write back the inode yourself after having called
427 *	that method.
428 */
429
430status_t
431Inode::RemoveSmallData(Transaction *transaction, const char *name)
432{
433	if (name == NULL)
434		return B_BAD_VALUE;
435
436	SimpleLocker locker(fSmallDataLock);
437
438	// search for the small_data item
439
440	small_data *item = Node()->SmallDataStart();
441	int32 index = 0;
442	while (!item->IsLast(Node()) && strcmp(item->Name(), name)) {
443		item = item->Next();
444		index++;
445	}
446
447	if (item->IsLast(Node()))
448		return B_ENTRY_NOT_FOUND;
449
450	return RemoveSmallData(item, index);
451}
452
453
454/**	Try to place the given attribute in the small_data section - if the
455 *	new attribute is too big to fit in that section, it returns B_DEVICE_FULL.
456 *	In that case, the attribute should be written to a real attribute file;
457 *	if the attribute was already part of the small_data section, but the new
458 *	one wouldn't fit, the old one is automatically removed from the small_data
459 *	section.
460 *	Note that you need to write back the inode yourself after having called that
461 *	method - it's a bad API decision that it needs a transaction but enforces you
462 *	to write back the inode all by yourself, but it's just more efficient in most
463 *	cases...
464 */
465
466status_t
467Inode::AddSmallData(Transaction *transaction, const char *name, uint32 type,
468	const uint8 *data, size_t length, bool force)
469{
470	if (name == NULL || data == NULL || type == 0)
471		return B_BAD_VALUE;
472
473	// reject any requests that can't fit into the small_data section
474	uint32 nameLength = strlen(name);
475	uint32 spaceNeeded = sizeof(small_data) + nameLength + 3 + length + 1;
476	if (spaceNeeded > fVolume->InodeSize() - sizeof(bfs_inode))
477		return B_DEVICE_FULL;
478
479	SimpleLocker locker(fSmallDataLock);
480
481	small_data *item = Node()->SmallDataStart();
482	int32 index = 0;
483	while (!item->IsLast(Node()) && strcmp(item->Name(), name)) {
484		item = item->Next();
485		index++;
486	}
487
488	// is the attribute already in the small_data section?
489	// then just replace the data part of that one
490	if (!item->IsLast(Node())) {
491		// find last attribute
492		small_data *last = item;
493		while (!last->IsLast(Node()))
494			last = last->Next();
495
496		// try to change the attributes value
497		if (item->data_size > length
498			|| force
499			|| ((uint8 *)last + length - item->DataSize()) <= ((uint8 *)Node() + fVolume->InodeSize())) {
500			// make room for the new attribute if needed (and we are forced to do so)
501			if (force
502				&& ((uint8 *)last + length - item->DataSize()) > ((uint8 *)Node() + fVolume->InodeSize())) {
503				// We also take the free space at the end of the small_data section
504				// into account, and request only what's really needed
505				uint32 needed = length - item->DataSize() -
506						(uint32)((uint8 *)Node() + fVolume->InodeSize() - (uint8 *)last);
507
508				if (MakeSpaceForSmallData(transaction, name, needed) < B_OK)
509					return B_ERROR;
510
511				// reset our pointers
512				item = Node()->SmallDataStart();
513				index = 0;
514				while (!item->IsLast(Node()) && strcmp(item->Name(), name)) {
515					item = item->Next();
516					index++;
517				}
518
519				last = item;
520				while (!last->IsLast(Node()))
521					last = last->Next();
522			}
523
524			// Normally, we can just overwrite the attribute data as the size
525			// is specified by the type and does not change that often
526			if (length != item->DataSize()) {
527				// move the attributes after the current one
528				small_data *next = item->Next();
529				if (!next->IsLast(Node()))
530					memmove((uint8 *)item + spaceNeeded, next, (uint8 *)last - (uint8 *)next);
531
532				// Move the "last" one to its new location and
533				// correctly terminate the small_data section
534				last = (small_data *)((uint8 *)last - ((uint8 *)next - ((uint8 *)item + spaceNeeded)));
535				if ((uint8 *)last < (uint8 *)Node() + fVolume->BlockSize())
536					memset(last, 0, (uint8 *)Node() + fVolume->BlockSize() - (uint8 *)last);
537
538				item->data_size = HOST_ENDIAN_TO_BFS_INT16(length);
539			}
540
541			item->type = HOST_ENDIAN_TO_BFS_INT32(type);
542			memcpy(item->Data(), data, length);
543			item->Data()[length] = '\0';
544
545			return B_OK;
546		}
547
548		// Could not replace the old attribute, so remove it to let
549		// let the calling function create an attribute file for it
550		if (RemoveSmallData(item, index) < B_OK)
551			return B_ERROR;
552
553		return B_DEVICE_FULL;
554	}
555
556	// try to add the new attribute!
557
558	if ((uint8 *)item + spaceNeeded > (uint8 *)Node() + fVolume->InodeSize()) {
559		// there is not enough space for it!
560		if (!force)
561			return B_DEVICE_FULL;
562
563		// make room for the new attribute
564		if (MakeSpaceForSmallData(transaction, name, spaceNeeded) < B_OK)
565			return B_ERROR;
566
567		// get new last item!
568		item = Node()->SmallDataStart();
569		index = 0;
570		while (!item->IsLast(Node())) {
571			item = item->Next();
572			index++;
573		}
574	}
575
576	memset(item, 0, spaceNeeded);
577	item->type = HOST_ENDIAN_TO_BFS_INT32(type);
578	item->name_size = HOST_ENDIAN_TO_BFS_INT16(nameLength);
579	item->data_size = HOST_ENDIAN_TO_BFS_INT16(length);
580	strcpy(item->Name(), name);
581	memcpy(item->Data(), data, length);
582
583	// correctly terminate the small_data section
584	item = item->Next();
585	if (!item->IsLast(Node()))
586		memset(item, 0, (uint8 *)Node() + fVolume->InodeSize() - (uint8 *)item);
587
588	// update all current iterators
589	AttributeIterator *iterator = NULL;
590	while ((iterator = fIterators.Next(iterator)) != NULL)
591		iterator->Update(index, 1);
592
593	return B_OK;
594}
595
596
597/**	Iterates through the small_data section of an inode.
598 *	To start at the beginning of this section, you let smallData
599 *	point to NULL, like:
600 *		small_data *data = NULL;
601 *		while (inode->GetNextSmallData(&data) { ... }
602 *
603 *	This function is reentrant and doesn't allocate any memory;
604 *	you can safely stop calling it at any point (you don't need
605 *	to iterate through the whole list).
606 *	You need to hold the fSmallDataLock when you call this method
607 */
608
609status_t
610Inode::GetNextSmallData(small_data **_smallData) const
611{
612	if (!Node())
613		RETURN_ERROR(B_ERROR);
614
615	ASSERT(fSmallDataLock.IsLocked());
616
617	small_data *data = *_smallData;
618
619	// begin from the start?
620	if (data == NULL)
621		data = Node()->SmallDataStart();
622	else
623		data = data->Next();
624
625	// is already last item?
626	if (data->IsLast(Node()))
627		return B_ENTRY_NOT_FOUND;
628
629	*_smallData = data;
630
631	return B_OK;
632}
633
634
635/**	Finds the attribute "name" in the small data section, and
636 *	returns a pointer to it (or NULL if it doesn't exist).
637 *	You need to hold the fSmallDataLock when you call this method
638 */
639
640small_data *
641Inode::FindSmallData(const char *name) const
642{
643	ASSERT(fSmallDataLock.IsLocked());
644
645	small_data *smallData = NULL;
646	while (GetNextSmallData(&smallData) == B_OK) {
647		if (!strcmp(smallData->Name(), name))
648			return smallData;
649	}
650	return NULL;
651}
652
653
654/** Returns a pointer to the node's name if present in the small data
655 *	section, NULL otherwise.
656 *	You need to hold the fSmallDataLock when you call this method
657 */
658
659const char *
660Inode::Name() const
661{
662	ASSERT(fSmallDataLock.IsLocked());
663
664	small_data *smallData = NULL;
665	while (GetNextSmallData(&smallData) == B_OK) {
666		if (*smallData->Name() == FILE_NAME_NAME && smallData->NameSize() == FILE_NAME_NAME_LENGTH)
667			return (const char *)smallData->Data();
668	}
669	return NULL;
670}
671
672
673/** Copies the node's name into the provided buffer.
674 *	The buffer must be B_FILE_NAME_LENGTH bytes large.
675 */
676
677status_t
678Inode::GetName(char *buffer) const
679{
680	SimpleLocker locker(fSmallDataLock);
681
682	const char *name = Name();
683	if (name == NULL)
684		return B_ENTRY_NOT_FOUND;
685
686	strlcpy(buffer, name, B_FILE_NAME_LENGTH);
687	return B_OK;
688}
689
690
691/**	Changes or set the name of a file: in the inode small_data section only, it
692 *	doesn't change it in the parent directory's b+tree.
693 *	Note that you need to write back the inode yourself after having called
694 *	that method. It suffers from the same API decision as AddSmallData() does
695 *	(and for the same reason).
696 */
697
698status_t
699Inode::SetName(Transaction *transaction, const char *name)
700{
701	if (name == NULL || *name == '\0')
702		return B_BAD_VALUE;
703
704	const char nameTag[2] = {FILE_NAME_NAME, 0};
705
706	return AddSmallData(transaction, nameTag, FILE_NAME_TYPE, (uint8 *)name, strlen(name), true);
707}
708
709
710/**	Reads data from the specified attribute.
711 *	This is a high-level attribute function that understands attributes
712 *	in the small_data section as well as real attribute files.
713 */
714
715status_t
716Inode::ReadAttribute(const char *name, int32 type, off_t pos, uint8 *buffer, size_t *_length)
717{
718	if (pos < 0)
719		pos = 0;
720
721	// search in the small_data section (which has to be locked first)
722	{
723		SimpleLocker locker(fSmallDataLock);
724
725		small_data *smallData = FindSmallData(name);
726		if (smallData != NULL) {
727			size_t length = *_length;
728			if (pos >= smallData->data_size) {
729				*_length = 0;
730				return B_OK;
731			}
732			if (length + pos > smallData->DataSize())
733				length = smallData->DataSize() - pos;
734
735			memcpy(buffer, smallData->Data() + pos, length);
736			*_length = length;
737			return B_OK;
738		}
739	}
740
741	// search in the attribute directory
742	Inode *attribute;
743	status_t status = GetAttribute(name, &attribute);
744	if (status == B_OK) {
745		if (attribute->Lock().Lock() == B_OK) {
746			status = attribute->ReadAt(pos, (uint8 *)buffer, _length);
747			attribute->Lock().Unlock();
748		} else
749			status = B_ERROR;
750
751		ReleaseAttribute(attribute);
752	}
753
754	RETURN_ERROR(status);
755}
756
757
758/**	Writes data to the specified attribute.
759 *	This is a high-level attribute function that understands attributes
760 *	in the small_data section as well as real attribute files.
761 */
762
763status_t
764Inode::WriteAttribute(Transaction *transaction, const char *name, int32 type, off_t pos,
765	const uint8 *buffer, size_t *_length)
766{
767	// needed to maintain the index
768	uint8 oldBuffer[BPLUSTREE_MAX_KEY_LENGTH], *oldData = NULL;
769	size_t oldLength = 0;
770
771	// ToDo: we actually depend on that the contents of "buffer" are constant.
772	// If they get changed during the write (hey, user programs), we may mess
773	// up our index trees!
774
775	Index index(fVolume);
776	index.SetTo(name);
777
778	Inode *attribute = NULL;
779	status_t status = B_OK;
780
781	if (GetAttribute(name, &attribute) < B_OK) {
782		// save the old attribute data
783		fSmallDataLock.Lock();
784
785		small_data *smallData = FindSmallData(name);
786		if (smallData != NULL) {
787			oldLength = smallData->DataSize();
788			if (oldLength > 0) {
789				if (oldLength > BPLUSTREE_MAX_KEY_LENGTH)
790					oldLength = BPLUSTREE_MAX_KEY_LENGTH;
791				memcpy(oldData = oldBuffer, smallData->Data(), oldLength);
792			}
793		}
794		fSmallDataLock.Unlock();
795
796		// if the attribute doesn't exist yet (as a file), try to put it in the
797		// small_data section first - if that fails (due to insufficent space),
798		// create a real attribute file
799		status = AddSmallData(transaction, name, type, buffer, *_length);
800		if (status == B_DEVICE_FULL) {
801			status = CreateAttribute(transaction, name, type, &attribute);
802			if (status < B_OK)
803				RETURN_ERROR(status);
804		} else if (status == B_OK)
805			status = WriteBack(transaction);
806	}
807
808	if (attribute != NULL) {
809		if (attribute->Lock().LockWrite() == B_OK) {
810			// save the old attribute data (if this fails, oldLength will reflect it)
811			if (fVolume->CheckForLiveQuery(name) && attribute->Size() > 0) {
812				oldLength = BPLUSTREE_MAX_KEY_LENGTH;
813				if (attribute->ReadAt(0, oldBuffer, &oldLength) == B_OK)
814					oldData = oldBuffer;
815			}
816			// ToDo: check if the data fits in the inode now and delete the attribute file if so
817			status = attribute->WriteAt(transaction, pos, buffer, _length);
818			if (status == B_OK) {
819				// The attribute type might have been changed - we need to adopt
820				// the new one
821				attribute->Node()->type = HOST_ENDIAN_TO_BFS_INT32(type);
822				status = attribute->WriteBack(transaction);
823			}
824
825			attribute->Lock().UnlockWrite();
826		} else
827			status = B_ERROR;
828
829		ReleaseAttribute(attribute);
830	}
831
832	// ToDo: find a better way than this "pos" thing (the begin of the old key
833	//	must be copied to the start of the new one for a comparison)
834	if (status == B_OK && pos == 0) {
835		// index only the first BPLUSTREE_MAX_KEY_LENGTH bytes
836		uint16 length = *_length;
837		if (length > BPLUSTREE_MAX_KEY_LENGTH)
838			length = BPLUSTREE_MAX_KEY_LENGTH;
839
840		// Update index. Note, Index::Update() may be called even if initializing
841		// the index failed - it will just update the live queries in this case
842		if (pos < length || pos < oldLength)
843			index.Update(transaction, name, type, oldData, oldLength, buffer, length, this);
844	}
845	return status;
846}
847
848
849/**	Removes the specified attribute from the inode.
850 *	This is a high-level attribute function that understands attributes
851 *	in the small_data section as well as real attribute files.
852 */
853
854status_t
855Inode::RemoveAttribute(Transaction *transaction, const char *name)
856{
857	Index index(fVolume);
858	bool hasIndex = index.SetTo(name) == B_OK;
859
860	// update index for attributes in the small_data section
861	{
862		fSmallDataLock.Lock();
863
864		small_data *smallData = FindSmallData(name);
865		if (smallData != NULL) {
866			uint32 length = smallData->DataSize();
867			if (length > BPLUSTREE_MAX_KEY_LENGTH)
868				length = BPLUSTREE_MAX_KEY_LENGTH;
869			index.Update(transaction, name, smallData->Type(), smallData->Data(), length, NULL, 0, this);
870		}
871		fSmallDataLock.Unlock();
872	}
873
874	status_t status = RemoveSmallData(transaction, name);
875	if (status == B_OK) {
876		status = WriteBack(transaction);
877	} else if (status == B_ENTRY_NOT_FOUND && !Attributes().IsZero()) {
878		// remove the attribute file if it exists
879		Vnode vnode(fVolume, Attributes());
880		Inode *attributes;
881		if ((status = vnode.Get(&attributes)) < B_OK)
882			return status;
883
884		// update index
885		Inode *attribute;
886		if ((hasIndex || fVolume->CheckForLiveQuery(name))
887			&& GetAttribute(name, &attribute) == B_OK) {
888			uint8 data[BPLUSTREE_MAX_KEY_LENGTH];
889			size_t length = BPLUSTREE_MAX_KEY_LENGTH;
890			if (attribute->ReadAt(0, data, &length) == B_OK)
891				index.Update(transaction, name, attribute->Type(), data, length, NULL, 0, this);
892
893			ReleaseAttribute(attribute);
894		}
895
896		if ((status = attributes->Remove(transaction, name)) < B_OK)
897			return status;
898
899		if (attributes->IsEmpty()) {
900			// remove attribute directory (don't fail if that can't be done)
901			if (remove_vnode(fVolume->ID(), attributes->ID()) == B_OK) {
902				// update the inode, so that no one will ever doubt it's deleted :-)
903				attributes->Node()->flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_DELETED);
904				if (attributes->WriteBack(transaction) == B_OK) {
905					Attributes().SetTo(0, 0, 0);
906					WriteBack(transaction);
907				} else
908					unremove_vnode(fVolume->ID(), attributes->ID());
909			}
910		}
911	}
912	return status;
913}
914
915
916status_t
917Inode::GetAttribute(const char *name, Inode **_attribute)
918{
919	// does this inode even have attributes?
920	if (Attributes().IsZero())
921		return B_ENTRY_NOT_FOUND;
922
923	Vnode vnode(fVolume, Attributes());
924	Inode *attributes;
925	if (vnode.Get(&attributes) < B_OK) {
926		FATAL(("get_vnode() failed in Inode::GetAttribute(name = \"%s\")\n", name));
927		return B_ERROR;
928	}
929
930	BPlusTree *tree;
931	status_t status = attributes->GetTree(&tree);
932	if (status == B_OK) {
933		vnode_id id;
934		if ((status = tree->Find((uint8 *)name, (uint16)strlen(name), &id)) == B_OK) {
935			Vnode vnode(fVolume, id);
936			Inode *inode;
937			// Check if the attribute is really an attribute
938			if (vnode.Get(&inode) < B_OK || !inode->IsAttribute())
939				return B_ERROR;
940
941			*_attribute = inode;
942			vnode.Keep();
943			return B_OK;
944		}
945	}
946	return status;
947}
948
949
950void
951Inode::ReleaseAttribute(Inode *attribute)
952{
953	if (attribute == NULL)
954		return;
955
956	put_vnode(fVolume->ID(), attribute->ID());
957}
958
959
960status_t
961Inode::CreateAttribute(Transaction *transaction, const char *name, uint32 type, Inode **attribute)
962{
963	// do we need to create the attribute directory first?
964	if (Attributes().IsZero()) {
965		status_t status = Inode::Create(transaction, this, NULL,
966			S_ATTR_DIR | S_DIRECTORY | 0666, 0, 0, NULL);
967		if (status < B_OK)
968			RETURN_ERROR(status);
969	}
970	Vnode vnode(fVolume, Attributes());
971	Inode *attributes;
972	if (vnode.Get(&attributes) < B_OK)
973		return B_ERROR;
974
975	// Inode::Create() locks the inode for us
976	return Inode::Create(transaction, attributes, name,
977		S_ATTR | S_FILE | 0666, 0, type, NULL, attribute);
978}
979
980
981//	#pragma mark -
982
983
984/**	Gives the caller direct access to the b+tree for a given directory.
985 *	The tree is no longer created on demand, but when the inode is first
986 *	created. That will report any potential errors upfront, saves locking,
987 *	and should work as good (though a bit slower).
988 */
989
990status_t
991Inode::GetTree(BPlusTree **tree)
992{
993	if (fTree) {
994		*tree = fTree;
995		return B_OK;
996	}
997
998	RETURN_ERROR(B_BAD_VALUE);
999}
1000
1001
1002bool
1003Inode::IsEmpty()
1004{
1005	BPlusTree *tree;
1006	status_t status = GetTree(&tree);
1007	if (status < B_OK)
1008		return status;
1009
1010	TreeIterator iterator(tree);
1011
1012	// index and attribute directories are really empty when they are
1013	// empty - directories for standard files always contain ".", and
1014	// "..", so we need to ignore those two
1015
1016	uint32 count = 0;
1017	char name[BPLUSTREE_MAX_KEY_LENGTH];
1018	uint16 length;
1019	vnode_id id;
1020	while (iterator.GetNextEntry(name, &length, B_FILE_NAME_LENGTH, &id) == B_OK) {
1021		if (Mode() & (S_ATTR_DIR | S_INDEX_DIR))
1022			return false;
1023
1024		if (++count > 2 || strcmp(".", name) && strcmp("..", name))
1025			return false;
1026	}
1027	return true;
1028}
1029
1030
1031/** Finds the block_run where "pos" is located in the data_stream of
1032 *	the inode.
1033 *	If successful, "offset" will then be set to the file offset
1034 *	of the block_run returned; so "pos - offset" is for the block_run
1035 *	what "pos" is for the whole stream.
1036 *	The caller has to make sure that "pos" is inside the stream.
1037 */
1038
1039status_t
1040Inode::FindBlockRun(off_t pos, block_run &run, off_t &offset)
1041{
1042	// The BPlusTree class will call this function, we'll provide
1043	// standard cached access only from here
1044	return ((Stream<Access::Cached> *)this)->FindBlockRun(pos, run, offset);
1045}
1046
1047
1048status_t
1049Inode::ReadAt(off_t pos, uint8 *buffer, size_t *_length)
1050{
1051	// call the right ReadAt() method, depending on the inode flags
1052
1053	if (Flags() & INODE_NO_CACHE)
1054		return ((Stream<Access::Uncached> *)this)->ReadAt(pos, buffer, _length);
1055
1056	if (Flags() & INODE_LOGGED)
1057		return ((Stream<Access::Logged> *)this)->ReadAt(pos, buffer, _length);
1058
1059	return ((Stream<Access::Cached> *)this)->ReadAt(pos, buffer, _length);
1060}
1061
1062
1063status_t
1064Inode::WriteAt(Transaction *transaction, off_t pos, const uint8 *buffer, size_t *_length)
1065{
1066	// call the right WriteAt() method, depending on the inode flags
1067
1068	// update the last modification time in memory, it will be written
1069	// back to the inode, and the index when the file is closed
1070	// ToDo: should update the internal last modified time only at this point!
1071	Node()->last_modified_time = (bigtime_t)time(NULL) << INODE_TIME_SHIFT;
1072
1073	if (Flags() & INODE_NO_CACHE)
1074		return ((Stream<Access::Uncached> *)this)->WriteAt(transaction, pos, buffer, _length);
1075
1076	if (Flags() & INODE_LOGGED)
1077		return ((Stream<Access::Logged> *)this)->WriteAt(transaction, pos, buffer, _length);
1078
1079	return ((Stream<Access::Cached> *)this)->WriteAt(transaction, pos, buffer, _length);
1080}
1081
1082
1083/**	Fills the gap between the old file size and the new file size
1084 *	with zeros.
1085 *	It's more or less a copy of Inode::WriteAt() but it can handle
1086 *	length differences of more than just 4 GB, and it never uses
1087 *	the log, even if the INODE_LOGGED flag is set.
1088 */
1089
1090status_t
1091Inode::FillGapWithZeros(off_t pos, off_t newSize)
1092{
1093	// ToDo: we currently do anything here, same as original BFS!
1094	//if (pos >= newSize)
1095		return B_OK;
1096
1097	block_run run;
1098	off_t offset;
1099	if (FindBlockRun(pos, run, offset) < B_OK)
1100		RETURN_ERROR(B_BAD_VALUE);
1101
1102	off_t length = newSize - pos;
1103	uint32 bytesWritten = 0;
1104	uint32 blockSize = fVolume->BlockSize();
1105	uint32 blockShift = fVolume->BlockShift();
1106	uint8 *block;
1107
1108	// the first block_run we write could not be aligned to the block_size boundary
1109	// (write partial block at the beginning)
1110
1111	// pos % block_size == (pos - offset) % block_size, offset % block_size == 0
1112	if (pos % blockSize != 0) {
1113		run.start += (pos - offset) / blockSize;
1114		run.length -= (pos - offset) / blockSize;
1115
1116		CachedBlock cached(fVolume,run);
1117		if ((block = cached.Block()) == NULL)
1118			RETURN_ERROR(B_BAD_VALUE);
1119
1120		bytesWritten = blockSize - (pos % blockSize);
1121		if (length < bytesWritten)
1122			bytesWritten = length;
1123
1124		memset(block + (pos % blockSize), 0, bytesWritten);
1125		if (fVolume->WriteBlocks(cached.BlockNumber(), block, 1) < B_OK)
1126			RETURN_ERROR(B_IO_ERROR);
1127
1128		pos += bytesWritten;
1129
1130		length -= bytesWritten;
1131		if (length == 0)
1132			return B_OK;
1133
1134		if (FindBlockRun(pos, run, offset) < B_OK)
1135			RETURN_ERROR(B_BAD_VALUE);
1136	}
1137
1138	while (length > 0) {
1139		// offset is the offset to the current pos in the block_run
1140		run.start = HOST_ENDIAN_TO_BFS_INT16(run.Start() + ((pos - offset) >> blockShift));
1141		run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() - ((pos - offset) >> blockShift));
1142
1143		CachedBlock cached(fVolume);
1144		off_t blockNumber = fVolume->ToBlock(run);
1145		for (int32 i = 0; i < run.Length(); i++) {
1146			if ((block = cached.SetTo(blockNumber + i, true)) == NULL)
1147				RETURN_ERROR(B_IO_ERROR);
1148
1149			if (fVolume->WriteBlocks(cached.BlockNumber(), block, 1) < B_OK)
1150				RETURN_ERROR(B_IO_ERROR);
1151		}
1152
1153		int32 bytes = run.Length() << blockShift;
1154		length -= bytes;
1155		bytesWritten += bytes;
1156
1157		// since we don't respect a last partial block, length can be lower
1158		if (length <= 0)
1159			break;
1160
1161		pos += bytes;
1162
1163		if (FindBlockRun(pos, run, offset) < B_OK)
1164			RETURN_ERROR(B_BAD_VALUE);
1165	}
1166	return B_OK;
1167}
1168
1169
1170/** Allocates NUM_ARRAY_BLOCKS blocks, and clears their contents. Growing
1171 *	the indirect and double indirect range uses this method.
1172 *	The allocated block_run is saved in "run"
1173 */
1174
1175status_t
1176Inode::AllocateBlockArray(Transaction *transaction, block_run &run)
1177{
1178	if (!run.IsZero())
1179		return B_BAD_VALUE;
1180
1181	status_t status = fVolume->Allocate(transaction, this, NUM_ARRAY_BLOCKS, run, NUM_ARRAY_BLOCKS);
1182	if (status < B_OK)
1183		return status;
1184
1185	// make sure those blocks are empty
1186	CachedBlock cached(fVolume);
1187	off_t block = fVolume->ToBlock(run);
1188
1189	for (int32 i = 0; i < run.Length(); i++) {
1190		block_run *runs = (block_run *)cached.SetTo(block + i, true);
1191		if (runs == NULL)
1192			return B_IO_ERROR;
1193
1194		if (cached.WriteBack(transaction) < B_OK)
1195			return B_IO_ERROR;
1196	}
1197	return B_OK;
1198}
1199
1200
1201status_t
1202Inode::GrowStream(Transaction *transaction, off_t size)
1203{
1204	data_stream *data = &Node()->data;
1205
1206	// is the data stream already large enough to hold the new size?
1207	// (can be the case with preallocated blocks)
1208	if (size < data->MaxDirectRange()
1209		|| size < data->MaxIndirectRange()
1210		|| size < data->MaxDoubleIndirectRange()) {
1211		data->size = HOST_ENDIAN_TO_BFS_INT64(size);
1212		return B_OK;
1213	}
1214
1215	// how many bytes are still needed? (unused ranges are always zero)
1216	uint16 minimum = 1;
1217	off_t bytes;
1218	if (data->Size() < data->MaxDoubleIndirectRange()) {
1219		bytes = size - data->MaxDoubleIndirectRange();
1220		// the double indirect range can only handle multiple of NUM_ARRAY_BLOCKS
1221		minimum = NUM_ARRAY_BLOCKS;
1222	} else if (data->Size() < data->MaxIndirectRange())
1223		bytes = size - data->MaxIndirectRange();
1224	else if (data->Size() < data->MaxDirectRange())
1225		bytes = size - data->MaxDirectRange();
1226	else
1227		bytes = size - data->Size();
1228
1229	// do we have enough free blocks on the disk?
1230	off_t blocksRequested = (bytes + fVolume->BlockSize() - 1) >> fVolume->BlockShift();
1231	if (blocksRequested > fVolume->FreeBlocks())
1232		return B_DEVICE_FULL;
1233
1234	off_t blocksNeeded = blocksRequested;
1235		// because of preallocations and partial allocations, the number of
1236		// blocks we need to allocate may be different from the one we request
1237		// from the block allocator
1238
1239	// Should we preallocate some blocks (currently, always 64k)?
1240	// Attributes, attribute directories, and long symlinks usually won't get that big,
1241	// and should stay close to the inode - preallocating could be counterproductive.
1242	// Also, if free disk space is tight, we probably don't want to do this as well.
1243	if (!IsAttribute() && !IsAttributeDirectory() && !IsSymLink()
1244		&& blocksRequested < (65536 >> fVolume->BlockShift())
1245		&& fVolume->FreeBlocks() > 128)
1246		blocksRequested = 65536 >> fVolume->BlockShift();
1247
1248	while (blocksNeeded > 0) {
1249		// the requested blocks do not need to be returned with a
1250		// single allocation, so we need to iterate until we have
1251		// enough blocks allocated
1252		block_run run;
1253		status_t status = fVolume->Allocate(transaction, this, blocksRequested, run, minimum);
1254		if (status < B_OK)
1255			return status;
1256
1257		// okay, we have the needed blocks, so just distribute them to the
1258		// different ranges of the stream (direct, indirect & double indirect)
1259
1260		// ToDo: if anything goes wrong here, we probably want to free the
1261		// blocks that couldn't be distributed into the stream!
1262
1263		blocksNeeded -= run.Length();
1264		// don't preallocate if the first allocation was already too small
1265		blocksRequested = blocksNeeded;
1266		if (minimum > 1) {
1267			// make sure that "blocks" is a multiple of minimum
1268			blocksRequested = (blocksRequested + minimum - 1) & ~(minimum - 1);
1269		}
1270
1271		// Direct block range
1272
1273		if (data->Size() <= data->MaxDirectRange()) {
1274			// let's try to put them into the direct block range
1275			int32 free = 0;
1276			for (; free < NUM_DIRECT_BLOCKS; free++)
1277				if (data->direct[free].IsZero())
1278					break;
1279
1280			if (free < NUM_DIRECT_BLOCKS) {
1281				// can we merge the last allocated run with the new one?
1282				int32 last = free - 1;
1283				if (free > 0 && data->direct[last].MergeableWith(run))
1284					data->direct[last].length = HOST_ENDIAN_TO_BFS_INT16(data->direct[last].Length() + run.Length());
1285				else
1286					data->direct[free] = run;
1287
1288				data->max_direct_range = HOST_ENDIAN_TO_BFS_INT64(data->MaxDirectRange() + run.Length() * fVolume->BlockSize());
1289				data->size = HOST_ENDIAN_TO_BFS_INT64(blocksNeeded > 0 ? data->max_direct_range : size);
1290				continue;
1291			}
1292		}
1293
1294		// Indirect block range
1295
1296		if (data->Size() <= data->MaxIndirectRange() || !data->MaxIndirectRange()) {
1297			CachedBlock cached(fVolume);
1298			block_run *runs = NULL;
1299			uint32 free = 0;
1300			off_t block;
1301
1302			// if there is no indirect block yet, create one
1303			if (data->indirect.IsZero()) {
1304				status = AllocateBlockArray(transaction, data->indirect);
1305				if (status < B_OK)
1306					return status;
1307
1308				data->max_indirect_range = HOST_ENDIAN_TO_BFS_INT64(data->MaxDirectRange());
1309				// insert the block_run in the first block
1310				runs = (block_run *)cached.SetTo(data->indirect);
1311			} else {
1312				uint32 numberOfRuns = fVolume->BlockSize() / sizeof(block_run);
1313				block = fVolume->ToBlock(data->indirect);
1314
1315				// search first empty entry
1316				int32 i = 0;
1317				for (; i < data->indirect.Length(); i++) {
1318					if ((runs = (block_run *)cached.SetTo(block + i)) == NULL)
1319						return B_IO_ERROR;
1320
1321					for (free = 0; free < numberOfRuns; free++)
1322						if (runs[free].IsZero())
1323							break;
1324
1325					if (free < numberOfRuns)
1326						break;
1327				}
1328				if (i == data->indirect.Length())
1329					runs = NULL;
1330			}
1331
1332			if (runs != NULL) {
1333				// try to insert the run to the last one - note that this doesn't
1334				// take block borders into account, so it could be further optimized
1335				int32 last = free - 1;
1336				if (free > 0 && runs[last].MergeableWith(run))
1337					runs[last].length = HOST_ENDIAN_TO_BFS_INT16(runs[last].Length() + run.Length());
1338				else
1339					runs[free] = run;
1340
1341				data->max_indirect_range = HOST_ENDIAN_TO_BFS_INT64(data->MaxIndirectRange() + (run.Length() << fVolume->BlockShift()));
1342				data->size = HOST_ENDIAN_TO_BFS_INT64(blocksNeeded > 0 ? data->MaxIndirectRange() : size);
1343
1344				cached.WriteBack(transaction);
1345				continue;
1346			}
1347		}
1348
1349		// Double indirect block range
1350
1351		if (data->Size() <= data->MaxDoubleIndirectRange() || !data->max_double_indirect_range) {
1352			while ((run.Length() % NUM_ARRAY_BLOCKS) != 0) {
1353				// The number of allocated blocks isn't a multiple of NUM_ARRAY_BLOCKS,
1354				// so we have to change this. This can happen the first time the stream
1355				// grows into the double indirect range.
1356				// First, free the remaining blocks that don't fit into a multiple
1357				// of NUM_ARRAY_BLOCKS
1358				int32 rest = run.Length() % NUM_ARRAY_BLOCKS;
1359				run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() - rest);
1360
1361				status = fVolume->Free(transaction, block_run::Run(run.AllocationGroup(),
1362					run.Start() + run.Length(), rest));
1363				if (status < B_OK)
1364					return status;
1365
1366				blocksNeeded += rest;
1367				blocksRequested = (blocksNeeded + NUM_ARRAY_BLOCKS - 1) & ~(NUM_ARRAY_BLOCKS - 1);
1368				minimum = NUM_ARRAY_BLOCKS;
1369					// we make sure here that we have at minimum NUM_ARRAY_BLOCKS allocated,
1370					// so if the allocation succeeds, we don't run into an endless loop
1371
1372				// Are there any blocks left in the run? If not, allocate a new one
1373				if (run.length == 0)
1374					continue;
1375			}
1376
1377			// if there is no double indirect block yet, create one
1378			if (data->double_indirect.IsZero()) {
1379				status = AllocateBlockArray(transaction, data->double_indirect);
1380				if (status < B_OK)
1381					return status;
1382
1383				data->max_double_indirect_range = data->max_indirect_range;
1384			}
1385
1386			// calculate the index where to insert the new blocks
1387
1388			int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run);
1389			int32 indirectSize = ((1L << INDIRECT_BLOCKS_SHIFT) << fVolume->BlockShift())
1390				* runsPerBlock;
1391			int32 directSize = NUM_ARRAY_BLOCKS << fVolume->BlockShift();
1392			int32 runsPerArray = runsPerBlock << ARRAY_BLOCKS_SHIFT;
1393
1394			off_t start = data->MaxDoubleIndirectRange() - data->MaxIndirectRange();
1395			int32 indirectIndex = start / indirectSize;
1396			int32 index = start / directSize;
1397
1398			// distribute the blocks to the array and allocate
1399			// new array blocks when needed
1400
1401			CachedBlock cached(fVolume);
1402			CachedBlock cachedDirect(fVolume);
1403			block_run *array = NULL;
1404			uint32 runLength = run.Length();
1405
1406			// ToDo: the following code is commented - it could be used to
1407			// preallocate all needed block arrays to see in advance if the
1408			// allocation will succeed.
1409			// I will probably remove it later, because it's no perfect solution
1410			// either: if the allocation was broken up before (blocksNeeded != 0),
1411			// it doesn't guarantee anything.
1412			// And since failing in this case is not that common, it doesn't have
1413			// to be optimized in that way.
1414			// Anyway, I wanted to have it in CVS - all those lines, and they will
1415			// be removed soon :-)
1416/*
1417			// allocate new block arrays if needed
1418
1419			off_t block = -1;
1420
1421			for (int32 i = 0;i < needed;i++) {
1422				// get the block to insert the run into
1423				block = fVolume->ToBlock(data->double_indirect) + i + indirectIndex / runsPerBlock;
1424				if (cached.BlockNumber() != block)
1425					array = (block_run *)cached.SetTo(block);
1426
1427				if (array == NULL)
1428					return B_ERROR;
1429
1430				status = AllocateBlockArray(transaction, array[i + indirectIndex % runsPerBlock]);
1431				if (status < B_OK)
1432					return status;
1433			}
1434*/
1435
1436			while (run.length != 0) {
1437				// get the indirect array block
1438				if (array == NULL) {
1439					if (cached.Block() != NULL
1440						&& cached.WriteBack(transaction) < B_OK)
1441						return B_IO_ERROR;
1442
1443					array = (block_run *)cached.SetTo(fVolume->ToBlock(data->double_indirect)
1444						+ indirectIndex / runsPerBlock);
1445					if (array == NULL)
1446						return B_IO_ERROR;
1447				}
1448
1449				do {
1450					// do we need a new array block?
1451					if (array[indirectIndex % runsPerBlock].IsZero()) {
1452						status = AllocateBlockArray(transaction, array[indirectIndex % runsPerBlock]);
1453						if (status < B_OK)
1454							return status;
1455					}
1456
1457					block_run *runs = (block_run *)cachedDirect.SetTo(
1458						fVolume->ToBlock(array[indirectIndex % runsPerBlock])
1459						+ index / runsPerBlock);
1460					if (runs == NULL)
1461						return B_IO_ERROR;
1462
1463					do {
1464						// insert the block_run into the array
1465						runs[index % runsPerBlock] = run;
1466						runs[index % runsPerBlock].length = HOST_ENDIAN_TO_BFS_INT16(NUM_ARRAY_BLOCKS);
1467
1468						// alter the remaining block_run
1469						run.start = HOST_ENDIAN_TO_BFS_INT16(run.Start() + NUM_ARRAY_BLOCKS);
1470						run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() - NUM_ARRAY_BLOCKS);
1471					} while ((++index % runsPerBlock) != 0 && run.length);
1472
1473					if (cachedDirect.WriteBack(transaction) < B_OK)
1474						return B_IO_ERROR;
1475				} while ((index % runsPerArray) != 0 && run.length);
1476
1477				if (++indirectIndex % runsPerBlock == 0) {
1478					array = NULL;
1479					index = 0;
1480				}
1481			}
1482
1483			data->max_double_indirect_range = HOST_ENDIAN_TO_BFS_INT64(data->MaxDoubleIndirectRange() + (runLength << fVolume->BlockShift()));
1484			data->size = blocksNeeded > 0 ? HOST_ENDIAN_TO_BFS_INT64(data->max_double_indirect_range) : size;
1485
1486			continue;
1487		}
1488
1489		RETURN_ERROR(EFBIG);
1490	}
1491	// update the size of the data stream
1492	data->size = HOST_ENDIAN_TO_BFS_INT64(size);
1493
1494	return B_OK;
1495}
1496
1497
1498status_t
1499Inode::FreeStaticStreamArray(Transaction *transaction, int32 level, block_run run,
1500	off_t size, off_t offset, off_t &max)
1501{
1502	int32 indirectSize = 0;
1503	if (level == 0)
1504		indirectSize = (1L << (INDIRECT_BLOCKS_SHIFT + fVolume->BlockShift()))
1505			* (fVolume->BlockSize() / sizeof(block_run));
1506	else if (level == 1)
1507		indirectSize = 4 << fVolume->BlockShift();
1508
1509	off_t start;
1510	if (size > offset)
1511		start = size - offset;
1512	else
1513		start = 0;
1514
1515	int32 index = start / indirectSize;
1516	int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run);
1517
1518	CachedBlock cached(fVolume);
1519	off_t blockNumber = fVolume->ToBlock(run);
1520
1521	// set the file offset to the current block run
1522	offset += (off_t)index * indirectSize;
1523
1524	for (int32 i = index / runsPerBlock; i < run.Length(); i++) {
1525		block_run *array = (block_run *)cached.SetTo(blockNumber + i);
1526		if (array == NULL)
1527			RETURN_ERROR(B_ERROR);
1528
1529		for (index = index % runsPerBlock; index < runsPerBlock; index++) {
1530			if (array[index].IsZero()) {
1531				// we also want to break out of the outer loop
1532				i = run.Length();
1533				break;
1534			}
1535
1536			status_t status = B_OK;
1537			if (level == 0)
1538				status = FreeStaticStreamArray(transaction, 1, array[index], size, offset, max);
1539			else if (offset >= size)
1540				status = fVolume->Free(transaction, array[index]);
1541			else
1542				max = HOST_ENDIAN_TO_BFS_INT64(offset + indirectSize);
1543
1544			if (status < B_OK)
1545				RETURN_ERROR(status);
1546
1547			if (offset >= size)
1548				array[index].SetTo(0, 0, 0);
1549
1550			offset += indirectSize;
1551		}
1552		index = 0;
1553
1554		cached.WriteBack(transaction);
1555	}
1556	return B_OK;
1557}
1558
1559
1560/** Frees all block_runs in the array which come after the specified size.
1561 *	It also trims the last block_run that contain the size.
1562 *	"offset" and "max" are maintained until the last block_run that doesn't
1563 *	have to be freed - after this, the values won't be correct anymore, but
1564 *	will still assure correct function for all subsequent calls.
1565 *	"max" is considered to be in file system byte order.
1566 */
1567
1568status_t
1569Inode::FreeStreamArray(Transaction *transaction, block_run *array, uint32 arrayLength,
1570	off_t size, off_t &offset, off_t &max)
1571{
1572	off_t newOffset = offset;
1573	uint32 i = 0;
1574	for (; i < arrayLength; i++, offset = newOffset) {
1575		if (array[i].IsZero())
1576			break;
1577
1578		newOffset += (off_t)array[i].Length() << fVolume->BlockShift();
1579		if (newOffset <= size)
1580			continue;
1581
1582		block_run run = array[i];
1583
1584		// determine the block_run to be freed
1585		if (newOffset > size && offset < size) {
1586			// free partial block_run (and update the original block_run)
1587			run.start = array[i].start + ((size - offset) >> fVolume->BlockShift()) + 1;
1588			array[i].length = HOST_ENDIAN_TO_BFS_INT16(run.Start() - array[i].Start());
1589			run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() - array[i].Length());
1590
1591			if (run.length == 0)
1592				continue;
1593
1594			// update maximum range
1595			max = HOST_ENDIAN_TO_BFS_INT64(offset + ((off_t)array[i].Length() << fVolume->BlockShift()));
1596		} else {
1597			// free the whole block_run
1598			array[i].SetTo(0, 0, 0);
1599
1600			if ((off_t)BFS_ENDIAN_TO_HOST_INT64(max) > offset)
1601				max = HOST_ENDIAN_TO_BFS_INT64(offset);
1602		}
1603
1604		if (fVolume->Free(transaction, run) < B_OK)
1605			return B_IO_ERROR;
1606	}
1607	return B_OK;
1608}
1609
1610
1611status_t
1612Inode::ShrinkStream(Transaction *transaction, off_t size)
1613{
1614	data_stream *data = &Node()->data;
1615
1616	if (data->MaxDoubleIndirectRange() > size) {
1617		off_t *maxDoubleIndirect = &data->max_double_indirect_range;
1618			// gcc 4 work-around: "error: cannot bind packed field
1619			// 'data->data_stream::max_double_indirect_range' to 'off_t&'"
1620		FreeStaticStreamArray(transaction, 0, data->double_indirect, size,
1621			data->MaxIndirectRange(), *maxDoubleIndirect);
1622
1623		if (size <= data->MaxIndirectRange()) {
1624			fVolume->Free(transaction, data->double_indirect);
1625			data->double_indirect.SetTo(0, 0, 0);
1626			data->max_double_indirect_range = 0;
1627		}
1628	}
1629	if (data->MaxIndirectRange() > size) {
1630		CachedBlock cached(fVolume);
1631		off_t block = fVolume->ToBlock(data->indirect);
1632		off_t offset = data->MaxDirectRange();
1633
1634		for (int32 i = 0; i < data->indirect.Length(); i++) {
1635			block_run *array = (block_run *)cached.SetTo(block + i);
1636			if (array == NULL)
1637				break;
1638
1639			off_t *maxIndirect = &data->max_indirect_range;
1640				// gcc 4 work-around: "error: cannot bind packed field
1641				// 'data->data_stream::max_indirect_range' to 'off_t&'"
1642			if (FreeStreamArray(transaction, array, fVolume->BlockSize() / sizeof(block_run),
1643					size, offset, *maxIndirect) == B_OK)
1644				cached.WriteBack(transaction);
1645		}
1646		if (data->max_direct_range == data->max_indirect_range) {
1647			fVolume->Free(transaction, data->indirect);
1648			data->indirect.SetTo(0, 0, 0);
1649			data->max_indirect_range = 0;
1650		}
1651	}
1652	if (data->MaxDirectRange() > size) {
1653		off_t offset = 0;
1654		off_t *maxDirect = &data->max_direct_range;
1655			// gcc 4 work-around: "error: cannot bind packed field
1656			// 'data->data_stream::max_indirect_range' to 'off_t&'"
1657		FreeStreamArray(transaction, data->direct, NUM_DIRECT_BLOCKS, size, offset,
1658			*maxDirect);
1659	}
1660
1661	data->size = HOST_ENDIAN_TO_BFS_INT64(size);
1662	return B_OK;
1663}
1664
1665
1666status_t
1667Inode::SetFileSize(Transaction *transaction, off_t size)
1668{
1669	if (size < 0
1670		// uncached files can't be resized (Stream<Cache>::WriteAt() specifically
1671		// denies growing uncached files because of efficiency, so it had to be
1672		// adapted if this ever changes [which will probably happen in OpenBeOS]).
1673		|| Flags() & INODE_NO_CACHE)
1674		return B_BAD_VALUE;
1675
1676	off_t oldSize = Size();
1677
1678	if (size == oldSize)
1679		return B_OK;
1680
1681	// should the data stream grow or shrink?
1682	status_t status;
1683	if (size > oldSize) {
1684		status = GrowStream(transaction, size);
1685		if (status < B_OK) {
1686			// if the growing of the stream fails, the whole operation
1687			// fails, so we should shrink the stream to its former size
1688			ShrinkStream(transaction, oldSize);
1689		}
1690	}
1691	else
1692		status = ShrinkStream(transaction, size);
1693
1694	if (status < B_OK)
1695		return status;
1696
1697	return WriteBack(transaction);
1698}
1699
1700
1701status_t
1702Inode::Append(Transaction *transaction, off_t bytes)
1703{
1704	return SetFileSize(transaction, Size() + bytes);
1705}
1706
1707
1708status_t
1709Inode::Trim(Transaction *transaction)
1710{
1711	status_t status = ShrinkStream(transaction, Size());
1712	if (status < B_OK)
1713		return status;
1714
1715	return WriteBack(transaction);
1716}
1717
1718
1719status_t
1720Inode::Free(Transaction *transaction)
1721{
1722	FUNCTION();
1723
1724	// Perhaps there should be an implementation of Inode::ShrinkStream() that
1725	// just frees the data_stream, but doesn't change the inode (since it is
1726	// freed anyway) - that would make an undelete command possible
1727	status_t status = SetFileSize(transaction, 0);
1728	if (status < B_OK)
1729		return status;
1730
1731	// Free all attributes, and remove their indices
1732	{
1733		// We have to limit the scope of AttributeIterator, so that its
1734		// destructor is not called after the inode is deleted
1735		AttributeIterator iterator(this);
1736
1737		char name[B_FILE_NAME_LENGTH];
1738		uint32 type;
1739		size_t length;
1740		vnode_id id;
1741		while ((status = iterator.GetNext(name, &length, &type, &id)) == B_OK)
1742			RemoveAttribute(transaction, name);
1743	}
1744
1745	if (WriteBack(transaction) < B_OK)
1746		return B_ERROR;
1747
1748	return fVolume->Free(transaction, BlockRun());
1749}
1750
1751
1752status_t
1753Inode::Sync()
1754{
1755	// We may also want to flush the attribute's data stream to
1756	// disk here... (do we?)
1757
1758	data_stream *data = &Node()->data;
1759	status_t status;
1760
1761	// flush direct range
1762
1763	for (int32 i = 0; i < NUM_DIRECT_BLOCKS; i++) {
1764		if (data->direct[i].IsZero())
1765			return B_OK;
1766
1767		status = flush_blocks(fVolume->Device(), fVolume->ToBlock(data->direct[i]),
1768			data->direct[i].Length());
1769		if (status != B_OK)
1770			return status;
1771	}
1772
1773	// flush indirect range
1774
1775	if (data->max_indirect_range == 0)
1776		return B_OK;
1777
1778	CachedBlock cached(fVolume);
1779	off_t block = fVolume->ToBlock(data->indirect);
1780	int32 count = fVolume->BlockSize() / sizeof(block_run);
1781
1782	for (int32 j = 0; j < data->indirect.Length(); j++) {
1783		block_run *runs = (block_run *)cached.SetTo(block + j);
1784		if (runs == NULL)
1785			break;
1786
1787		for (int32 i = 0; i < count; i++) {
1788			if (runs[i].IsZero())
1789				return B_OK;
1790
1791			status = flush_blocks(fVolume->Device(), fVolume->ToBlock(runs[i]), runs[i].Length());
1792			if (status != B_OK)
1793				return status;
1794		}
1795	}
1796
1797	// flush double indirect range
1798
1799	if (data->max_double_indirect_range == 0)
1800		return B_OK;
1801
1802	off_t indirectBlock = fVolume->ToBlock(data->double_indirect);
1803
1804	for (int32 l = 0; l < data->double_indirect.Length(); l++) {
1805		block_run *indirectRuns = (block_run *)cached.SetTo(indirectBlock + l);
1806		if (indirectRuns == NULL)
1807			return B_FILE_ERROR;
1808
1809		CachedBlock directCached(fVolume);
1810
1811		for (int32 k = 0; k < count; k++) {
1812			if (indirectRuns[k].IsZero())
1813				return B_OK;
1814
1815			block = fVolume->ToBlock(indirectRuns[k]);
1816			for (int32 j = 0; j < indirectRuns[k].Length(); j++) {
1817				block_run *runs = (block_run *)directCached.SetTo(block + j);
1818				if (runs == NULL)
1819					return B_FILE_ERROR;
1820
1821				for (int32 i = 0; i < count; i++) {
1822					if (runs[i].IsZero())
1823						return B_OK;
1824
1825					// ToDo: combine single block_runs to bigger ones when
1826					// they are adjacent
1827					status = flush_blocks(fVolume->Device(), fVolume->ToBlock(runs[i]),
1828						runs[i].Length());
1829					if (status != B_OK)
1830						return status;
1831				}
1832			}
1833		}
1834	}
1835	return B_OK;
1836}
1837
1838
1839status_t
1840Inode::Remove(Transaction *transaction, const char *name, off_t *_id, bool isDirectory)
1841{
1842	BPlusTree *tree;
1843	if (GetTree(&tree) != B_OK)
1844		RETURN_ERROR(B_BAD_VALUE);
1845
1846	RecursiveLocker locker(fVolume->Lock());
1847
1848	// does the file even exist?
1849	off_t id;
1850	if (tree->Find((uint8 *)name, (uint16)strlen(name), &id) < B_OK)
1851		return B_ENTRY_NOT_FOUND;
1852
1853	if (_id)
1854		*_id = id;
1855
1856	Vnode vnode(fVolume, id);
1857	Inode *inode;
1858	status_t status = vnode.Get(&inode);
1859	if (status < B_OK) {
1860		REPORT_ERROR(status);
1861		return B_ENTRY_NOT_FOUND;
1862	}
1863
1864	// You can't unlink a mounted image or the VM file while it is being used - while
1865	// this is not really necessary, it copies the behaviour of the original BFS
1866	// and let you and me feel a little bit safer
1867	if (inode->Flags() & INODE_NO_CACHE)
1868		return B_NOT_ALLOWED;
1869
1870	// Inode::IsContainer() is true also for indices (furthermore, the S_IFDIR
1871	// bit is set for indices in BFS, not for attribute directories) - but you
1872	// should really be able to do whatever you want with your indices
1873	// without having to remove all files first :)
1874	if (!inode->IsIndex()) {
1875		// if it's not of the correct type, don't delete it!
1876		if (inode->IsContainer() != isDirectory)
1877			return isDirectory ? B_NOT_A_DIRECTORY : B_IS_A_DIRECTORY;
1878
1879		// only delete empty directories
1880		if (isDirectory && !inode->IsEmpty())
1881			return B_DIRECTORY_NOT_EMPTY;
1882	}
1883
1884	// remove_vnode() allows the inode to be accessed until the last put_vnode()
1885	if (remove_vnode(fVolume->ID(), id) != B_OK)
1886		return B_ERROR;
1887
1888	if (tree->Remove(transaction, name, id) < B_OK) {
1889		unremove_vnode(fVolume->ID(), id);
1890		RETURN_ERROR(B_ERROR);
1891	}
1892
1893#ifdef DEBUG
1894	if (tree->Find((uint8 *)name, (uint16)strlen(name), &id) == B_OK) {
1895		DIE(("deleted entry still there"));
1896	}
1897#endif
1898
1899	// update the inode, so that no one will ever doubt it's deleted :-)
1900	inode->Node()->flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_DELETED);
1901	inode->Node()->flags &= ~HOST_ENDIAN_TO_BFS_INT32(INODE_IN_USE);
1902
1903	// In balance to the Inode::Create() method, the main indices
1904	// are updated here (name, size, & last_modified)
1905
1906	Index index(fVolume);
1907	if (inode->IsRegularNode()) {
1908		index.RemoveName(transaction, name, inode);
1909			// If removing from the index fails, it is not regarded as a
1910			// fatal error and will not be reported back!
1911			// Deleted inodes won't be visible in queries anyway.
1912	}
1913
1914	if ((inode->Mode() & (S_FILE | S_SYMLINK)) != 0) {
1915		if (inode->IsFile())
1916			index.RemoveSize(transaction, inode);
1917		index.RemoveLastModified(transaction, inode);
1918	}
1919
1920	if (inode->WriteBack(transaction) < B_OK)
1921		return B_ERROR;
1922
1923	return B_OK;
1924}
1925
1926
/**	Creates the inode with the specified parent directory, and automatically
 *	adds the created inode to that parent directory. If an attribute directory
 *	is created, it will also automatically be added to the parent inode as such.
 *	However, the indices root node, and the regular root node won't be added
 *	to the superblock.
 *	It will also create the initial B+tree for the inode if it's a directory
 *	of any kind.
 *	If the "_id" or "_inode" pointer is given (non-NULL) to store the inode's
 *	ID or the inode itself, the inode stays locked - you have to call
 *	put_vnode() when you don't use it anymore.
 */
1938
1939status_t
1940Inode::Create(Transaction *transaction, Inode *parent, const char *name, int32 mode,
1941	int omode, uint32 type, off_t *_id, Inode **_inode)
1942{
1943	FUNCTION();
1944
1945	block_run parentRun = parent ? parent->BlockRun() : block_run::Run(0, 0, 0);
1946	Volume *volume = transaction->GetVolume();
1947	BPlusTree *tree = NULL;
1948
1949	RecursiveLocker locker(volume->Lock());
1950		// ToDo: it would be nicer to only lock the parent directory, if possible
1951		//	(but that lock will already be held during any B+tree action)
1952
1953	if (parent && (mode & S_ATTR_DIR) == 0 && parent->IsContainer()) {
1954		// check if the file already exists in the directory
1955		if (parent->GetTree(&tree) != B_OK)
1956			RETURN_ERROR(B_BAD_VALUE);
1957
1958		// does the file already exist?
1959		off_t offset;
1960		if (tree->Find((uint8 *)name, (uint16)strlen(name), &offset) == B_OK) {
1961			// return if the file should be a directory or opened in exclusive mode
1962			if (mode & S_DIRECTORY || omode & O_EXCL)
1963				return B_FILE_EXISTS;
1964
1965			Vnode vnode(volume, offset);
1966			Inode *inode;
1967			status_t status = vnode.Get(&inode);
1968			if (status < B_OK) {
1969				REPORT_ERROR(status);
1970				return B_ENTRY_NOT_FOUND;
1971			}
1972
1973			// if it's a directory, bail out!
1974			if (inode->IsDirectory())
1975				return B_IS_A_DIRECTORY;
1976
1977			// if it is a mounted device or the VM file, we don't allow to delete it
1978			// while it is open and in use
1979			if (inode->Flags() & INODE_NO_CACHE)
1980				return B_NOT_ALLOWED;
1981
1982			// if omode & O_TRUNC, truncate the existing file
1983			if (omode & O_TRUNC) {
1984				WriteLocked locked(inode->Lock());
1985
1986				status_t status = inode->SetFileSize(transaction, 0);
1987				if (status < B_OK)
1988					return status;
1989			}
1990
1991			if (_id)
1992				*_id = offset;
1993			if (_inode)
1994				*_inode = inode;
1995
1996			// only keep the vnode in memory if the _id or _inode pointer is provided
1997			if (_id != NULL || _inode != NULL)
1998				vnode.Keep();
1999
2000			return B_OK;
2001		}
2002	} else if (parent && (mode & S_ATTR_DIR) == 0)
2003		return B_BAD_VALUE;
2004
2005	// allocate space for the new inode
2006	InodeAllocator allocator(transaction);
2007	block_run run;
2008	Inode *inode;
2009	status_t status = allocator.New(&parentRun, mode, run, &inode);
2010	if (status < B_OK)
2011		return status;
2012
2013	// Initialize the parts of the bfs_inode structure that
2014	// InodeAllocator::New() hasn't touched yet
2015
2016	bfs_inode *node = inode->Node();
2017
2018	if (parent == NULL) {
2019		// we set the parent to itself in this case
2020		// (only happens for the root and indices node)
2021		node->parent = run;
2022	} else
2023		node->parent = parentRun;
2024
2025	node->uid = HOST_ENDIAN_TO_BFS_INT32(geteuid());
2026	node->gid = HOST_ENDIAN_TO_BFS_INT32(parent ? parent->Node()->gid : getegid());
2027		// the group ID is inherited from the parent, if available
2028
2029	node->type = HOST_ENDIAN_TO_BFS_INT32(type);
2030
2031	// only add the name to regular files, directories, or symlinks
2032	// don't add it to attributes, or indices
2033	if (tree && inode->IsRegularNode() && inode->SetName(transaction, name) < B_OK)
2034		return B_ERROR;
2035
2036	// Initialize b+tree if it's a directory (and add "." & ".." if it's
2037	// a standard directory for files - not for attributes or indices)
2038	if (inode->IsContainer()) {
2039		status = allocator.CreateTree();
2040		if (status < B_OK)
2041			return status;
2042	}
2043
2044	// Add a link to the inode from the parent, depending on its type
2045	// (the INODE_NOT_READY flag is set, so it is safe to make the inode
2046	// accessable to the file system here)
2047	if (tree) {
2048		status = tree->Insert(transaction, name, inode->ID());
2049	} else if (parent && (mode & S_ATTR_DIR) != 0) {
2050		parent->Attributes() = run;
2051		status = parent->WriteBack(transaction);
2052	}
2053
2054	// Note, we only care if the inode could be made accessable for the
2055	// two cases above; the root node or the indices root node must
2056	// handle this case on their own (or other cases where "parent" is
2057	// NULL)
2058	if (status < B_OK)
2059		RETURN_ERROR(status);
2060
2061	// Update the main indices (name, size & last_modified)
2062	// (live queries might want to access us after this)
2063
2064	Index index(volume);
2065	if (inode->IsRegularNode() && name != NULL) {
2066		// the name index only contains regular files
2067		// (but not the root node where name == NULL)
2068		status = index.InsertName(transaction, name, inode);
2069		if (status < B_OK && status != B_BAD_INDEX) {
2070			// We have to remove the node from the parent at this point,
2071			// because the InodeAllocator destructor can't handle this
2072			// case (and if it fails, we can't do anything about it...)
2073			if (tree)
2074				tree->Remove(transaction, name, inode->ID());
2075			else if (parent != NULL && (mode & S_ATTR_DIR) != 0)
2076				parent->Node()->attributes.SetTo(0, 0, 0);
2077
2078			return status;
2079		}
2080	}
2081
2082	inode->UpdateOldLastModified();
2083
2084	// The "size" & "last_modified" indices don't contain directories
2085	if (inode->IsFile() || inode->IsSymLink()) {
2086		// if adding to these indices fails, the inode creation will not be harmed;
2087		// they are considered less important than the "name" index
2088		if (inode->IsFile())
2089			index.InsertSize(transaction, inode);
2090		index.InsertLastModified(transaction, inode);
2091	}
2092
2093	// Everything worked well until this point, we have a fully
2094	// initialized inode, and we want to keep it
2095	allocator.Keep();
2096
2097	// We hold the volume lock to make sure that bfs_read_vnode()
2098	// won't succeed in the meantime (between the call right
2099	// above and below)!
2100
2101	if ((status = new_vnode(volume->ID(), inode->ID(), inode)) != B_OK) {
2102		// this is a really fatal error, and we can't recover from that
2103		// The only exception is that the volume is being initialized.
2104		if (volume->ID() >= 0) {
2105			FATAL(("new_vnode() failed with: %s\n", strerror(status)));
2106			DIE(("new_vnode() failed for inode!"));
2107		}
2108	}
2109
2110	if (_id != NULL)
2111		*_id = inode->ID();
2112	if (_inode != NULL)
2113		*_inode = inode;
2114
2115	// if either _id or _inode is passed, we will keep the inode locked
2116	if (_id == NULL && _inode == NULL)
2117		put_vnode(volume->ID(), inode->ID());
2118
2119	return B_OK;
2120}
2121
2122
2123//	#pragma mark -
2124
2125
2126AttributeIterator::AttributeIterator(Inode *inode)
2127	:
2128	fCurrentSmallData(0),
2129	fInode(inode),
2130	fAttributes(NULL),
2131	fIterator(NULL),
2132	fBuffer(NULL)
2133{
2134	inode->AddIterator(this);
2135}
2136
2137
2138AttributeIterator::~AttributeIterator()
2139{
2140	if (fAttributes)
2141		put_vnode(fAttributes->GetVolume()->ID(), fAttributes->ID());
2142
2143	delete fIterator;
2144	fInode->RemoveIterator(this);
2145}
2146
2147
2148status_t
2149AttributeIterator::Rewind()
2150{
2151	fCurrentSmallData = 0;
2152
2153	if (fIterator != NULL)
2154		fIterator->Rewind();
2155
2156	return B_OK;
2157}
2158
2159
2160status_t
2161AttributeIterator::GetNext(char *name, size_t *_length, uint32 *_type, vnode_id *_id)
2162{
2163	// read attributes out of the small data section
2164
2165	if (fCurrentSmallData >= 0) {
2166		small_data *item = fInode->Node()->SmallDataStart();
2167
2168		fInode->SmallDataLock().Lock();
2169
2170		int32 i = 0;
2171		for (;;item = item->Next()) {
2172			if (item->IsLast(fInode->Node()))
2173				break;
2174
2175			if (item->NameSize() == FILE_NAME_NAME_LENGTH
2176				&& *item->Name() == FILE_NAME_NAME)
2177				continue;
2178
2179			if (i++ == fCurrentSmallData)
2180				break;
2181		}
2182
2183		if (!item->IsLast(fInode->Node())) {
2184			strncpy(name, item->Name(), B_FILE_NAME_LENGTH);
2185			*_type = item->Type();
2186			*_length = item->NameSize();
2187			*_id = (vnode_id)fCurrentSmallData;
2188
2189			fCurrentSmallData = i;
2190		}
2191		else {
2192			// stop traversing the small_data section
2193			fCurrentSmallData = -1;
2194		}
2195
2196		fInode->SmallDataLock().Unlock();
2197
2198		if (fCurrentSmallData != -1)
2199			return B_OK;
2200	}
2201
2202	// read attributes out of the attribute directory
2203
2204	if (fInode->Attributes().IsZero())
2205		return B_ENTRY_NOT_FOUND;
2206
2207	Volume *volume = fInode->GetVolume();
2208
2209	// if you haven't yet access to the attributes directory, get it
2210	if (fAttributes == NULL) {
2211#ifdef UNSAFE_GET_VNODE
2212		RecursiveLocker locker(volume->Lock());
2213#endif
2214		if (get_vnode(volume->ID(), volume->ToVnode(fInode->Attributes()),
2215				(void **)&fAttributes) != 0
2216			|| fAttributes == NULL) {
2217			FATAL(("get_vnode() failed in AttributeIterator::GetNext(vnode_id = %Ld,name = \"%s\")\n",fInode->ID(),name));
2218			return B_ENTRY_NOT_FOUND;
2219		}
2220
2221		BPlusTree *tree;
2222		if (fAttributes->GetTree(&tree) < B_OK
2223			|| (fIterator = new TreeIterator(tree)) == NULL) {
2224			FATAL(("could not get tree in AttributeIterator::GetNext(vnode_id = %Ld,name = \"%s\")\n",fInode->ID(),name));
2225			return B_ENTRY_NOT_FOUND;
2226		}
2227	}
2228
2229	uint16 length;
2230	vnode_id id;
2231	status_t status = fIterator->GetNextEntry(name, &length, B_FILE_NAME_LENGTH, &id);
2232	if (status < B_OK)
2233		return status;
2234
2235	Vnode vnode(volume,id);
2236	Inode *attribute;
2237	if ((status = vnode.Get(&attribute)) == B_OK) {
2238		*_type = attribute->Type();
2239		*_length = attribute->Size();
2240		*_id = id;
2241	}
2242
2243	return status;
2244}
2245
2246
2247void
2248AttributeIterator::Update(uint16 index, int8 change)
2249{
2250	// fCurrentSmallData points already to the next item
2251	if (index < fCurrentSmallData)
2252		fCurrentSmallData += change;
2253}
2254
2255