1/*
2 * Copyright 2001-2012, Axel D��rfler, axeld@pinc-software.de.
3 * This file may be used under the terms of the MIT License.
4 */
5
6
7//! Inode access functions
8
9
10#include "Debug.h"
11#include "Inode.h"
12#include "BPlusTree.h"
13#include "Index.h"
14
15
16#if BFS_TRACING && !defined(BFS_SHELL) && !defined(_BOOT_MODE)
17namespace BFSInodeTracing {
18
19class Create : public AbstractTraceEntry {
20public:
21	Create(Inode* inode, Inode* parent, const char* name, int32 mode,
22			int openMode, uint32 type)
23		:
24		fInode(inode),
25		fID(inode->ID()),
26		fParent(parent),
27		fParentID(parent != NULL ? parent->ID() : 0),
28		fMode(mode),
29		fOpenMode(openMode),
30		fType(type)
31	{
32		if (name != NULL)
33			strlcpy(fName, name, sizeof(fName));
34		else
35			fName[0] = '\0';
36
37		Initialized();
38	}
39
40	virtual void AddDump(TraceOutput& out)
41	{
42		out.Print("bfs:Create %Ld (%p), parent %Ld (%p), \"%s\", "
43			"mode %lx, omode %x, type %lx", fID, fInode, fParentID,
44			fParent, fName, fMode, fOpenMode, fType);
45	}
46
47private:
48	Inode*	fInode;
49	ino_t	fID;
50	Inode*	fParent;
51	ino_t	fParentID;
52	char	fName[32];
53	int32	fMode;
54	int		fOpenMode;
55	uint32	fType;
56};
57
58class Remove : public AbstractTraceEntry {
59public:
60	Remove(Inode* inode, const char* name)
61		:
62		fInode(inode),
63		fID(inode->ID())
64	{
65		strlcpy(fName, name, sizeof(fName));
66		Initialized();
67	}
68
69	virtual void AddDump(TraceOutput& out)
70	{
71		out.Print("bfs:Remove %Ld (%p), \"%s\"", fID, fInode, fName);
72	}
73
74private:
75	Inode*	fInode;
76	ino_t	fID;
77	char	fName[32];
78};
79
80class Action : public AbstractTraceEntry {
81public:
82	Action(const char* action, Inode* inode)
83		:
84		fInode(inode),
85		fID(inode->ID())
86	{
87		strlcpy(fAction, action, sizeof(fAction));
88		Initialized();
89	}
90
91	virtual void AddDump(TraceOutput& out)
92	{
93		out.Print("bfs:%s %Ld (%p)\n", fAction, fID, fInode);
94	}
95
96private:
97	Inode*	fInode;
98	ino_t	fID;
99	char	fAction[16];
100};
101
102class Resize : public AbstractTraceEntry {
103public:
104	Resize(Inode* inode, off_t oldSize, off_t newSize, bool trim)
105		:
106		fInode(inode),
107		fID(inode->ID()),
108		fOldSize(oldSize),
109		fNewSize(newSize),
110		fTrim(trim)
111	{
112		Initialized();
113	}
114
115	virtual void AddDump(TraceOutput& out)
116	{
117		out.Print("bfs:%s %Ld (%p), %Ld -> %Ld", fTrim ? "Trim" : "Resize",
118			fID, fInode, fOldSize, fNewSize);
119	}
120
121private:
122	Inode*	fInode;
123	ino_t	fID;
124	off_t	fOldSize;
125	off_t	fNewSize;
126	bool	fTrim;
127};
128
129}	// namespace BFSInodeTracing
130
131#	define T(x) new(std::nothrow) BFSInodeTracing::x;
132#else
133#	define T(x) ;
134#endif
135
136
137/*!	A helper class used by Inode::Create() to keep track of the belongings
138	of an inode creation in progress.
139	This class will make sure everything is cleaned up properly.
140*/
141class InodeAllocator {
142public:
143							InodeAllocator(Transaction& transaction);
144							~InodeAllocator();
145
146			status_t		New(block_run* parentRun, mode_t mode,
147								block_run& run, fs_vnode_ops* vnodeOps,
148								Inode** _inode);
149			status_t		CreateTree();
150			status_t		Keep(fs_vnode_ops* vnodeOps, uint32 publishFlags);
151
152private:
153	static	void			_TransactionListener(int32 id, int32 event,
154								void* _inode);
155
156			Transaction*	fTransaction;
157			block_run		fRun;
158			Inode*			fInode;
159};
160
161
162InodeAllocator::InodeAllocator(Transaction& transaction)
163	:
164	fTransaction(&transaction),
165	fInode(NULL)
166{
167}
168
169
170InodeAllocator::~InodeAllocator()
171{
172	if (fTransaction != NULL) {
173		Volume* volume = fTransaction->GetVolume();
174
175		if (fInode != NULL) {
176			fInode->Node().flags &= ~HOST_ENDIAN_TO_BFS_INT32(INODE_IN_USE);
177				// this unblocks any pending bfs_read_vnode() calls
178			fInode->Free(*fTransaction);
179
180			if (fInode->fTree != NULL)
181				fTransaction->RemoveListener(fInode->fTree);
182			fTransaction->RemoveListener(fInode);
183
184			remove_vnode(volume->FSVolume(), fInode->ID());
185		} else
186			volume->Free(*fTransaction, fRun);
187	}
188
189	delete fInode;
190}
191
192
193status_t
194InodeAllocator::New(block_run* parentRun, mode_t mode, block_run& run,
195	fs_vnode_ops* vnodeOps, Inode** _inode)
196{
197	Volume* volume = fTransaction->GetVolume();
198
199	status_t status = volume->AllocateForInode(*fTransaction, parentRun, mode,
200		fRun);
201	if (status < B_OK) {
202		// don't free the space in the destructor, because
203		// the allocation failed
204		fTransaction = NULL;
205		RETURN_ERROR(status);
206	}
207
208	run = fRun;
209	fInode = new Inode(volume, *fTransaction, volume->ToVnode(run), mode, run);
210	if (fInode == NULL)
211		RETURN_ERROR(B_NO_MEMORY);
212
213	if (!volume->IsInitializing()) {
214		status = new_vnode(volume->FSVolume(), fInode->ID(), fInode,
215			vnodeOps != NULL ? vnodeOps : &gBFSVnodeOps);
216		if (status < B_OK) {
217			delete fInode;
218			fInode = NULL;
219			RETURN_ERROR(status);
220		}
221	}
222
223	fInode->WriteLockInTransaction(*fTransaction);
224	*_inode = fInode;
225	return B_OK;
226}
227
228
229status_t
230InodeAllocator::CreateTree()
231{
232	Volume* volume = fTransaction->GetVolume();
233
234	// force S_STR_INDEX to be set, if no type is set
235	if ((fInode->Mode() & S_INDEX_TYPES) == 0)
236		fInode->Node().mode |= HOST_ENDIAN_TO_BFS_INT32(S_STR_INDEX);
237
238	BPlusTree* tree = fInode->fTree = new BPlusTree(*fTransaction, fInode);
239	if (tree == NULL || tree->InitCheck() < B_OK)
240		return B_ERROR;
241
242	if (fInode->IsRegularNode()) {
243		if (tree->Insert(*fTransaction, ".", fInode->ID()) < B_OK
244			|| tree->Insert(*fTransaction, "..",
245					volume->ToVnode(fInode->Parent())) < B_OK)
246			return B_ERROR;
247	}
248	return B_OK;
249}
250
251
252status_t
253InodeAllocator::Keep(fs_vnode_ops* vnodeOps, uint32 publishFlags)
254{
255	ASSERT(fInode != NULL && fTransaction != NULL);
256	Volume* volume = fTransaction->GetVolume();
257
258	status_t status = fInode->WriteBack(*fTransaction);
259	if (status < B_OK) {
260		FATAL(("writing new inode %" B_PRIdINO " failed!\n", fInode->ID()));
261		return status;
262	}
263
264	// Symbolic links are not published -- the caller needs to do this once
265	// the contents have been written.
266	if (!fInode->IsSymLink() && volume->ID() >= 0) {
267		status = publish_vnode(volume->FSVolume(), fInode->ID(), fInode,
268			vnodeOps != NULL ? vnodeOps : &gBFSVnodeOps, fInode->Mode(),
269			publishFlags);
270	}
271
272	if (status == B_OK) {
273		cache_add_transaction_listener(volume->BlockCache(), fTransaction->ID(),
274			TRANSACTION_ABORTED, &_TransactionListener, fInode);
275	}
276
277	fTransaction = NULL;
278	fInode = NULL;
279
280	return status;
281}
282
283
284/*static*/ void
285InodeAllocator::_TransactionListener(int32 id, int32 event, void* _inode)
286{
287	Inode* inode = (Inode*)_inode;
288
289	if (event == TRANSACTION_ABORTED)
290		panic("transaction %d aborted, inode %p still around!\n", (int)id, inode);
291}
292
293
294//	#pragma mark -
295
296
297status_t
298bfs_inode::InitCheck(Volume* volume) const
299{
300	if (Magic1() != INODE_MAGIC1
301		|| !(Flags() & INODE_IN_USE)
302		|| inode_num.Length() != 1
303		// matches inode size?
304		|| (uint32)InodeSize() != volume->InodeSize()
305		// parent resides on disk?
306		|| parent.AllocationGroup() > int32(volume->AllocationGroups())
307		|| parent.AllocationGroup() < 0
308		|| parent.Start() > (1L << volume->AllocationGroupShift())
309		|| parent.Length() != 1
310		// attributes, too?
311		|| attributes.AllocationGroup() > int32(volume->AllocationGroups())
312		|| attributes.AllocationGroup() < 0
313		|| attributes.Start() > (1L << volume->AllocationGroupShift()))
314		RETURN_ERROR(B_BAD_DATA);
315
316	if (Flags() & INODE_DELETED)
317		return B_NOT_ALLOWED;
318
319	// TODO: Add some tests to check the integrity of the other stuff here,
320	// especially for the data_stream!
321
322	return B_OK;
323}
324
325
326//	#pragma mark - Inode
327
328
329Inode::Inode(Volume* volume, ino_t id)
330	:
331	fVolume(volume),
332	fID(id),
333	fTree(NULL),
334	fAttributes(NULL),
335	fCache(NULL),
336	fMap(NULL)
337{
338	PRINT(("Inode::Inode(volume = %p, id = %Ld) @ %p\n", volume, id, this));
339
340	UpdateNodeFromDisk();
341
342	rw_lock_init(&fLock, "bfs inode");
343	recursive_lock_init(&fSmallDataLock, "bfs inode small data");
344
345	// these two will help to maintain the indices
346	fOldSize = Size();
347	fOldLastModified = LastModified();
348
349	if (IsContainer())
350		fTree = new BPlusTree(this);
351	if (NeedsFileCache()) {
352		SetFileCache(file_cache_create(fVolume->ID(), ID(), Size()));
353		SetMap(file_map_create(volume->ID(), ID(), Size()));
354	}
355}
356
357
358Inode::Inode(Volume* volume, Transaction& transaction, ino_t id, mode_t mode,
359		block_run& run)
360	:
361	fVolume(volume),
362	fID(id),
363	fTree(NULL),
364	fAttributes(NULL),
365	fCache(NULL),
366	fMap(NULL)
367{
368	PRINT(("Inode::Inode(volume = %p, transaction = %p, id = %Ld) @ %p\n",
369		volume, &transaction, id, this));
370
371	rw_lock_init(&fLock, "bfs inode");
372	recursive_lock_init(&fSmallDataLock, "bfs inode small data");
373
374	NodeGetter node(volume, transaction, this, true);
375	memset(&fNode, 0, sizeof(bfs_inode));
376
377	// Initialize the bfs_inode structure -- it's not written back to disk
378	// here, because it will be done so already when the inode could be
379	// created completely.
380
381	Node().magic1 = HOST_ENDIAN_TO_BFS_INT32(INODE_MAGIC1);
382	Node().inode_num = run;
383	Node().mode = HOST_ENDIAN_TO_BFS_INT32(mode);
384	Node().flags = HOST_ENDIAN_TO_BFS_INT32(INODE_IN_USE);
385
386	Node().create_time = Node().last_modified_time = Node().status_change_time
387		= HOST_ENDIAN_TO_BFS_INT64(bfs_inode::ToInode(real_time_clock_usecs()));
388
389	Node().inode_size = HOST_ENDIAN_TO_BFS_INT32(volume->InodeSize());
390
391	// these two will help to maintain the indices
392	fOldSize = Size();
393	fOldLastModified = LastModified();
394}
395
396
397Inode::~Inode()
398{
399	PRINT(("Inode::~Inode() @ %p\n", this));
400
401	file_cache_delete(FileCache());
402	file_map_delete(Map());
403	delete fTree;
404
405	rw_lock_destroy(&fLock);
406	recursive_lock_destroy(&fSmallDataLock);
407}
408
409
410status_t
411Inode::InitCheck(bool checkNode) const
412{
413	// test inode magic and flags
414	if (checkNode) {
415		status_t status = Node().InitCheck(fVolume);
416		if (status == B_BUSY)
417			return B_BUSY;
418
419		if (status != B_OK) {
420			FATAL(("inode at block %" B_PRIdOFF " corrupt!\n", BlockNumber()));
421			RETURN_ERROR(B_BAD_DATA);
422		}
423	}
424
425	if (IsContainer()) {
426		// inodes that have a B+tree
427		if (fTree == NULL)
428			RETURN_ERROR(B_NO_MEMORY);
429
430		status_t status = fTree->InitCheck();
431		if (status != B_OK) {
432			FATAL(("inode tree at block %" B_PRIdOFF " corrupt!\n",
433				BlockNumber()));
434			RETURN_ERROR(B_BAD_DATA);
435		}
436	}
437
438	if (NeedsFileCache() && (fCache == NULL || fMap == NULL))
439		return B_NO_MEMORY;
440
441	return B_OK;
442}
443
444
445/*!	Adds this inode to the specified transaction. This means that the inode will
446	be write locked until the transaction ended.
447	To ensure that the inode will stay valid until that point, an extra reference
448	is acquired to it as long as this transaction stays active.
449*/
450void
451Inode::WriteLockInTransaction(Transaction& transaction)
452{
453	// These flags can only change while holding the transaction lock
454	if ((Flags() & INODE_IN_TRANSACTION) != 0)
455		return;
456
457	// We share the same list link with the removed list, so we have to remove
458	// the inode from that list here (and add it back when we no longer need it)
459	if ((Flags() & INODE_DELETED) != 0)
460		fVolume->RemovedInodes().Remove(this);
461
462	if (!fVolume->IsInitializing())
463		acquire_vnode(fVolume->FSVolume(), ID());
464
465	rw_lock_write_lock(&Lock());
466	Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_IN_TRANSACTION);
467
468	transaction.AddListener(this);
469}
470
471
472status_t
473Inode::WriteBack(Transaction& transaction)
474{
475	NodeGetter node(fVolume, transaction, this);
476	if (node.WritableNode() == NULL)
477		return B_IO_ERROR;
478
479	memcpy(node.WritableNode(), &Node(), sizeof(bfs_inode));
480	return B_OK;
481}
482
483
484void
485Inode::UpdateNodeFromDisk()
486{
487	NodeGetter node(fVolume, this);
488	memcpy(&fNode, node.Node(), sizeof(bfs_inode));
489	fNode.flags &= HOST_ENDIAN_TO_BFS_INT32(INODE_PERMANENT_FLAGS);
490}
491
492
493status_t
494Inode::CheckPermissions(int accessMode) const
495{
496	// you never have write access to a read-only volume
497	if ((accessMode & W_OK) != 0 && fVolume->IsReadOnly())
498		return B_READ_ONLY_DEVICE;
499
500	// get node permissions
501	mode_t mode = Mode();
502	int userPermissions = (mode & S_IRWXU) >> 6;
503	int groupPermissions = (mode & S_IRWXG) >> 3;
504	int otherPermissions = mode & S_IRWXO;
505
506	// get the node permissions for this uid/gid
507	int permissions = 0;
508	uid_t uid = geteuid();
509	gid_t gid = getegid();
510
511	if (uid == 0) {
512		// user is root
513		// root has always read/write permission, but at least one of the
514		// X bits must be set for execute permission
515		permissions = userPermissions | groupPermissions | otherPermissions
516			| R_OK | W_OK;
517	} else if (uid == (uid_t)fNode.UserID()) {
518		// user is node owner
519		permissions = userPermissions;
520	} else if (gid == (gid_t)fNode.GroupID()) {
521		// user is in owning group
522		permissions = groupPermissions;
523	} else {
524		// user is one of the others
525		permissions = otherPermissions;
526	}
527
528	return (accessMode & ~permissions) == 0 ? B_OK : B_NOT_ALLOWED;
529}
530
531
532//	#pragma mark - attributes
533
534
535void
536Inode::_AddIterator(AttributeIterator* iterator)
537{
538	RecursiveLocker _(fSmallDataLock);
539	fIterators.Add(iterator);
540}
541
542
543void
544Inode::_RemoveIterator(AttributeIterator* iterator)
545{
546	RecursiveLocker _(fSmallDataLock);
547	fIterators.Remove(iterator);
548}
549
550
551/*!	Tries to free up "bytes" space in the small_data section by moving
552	attributes to real files. Used for system attributes like the name.
553	You need to hold the fSmallDataLock when you call this method
554*/
555status_t
556Inode::_MakeSpaceForSmallData(Transaction& transaction, bfs_inode* node,
557	const char* name, int32 bytes)
558{
559	ASSERT_LOCKED_RECURSIVE(&fSmallDataLock);
560
561	while (bytes > 0) {
562		small_data* item = node->SmallDataStart();
563		small_data* max = NULL;
564		int32 index = 0, maxIndex = 0;
565		for (; !item->IsLast(node); item = item->Next(), index++) {
566			// should not remove those
567			if (*item->Name() == FILE_NAME_NAME || !strcmp(name, item->Name()))
568				continue;
569
570			if (max == NULL || max->Size() < item->Size()) {
571				maxIndex = index;
572				max = item;
573			}
574
575			// Remove the first one large enough to free the needed amount of
576			// bytes
577			if (bytes < (int32)item->Size())
578				break;
579		}
580
581		if (item->IsLast(node) || (int32)item->Size() < bytes)
582			return B_ERROR;
583
584		bytes -= max->Size();
585
586		// Move the attribute to a real attribute file
587		// Luckily, this doesn't cause any index updates
588
589		Inode* attribute;
590		status_t status = CreateAttribute(transaction, item->Name(),
591			item->Type(), &attribute);
592		if (status != B_OK)
593			RETURN_ERROR(status);
594
595		size_t length = item->DataSize();
596		status = attribute->WriteAt(transaction, 0, item->Data(), &length);
597
598		ReleaseAttribute(attribute);
599
600		if (status != B_OK) {
601			Vnode vnode(fVolume, Attributes());
602			Inode* attributes;
603			if (vnode.Get(&attributes) < B_OK
604				|| attributes->Remove(transaction, name) < B_OK) {
605				FATAL(("Could not remove newly created attribute!\n"));
606			}
607
608			RETURN_ERROR(status);
609		}
610
611		_RemoveSmallData(node, max, maxIndex);
612	}
613	return B_OK;
614}
615
616
617/*!	Private function which removes the given attribute from the small_data
618	section.
619	You need to hold the fSmallDataLock when you call this method
620*/
621status_t
622Inode::_RemoveSmallData(bfs_inode* node, small_data* item, int32 index)
623{
624	ASSERT_LOCKED_RECURSIVE(&fSmallDataLock);
625
626	small_data* next = item->Next();
627	if (!next->IsLast(node)) {
628		// find the last attribute
629		small_data* last = next;
630		while (!last->IsLast(node))
631			last = last->Next();
632
633		int32 size = (uint8*)last - (uint8*)next;
634		if (size < 0
635			|| size > (uint8*)node + fVolume->BlockSize() - (uint8*)next)
636			return B_BAD_DATA;
637
638		memmove(item, next, size);
639
640		// Move the "last" one to its new location and
641		// correctly terminate the small_data section
642		last = (small_data*)((uint8*)last - ((uint8*)next - (uint8*)item));
643		memset(last, 0, (uint8*)node + fVolume->BlockSize() - (uint8*)last);
644	} else
645		memset(item, 0, item->Size());
646
647	// update all current iterators
648	SinglyLinkedList<AttributeIterator>::Iterator iterator
649		= fIterators.GetIterator();
650	while (iterator.HasNext()) {
651		iterator.Next()->Update(index, -1);
652	}
653
654	return B_OK;
655}
656
657
658//!	Removes the given attribute from the small_data section.
659status_t
660Inode::_RemoveSmallData(Transaction& transaction, NodeGetter& nodeGetter,
661	const char* name)
662{
663	if (name == NULL)
664		return B_BAD_VALUE;
665
666	bfs_inode* node = nodeGetter.WritableNode();
667	RecursiveLocker locker(fSmallDataLock);
668
669	// search for the small_data item
670
671	small_data* item = node->SmallDataStart();
672	int32 index = 0;
673	while (!item->IsLast(node) && strcmp(item->Name(), name)) {
674		item = item->Next();
675		index++;
676	}
677
678	if (item->IsLast(node))
679		return B_ENTRY_NOT_FOUND;
680
681	nodeGetter.MakeWritable(transaction);
682
683	status_t status = _RemoveSmallData(node, item, index);
684	if (status == B_OK) {
685		Node().status_change_time = HOST_ENDIAN_TO_BFS_INT64(
686			bfs_inode::ToInode(real_time_clock_usecs()));
687
688		status = WriteBack(transaction);
689	}
690
691	return status;
692}
693
694
695/*!	Try to place the given attribute in the small_data section - if the
696	new attribute is too big to fit in that section, it returns B_DEVICE_FULL.
697	In that case, the attribute should be written to a real attribute file;
698	it's the caller's responsibility to remove any existing attributes in the
699	small data section if that's the case.
700
701	Note that you need to write back the inode yourself after having called that
702	method - it's a bad API decision that it needs a transaction but enforces
703	you to write back the inode all by yourself, but it's just more efficient
704	in most cases...
705*/
706status_t
707Inode::_AddSmallData(Transaction& transaction, NodeGetter& nodeGetter,
708	const char* name, uint32 type, off_t pos, const uint8* data, size_t length,
709	bool force)
710{
711	bfs_inode* node = nodeGetter.WritableNode();
712
713	if (node == NULL || name == NULL || data == NULL)
714		return B_BAD_VALUE;
715
716	// reject any requests that can't fit into the small_data section
717	uint32 nameLength = strlen(name);
718	uint32 spaceNeeded = sizeof(small_data) + nameLength + 3 + pos + length + 1;
719	if (spaceNeeded > fVolume->InodeSize() - sizeof(bfs_inode))
720		return B_DEVICE_FULL;
721
722	nodeGetter.MakeWritable(transaction);
723	RecursiveLocker locker(fSmallDataLock);
724
725	// Find the last item or one with the same name we have to add
726	small_data* item = node->SmallDataStart();
727	int32 index = 0;
728	while (!item->IsLast(node) && strcmp(item->Name(), name)) {
729		item = item->Next();
730		index++;
731	}
732
733	// is the attribute already in the small_data section?
734	// then just replace the data part of that one
735	if (!item->IsLast(node)) {
736		// find last attribute
737		small_data* last = item;
738		while (!last->IsLast(node))
739			last = last->Next();
740
741		// try to change the attributes value
742		if (item->data_size > pos + length
743			|| force
744			|| ((uint8*)last + pos + length - item->DataSize())
745					<= ((uint8*)node + fVolume->InodeSize())) {
746			// Make room for the new attribute if needed (and we are forced
747			// to do so)
748			if (force && ((uint8*)last + pos + length - item->DataSize())
749					> ((uint8*)node + fVolume->InodeSize())) {
750				// We also take the free space at the end of the small_data
751				// section into account, and request only what's really needed
752				uint32 needed = pos + length - item->DataSize() -
753					(uint32)((uint8*)node + fVolume->InodeSize()
754						- (uint8*)last);
755
756				if (_MakeSpaceForSmallData(transaction, node, name, needed)
757						!= B_OK)
758					return B_ERROR;
759
760				// reset our pointers
761				item = node->SmallDataStart();
762				index = 0;
763				while (!item->IsLast(node) && strcmp(item->Name(), name)) {
764					item = item->Next();
765					index++;
766				}
767
768				last = item;
769				while (!last->IsLast(node))
770					last = last->Next();
771			}
772
773			size_t oldDataSize = item->DataSize();
774
775			// Normally, we can just overwrite the attribute data as the size
776			// is specified by the type and does not change that often
777			if (pos + length != item->DataSize()) {
778				// move the attributes after the current one
779				small_data* next = item->Next();
780				if (!next->IsLast(node)) {
781					memmove((uint8*)item + spaceNeeded, next,
782						(uint8*)last - (uint8*)next);
783				}
784
785				// Move the "last" one to its new location and
786				// correctly terminate the small_data section
787				last = (small_data*)((uint8*)last
788					- ((uint8*)next - ((uint8*)item + spaceNeeded)));
789				if ((uint8*)last < (uint8*)node + fVolume->BlockSize()) {
790					memset(last, 0, (uint8*)node + fVolume->BlockSize()
791						- (uint8*)last);
792				}
793
794				item->data_size = HOST_ENDIAN_TO_BFS_INT16(pos + length);
795			}
796
797			item->type = HOST_ENDIAN_TO_BFS_INT32(type);
798
799			if ((uint64)oldDataSize < (uint64)pos) {
800				// Fill gap with zeros
801				memset(item->Data() + oldDataSize, 0, pos - oldDataSize);
802			}
803			memcpy(item->Data() + pos, data, length);
804			item->Data()[pos + length] = '\0';
805
806			return B_OK;
807		}
808
809		return B_DEVICE_FULL;
810	}
811
812	// try to add the new attribute!
813
814	if ((uint8*)item + spaceNeeded > (uint8*)node + fVolume->InodeSize()) {
815		// there is not enough space for it!
816		if (!force)
817			return B_DEVICE_FULL;
818
819		// make room for the new attribute
820		if (_MakeSpaceForSmallData(transaction, node, name, spaceNeeded) < B_OK)
821			return B_ERROR;
822
823		// get new last item!
824		item = node->SmallDataStart();
825		index = 0;
826		while (!item->IsLast(node)) {
827			item = item->Next();
828			index++;
829		}
830	}
831
832	memset(item, 0, spaceNeeded);
833	item->type = HOST_ENDIAN_TO_BFS_INT32(type);
834	item->name_size = HOST_ENDIAN_TO_BFS_INT16(nameLength);
835	item->data_size = HOST_ENDIAN_TO_BFS_INT16(length);
836	strcpy(item->Name(), name);
837	memcpy(item->Data() + pos, data, length);
838
839	// correctly terminate the small_data section
840	item = item->Next();
841	if (!item->IsLast(node))
842		memset(item, 0, (uint8*)node + fVolume->InodeSize() - (uint8*)item);
843
844	// update all current iterators
845	SinglyLinkedList<AttributeIterator>::Iterator iterator
846		= fIterators.GetIterator();
847	while (iterator.HasNext()) {
848		iterator.Next()->Update(index, 1);
849	}
850
851	return B_OK;
852}
853
854
855/*!	Iterates through the small_data section of an inode.
856	To start at the beginning of this section, you let smallData
857	point to NULL, like:
858		small_data* data = NULL;
859		while (inode->GetNextSmallData(&data) { ... }
860
861	This function is reentrant and doesn't allocate any memory;
862	you can safely stop calling it at any point (you don't need
863	to iterate through the whole list).
864	You need to hold the fSmallDataLock when you call this method
865*/
866status_t
867Inode::_GetNextSmallData(bfs_inode* node, small_data** _smallData) const
868{
869	if (node == NULL)
870		RETURN_ERROR(B_BAD_VALUE);
871
872	ASSERT_LOCKED_RECURSIVE(&fSmallDataLock);
873
874	small_data* data = *_smallData;
875
876	// begin from the start?
877	if (data == NULL)
878		data = node->SmallDataStart();
879	else
880		data = data->Next();
881
882	// is already last item?
883	if (data->IsLast(node))
884		return B_ENTRY_NOT_FOUND;
885
886	*_smallData = data;
887
888	return B_OK;
889}
890
891
892/*!	Finds the attribute "name" in the small data section, and
893	returns a pointer to it (or NULL if it doesn't exist).
894	You need to hold the fSmallDataLock when you call this method
895*/
896small_data*
897Inode::FindSmallData(const bfs_inode* node, const char* name) const
898{
899	ASSERT_LOCKED_RECURSIVE(&fSmallDataLock);
900
901	small_data* smallData = NULL;
902	while (_GetNextSmallData(const_cast<bfs_inode*>(node), &smallData)
903			== B_OK) {
904		if (!strcmp(smallData->Name(), name))
905			return smallData;
906	}
907	return NULL;
908}
909
910
911/*!	Returns a pointer to the node's name if present in the small data
912	section, NULL otherwise.
913	You need to hold the fSmallDataLock when you call this method
914*/
915const char*
916Inode::Name(const bfs_inode* node) const
917{
918	ASSERT_LOCKED_RECURSIVE(&fSmallDataLock);
919
920	small_data* smallData = NULL;
921	while (_GetNextSmallData((bfs_inode*)node, &smallData) == B_OK) {
922		if (*smallData->Name() == FILE_NAME_NAME
923			&& smallData->NameSize() == FILE_NAME_NAME_LENGTH)
924			return (const char*)smallData->Data();
925	}
926	return NULL;
927}
928
929
930/*!	Copies the node's name into the provided buffer.
931	The buffer should be B_FILE_NAME_LENGTH bytes large.
932*/
933status_t
934Inode::GetName(char* buffer, size_t size) const
935{
936	NodeGetter node(fVolume, this);
937	RecursiveLocker locker(fSmallDataLock);
938
939	const char* name = Name(node.Node());
940	if (name == NULL)
941		return B_ENTRY_NOT_FOUND;
942
943	strlcpy(buffer, name, size);
944	return B_OK;
945}
946
947
948/*!	Changes or set the name of a file: in the inode small_data section only, it
949	doesn't change it in the parent directory's b+tree.
950	Note that you need to write back the inode yourself after having called
951	that method. It suffers from the same API decision as AddSmallData() does
952	(and for the same reason).
953*/
954status_t
955Inode::SetName(Transaction& transaction, const char* name)
956{
957	if (name == NULL || *name == '\0')
958		return B_BAD_VALUE;
959
960	NodeGetter node(fVolume, transaction, this);
961	const char nameTag[2] = {FILE_NAME_NAME, 0};
962
963	return _AddSmallData(transaction, node, nameTag, FILE_NAME_TYPE, 0,
964		(uint8*)name, strlen(name), true);
965}
966
967
968status_t
969Inode::_RemoveAttribute(Transaction& transaction, const char* name,
970	bool hasIndex, Index* index)
971{
972	// remove the attribute file if it exists
973	Vnode vnode(fVolume, Attributes());
974	Inode* attributes;
975	status_t status = vnode.Get(&attributes);
976	if (status < B_OK)
977		return status;
978
979	// update index
980	if (index != NULL) {
981		Inode* attribute;
982		if ((hasIndex || fVolume->CheckForLiveQuery(name))
983			&& GetAttribute(name, &attribute) == B_OK) {
984			uint8 data[BPLUSTREE_MAX_KEY_LENGTH];
985			size_t length = BPLUSTREE_MAX_KEY_LENGTH;
986			if (attribute->ReadAt(0, data, &length) == B_OK) {
987				index->Update(transaction, name, attribute->Type(), data,
988					length, NULL, 0, this);
989			}
990
991			ReleaseAttribute(attribute);
992		}
993	}
994
995	if ((status = attributes->Remove(transaction, name)) < B_OK)
996		return status;
997
998	if (attributes->IsEmpty()) {
999		attributes->WriteLockInTransaction(transaction);
1000
1001		// remove attribute directory (don't fail if that can't be done)
1002		if (remove_vnode(fVolume->FSVolume(), attributes->ID()) == B_OK) {
1003			// update the inode, so that no one will ever doubt it's deleted :-)
1004			attributes->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_DELETED);
1005			if (attributes->WriteBack(transaction) == B_OK) {
1006				Attributes().SetTo(0, 0, 0);
1007				WriteBack(transaction);
1008			} else {
1009				unremove_vnode(fVolume->FSVolume(), attributes->ID());
1010				attributes->Node().flags
1011					&= ~HOST_ENDIAN_TO_BFS_INT32(INODE_DELETED);
1012			}
1013		}
1014	}
1015
1016	return status;
1017}
1018
1019
1020/*!	Reads data from the specified attribute.
1021	This is a high-level attribute function that understands attributes
1022	in the small_data section as well as real attribute files.
1023*/
1024status_t
1025Inode::ReadAttribute(const char* name, int32 type, off_t pos, uint8* buffer,
1026	size_t* _length)
1027{
1028	if (pos < 0)
1029		pos = 0;
1030
1031	// search in the small_data section (which has to be locked first)
1032	{
1033		NodeGetter node(fVolume, this);
1034		RecursiveLocker locker(fSmallDataLock);
1035
1036		small_data* smallData = FindSmallData(node.Node(), name);
1037		if (smallData != NULL) {
1038			size_t length = *_length;
1039			if (pos >= smallData->data_size) {
1040				*_length = 0;
1041				return B_OK;
1042			}
1043			if (length + pos > smallData->DataSize())
1044				length = smallData->DataSize() - pos;
1045
1046			memcpy(buffer, smallData->Data() + pos, length);
1047			*_length = length;
1048			return B_OK;
1049		}
1050	}
1051
1052	// search in the attribute directory
1053	Inode* attribute;
1054	status_t status = GetAttribute(name, &attribute);
1055	if (status == B_OK) {
1056		status = attribute->ReadAt(pos, (uint8*)buffer, _length);
1057
1058		ReleaseAttribute(attribute);
1059	}
1060
1061	RETURN_ERROR(status);
1062}
1063
1064
1065/*!	Writes data to the specified attribute.
1066	This is a high-level attribute function that understands attributes
1067	in the small_data section as well as real attribute files.
1068*/
1069status_t
1070Inode::WriteAttribute(Transaction& transaction, const char* name, int32 type,
1071	off_t pos, const uint8* buffer, size_t* _length, bool* _created)
1072{
1073	if (pos < 0)
1074		return B_BAD_VALUE;
1075
1076	// needed to maintain the index
1077	uint8 oldBuffer[BPLUSTREE_MAX_KEY_LENGTH];
1078	uint8* oldData = NULL;
1079	size_t oldLength = 0;
1080	bool created = false;
1081
1082	// TODO: we actually depend on that the contents of "buffer" are constant.
1083	// If they get changed during the write (hey, user programs), we may mess
1084	// up our index trees!
1085	// TODO: for attribute files, we need to log the first
1086	// BPLUSTREE_MAX_KEY_LENGTH bytes of the data stream, or the same as above
1087	// might happen.
1088
1089	Index index(fVolume);
1090	bool hasIndex = index.SetTo(name) == B_OK;
1091
1092	Inode* attribute = NULL;
1093	status_t status = B_OK;
1094
1095	if (GetAttribute(name, &attribute) != B_OK) {
1096		// No attribute inode exists yet
1097
1098		// save the old attribute data
1099		NodeGetter node(fVolume, transaction, this);
1100		recursive_lock_lock(&fSmallDataLock);
1101
1102		small_data* smallData = FindSmallData(node.Node(), name);
1103		if (smallData != NULL) {
1104			oldLength = smallData->DataSize();
1105			if (oldLength > 0) {
1106				if (oldLength > BPLUSTREE_MAX_KEY_LENGTH)
1107					oldLength = BPLUSTREE_MAX_KEY_LENGTH;
1108				memcpy(oldData = oldBuffer, smallData->Data(), oldLength);
1109			}
1110		} else
1111			created = true;
1112
1113		recursive_lock_unlock(&fSmallDataLock);
1114
1115		// if the attribute doesn't exist yet (as a file), try to put it in the
1116		// small_data section first - if that fails (due to insufficent space),
1117		// create a real attribute file
1118		status = _AddSmallData(transaction, node, name, type, pos, buffer,
1119			*_length);
1120		if (status == B_DEVICE_FULL) {
1121			if (smallData != NULL) {
1122				// remove the old attribute from the small data section - there
1123				// is no space left for the new data
1124				status = _RemoveSmallData(transaction, node, name);
1125			} else
1126				status = B_OK;
1127
1128			if (status == B_OK)
1129				status = CreateAttribute(transaction, name, type, &attribute);
1130			if (status != B_OK)
1131				RETURN_ERROR(status);
1132
1133			created = true;
1134		} else if (status == B_OK) {
1135			// Update status time on attribute write
1136			Node().status_change_time = HOST_ENDIAN_TO_BFS_INT64(
1137				bfs_inode::ToInode(real_time_clock_usecs()));
1138
1139			status = WriteBack(transaction);
1140		}
1141	}
1142
1143	if (attribute != NULL) {
1144		WriteLocker writeLocker(attribute->fLock);
1145
1146		if (hasIndex || fVolume->CheckForLiveQuery(name)) {
1147			// Save the old attribute data (if this fails, oldLength will
1148			// reflect it)
1149			while (attribute->Size() > 0) {
1150				bigtime_t oldModified = attribute->LastModified();
1151				writeLocker.Unlock();
1152
1153				oldLength = BPLUSTREE_MAX_KEY_LENGTH;
1154				if (attribute->ReadAt(0, oldBuffer, &oldLength) == B_OK)
1155					oldData = oldBuffer;
1156
1157				writeLocker.Lock();
1158
1159				// Read until the data hasn't changed in between
1160				if (oldModified == attribute->LastModified())
1161					break;
1162
1163				oldLength = 0;
1164			}
1165		}
1166
1167		// check if the data fits into the small_data section again
1168		NodeGetter node(fVolume, transaction, this);
1169		status = _AddSmallData(transaction, node, name, type, pos, buffer,
1170			*_length);
1171
1172		if (status == B_OK) {
1173			// it does - remove its file
1174			writeLocker.Unlock();
1175			status = _RemoveAttribute(transaction, name, false, NULL);
1176		} else {
1177			// The attribute type might have been changed - we need to
1178			// adopt the new one
1179			attribute->Node().type = HOST_ENDIAN_TO_BFS_INT32(type);
1180			status = attribute->WriteBack(transaction);
1181			writeLocker.Unlock();
1182
1183			if (status == B_OK) {
1184				status = attribute->WriteAt(transaction, pos, buffer,
1185					_length);
1186			}
1187		}
1188
1189		if (status == B_OK) {
1190			// Update status time on attribute write
1191			Node().status_change_time = HOST_ENDIAN_TO_BFS_INT64(
1192				bfs_inode::ToInode(real_time_clock_usecs()));
1193
1194			status = WriteBack(transaction);
1195		}
1196
1197		attribute->WriteLockInTransaction(transaction);
1198		ReleaseAttribute(attribute);
1199	}
1200
1201	// TODO: find a better way than this "pos" thing (the begin of the old key
1202	//	must be copied to the start of the new one for a comparison)
1203	if (status == B_OK && pos == 0) {
1204		// index only the first BPLUSTREE_MAX_KEY_LENGTH bytes
1205		uint16 length = *_length;
1206		if (length > BPLUSTREE_MAX_KEY_LENGTH)
1207			length = BPLUSTREE_MAX_KEY_LENGTH;
1208
1209		// Update index. Note, Index::Update() may be called even if
1210		// initializing the index failed - it will just update the live
1211		// queries in this case
1212		if (pos < length || (uint64)pos < (uint64)oldLength) {
1213			index.Update(transaction, name, type, oldData, oldLength, buffer,
1214				length, this);
1215		}
1216	}
1217
1218	if (_created != NULL)
1219		*_created = created;
1220
1221	return status;
1222}
1223
1224
1225/*!	Removes the specified attribute from the inode.
1226	This is a high-level attribute function that understands attributes
1227	in the small_data section as well as real attribute files.
1228*/
1229status_t
1230Inode::RemoveAttribute(Transaction& transaction, const char* name)
1231{
1232	Index index(fVolume);
1233	bool hasIndex = index.SetTo(name) == B_OK;
1234	NodeGetter node(fVolume, this);
1235
1236	// update index for attributes in the small_data section
1237	{
1238		RecursiveLocker _(fSmallDataLock);
1239
1240		small_data* smallData = FindSmallData(node.Node(), name);
1241		if (smallData != NULL) {
1242			uint32 length = smallData->DataSize();
1243			if (length > BPLUSTREE_MAX_KEY_LENGTH)
1244				length = BPLUSTREE_MAX_KEY_LENGTH;
1245			index.Update(transaction, name, smallData->Type(),
1246				smallData->Data(), length, NULL, 0, this);
1247		}
1248	}
1249
1250	status_t status = _RemoveSmallData(transaction, node, name);
1251	if (status == B_ENTRY_NOT_FOUND && !Attributes().IsZero()) {
1252		// remove the attribute file if it exists
1253		status = _RemoveAttribute(transaction, name, hasIndex, &index);
1254		if (status == B_OK) {
1255			Node().status_change_time = HOST_ENDIAN_TO_BFS_INT64(
1256				bfs_inode::ToInode(real_time_clock_usecs()));
1257			WriteBack(transaction);
1258		}
1259	}
1260
1261	return status;
1262}
1263
1264
1265/*!	Returns the attribute inode with the specified \a name, in case it exists.
1266	This method can only return real attribute files; the attributes in the
1267	small data section are ignored.
1268*/
1269status_t
1270Inode::GetAttribute(const char* name, Inode** _attribute)
1271{
1272	// does this inode even have attributes?
1273	if (Attributes().IsZero())
1274		return B_ENTRY_NOT_FOUND;
1275
1276	Vnode vnode(fVolume, Attributes());
1277	Inode* attributes;
1278	if (vnode.Get(&attributes) < B_OK) {
1279		FATAL(("get_vnode() failed in Inode::GetAttribute(name = \"%s\")\n",
1280			name));
1281		return B_ERROR;
1282	}
1283
1284	BPlusTree* tree = attributes->Tree();
1285	if (tree == NULL)
1286		return B_BAD_VALUE;
1287
1288	InodeReadLocker locker(attributes);
1289
1290	ino_t id;
1291	status_t status = tree->Find((uint8*)name, (uint16)strlen(name), &id);
1292	if (status == B_OK) {
1293		Vnode vnode(fVolume, id);
1294		Inode* inode;
1295		// Check if the attribute is really an attribute
1296		if (vnode.Get(&inode) != B_OK || !inode->IsAttribute())
1297			return B_ERROR;
1298
1299		*_attribute = inode;
1300		vnode.Keep();
1301		return B_OK;
1302	}
1303
1304	return status;
1305}
1306
1307
1308void
1309Inode::ReleaseAttribute(Inode* attribute)
1310{
1311	if (attribute == NULL)
1312		return;
1313
1314	put_vnode(fVolume->FSVolume(), attribute->ID());
1315}
1316
1317
1318status_t
1319Inode::CreateAttribute(Transaction& transaction, const char* name, uint32 type,
1320	Inode** attribute)
1321{
1322	// do we need to create the attribute directory first?
1323	if (Attributes().IsZero()) {
1324		status_t status = Inode::Create(transaction, this, NULL,
1325			S_ATTR_DIR | S_DIRECTORY | 0666, 0, 0, NULL);
1326		if (status < B_OK)
1327			RETURN_ERROR(status);
1328	}
1329	Vnode vnode(fVolume, Attributes());
1330	Inode* attributes;
1331	if (vnode.Get(&attributes) < B_OK)
1332		return B_ERROR;
1333
1334	// Inode::Create() locks the inode for us
1335	return Inode::Create(transaction, attributes, name,
1336		S_ATTR | S_FILE | 0666, 0, type, NULL, NULL, attribute);
1337}
1338
1339
1340//	#pragma mark - directory tree
1341
1342
1343bool
1344Inode::IsEmpty()
1345{
1346	TreeIterator iterator(fTree);
1347
1348	// index and attribute directories are really empty when they are
1349	// empty - directories for standard files always contain ".", and
1350	// "..", so we need to ignore those two
1351
1352	uint32 count = 0;
1353	char name[BPLUSTREE_MAX_KEY_LENGTH];
1354	uint16 length;
1355	ino_t id;
1356	while (iterator.GetNextEntry(name, &length, B_FILE_NAME_LENGTH,
1357			&id) == B_OK) {
1358		if (Mode() & (S_ATTR_DIR | S_INDEX_DIR))
1359			return false;
1360
1361		if (++count > 2 || (strcmp(".", name) && strcmp("..", name)))
1362			return false;
1363	}
1364	return true;
1365}
1366
1367
1368status_t
1369Inode::ContainerContentsChanged(Transaction& transaction)
1370{
1371	ASSERT(!InLastModifiedIndex());
1372
1373	Node().last_modified_time = Node().status_change_time
1374		= HOST_ENDIAN_TO_BFS_INT64(bfs_inode::ToInode(real_time_clock_usecs()));
1375
1376	return WriteBack(transaction);
1377}
1378
1379
1380//	#pragma mark - data stream
1381
1382
1383/*!	Computes the number of bytes this inode occupies in the file system.
1384	This includes the file meta blocks used for maintaining its data stream.
1385
1386	TODO: However, the attributes in extra files are not really accounted for;
1387	depending on the speed penalty, this should be changed, though (the value
1388	could be cached in the inode structure or Inode object, though).
1389*/
1390off_t
1391Inode::AllocatedSize() const
1392{
1393	if (IsSymLink() && (Flags() & INODE_LONG_SYMLINK) == 0) {
1394		// This symlink does not have a data stream
1395		return Node().InodeSize();
1396	}
1397
1398	const data_stream& data = Node().data;
1399	uint32 blockSize = fVolume->BlockSize();
1400	off_t size = blockSize;
1401
1402	if (data.MaxDoubleIndirectRange() != 0) {
1403		off_t doubleIndirectSize = data.MaxDoubleIndirectRange()
1404			- data.MaxIndirectRange();
1405		int32 indirectSize = double_indirect_max_indirect_size(
1406			data.double_indirect.Length(), fVolume->BlockSize());
1407
1408		size += (2 * data.double_indirect.Length()
1409				+ doubleIndirectSize / indirectSize)
1410			* blockSize + data.MaxDoubleIndirectRange();
1411	} else if (data.MaxIndirectRange() != 0)
1412		size += data.indirect.Length() + data.MaxIndirectRange();
1413	else
1414		size += data.MaxDirectRange();
1415
1416	if (!Node().attributes.IsZero()) {
1417		// TODO: to make this exact, we'd had to count all attributes
1418		size += 2 * blockSize;
1419			// 2 blocks, one for the attributes inode, one for its B+tree
1420	}
1421
1422	return size;
1423}
1424
1425
1426/*!	Finds the block_run where "pos" is located in the data_stream of
1427	the inode.
1428	If successful, "offset" will then be set to the file offset
1429	of the block_run returned; so "pos - offset" is for the block_run
1430	what "pos" is for the whole stream.
1431	The caller has to make sure that "pos" is inside the stream.
1432*/
1433status_t
1434Inode::FindBlockRun(off_t pos, block_run& run, off_t& offset)
1435{
1436	data_stream* data = &Node().data;
1437
1438	// find matching block run
1439
1440	if (data->MaxIndirectRange() > 0 && pos >= data->MaxDirectRange()) {
1441		if (data->MaxDoubleIndirectRange() > 0
1442			&& pos >= data->MaxIndirectRange()) {
1443			// access to double indirect blocks
1444
1445			CachedBlock cached(fVolume);
1446
1447			int32 runsPerBlock;
1448			int32 directSize;
1449			int32 indirectSize;
1450			get_double_indirect_sizes(data->double_indirect.Length(),
1451				fVolume->BlockSize(), runsPerBlock, directSize, indirectSize);
1452
1453			off_t start = pos - data->MaxIndirectRange();
1454			int32 index = start / indirectSize;
1455
1456			block_run* indirect = (block_run*)cached.SetTo(
1457				fVolume->ToBlock(data->double_indirect) + index / runsPerBlock);
1458			if (indirect == NULL)
1459				RETURN_ERROR(B_ERROR);
1460
1461			int32 current = (start % indirectSize) / directSize;
1462
1463			indirect = (block_run*)cached.SetTo(
1464				fVolume->ToBlock(indirect[index % runsPerBlock])
1465				+ current / runsPerBlock);
1466			if (indirect == NULL)
1467				RETURN_ERROR(B_ERROR);
1468
1469			run = indirect[current % runsPerBlock];
1470			if (run.Length() != data->double_indirect.Length())
1471				RETURN_ERROR(B_BAD_DATA);
1472
1473			offset = data->MaxIndirectRange() + (index * indirectSize)
1474				+ (current * directSize);
1475		} else {
1476			// access to indirect blocks
1477
1478			int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run);
1479			off_t runBlockEnd = data->MaxDirectRange();
1480
1481			CachedBlock cached(fVolume);
1482			off_t block = fVolume->ToBlock(data->indirect);
1483
1484			for (int32 i = 0; i < data->indirect.Length(); i++) {
1485				block_run* indirect = (block_run*)cached.SetTo(block + i);
1486				if (indirect == NULL)
1487					RETURN_ERROR(B_IO_ERROR);
1488
1489				int32 current = -1;
1490				while (++current < runsPerBlock) {
1491					if (indirect[current].IsZero())
1492						break;
1493
1494					runBlockEnd += indirect[current].Length()
1495						<< cached.BlockShift();
1496					if (runBlockEnd > pos) {
1497						run = indirect[current];
1498						offset = runBlockEnd - (run.Length()
1499							<< cached.BlockShift());
1500						return fVolume->ValidateBlockRun(run);
1501					}
1502				}
1503			}
1504			RETURN_ERROR(B_ERROR);
1505		}
1506	} else {
1507		// access from direct blocks
1508
1509		off_t runBlockEnd = 0LL;
1510		int32 current = -1;
1511
1512		while (++current < NUM_DIRECT_BLOCKS) {
1513			if (data->direct[current].IsZero())
1514				break;
1515
1516			runBlockEnd += data->direct[current].Length()
1517				<< fVolume->BlockShift();
1518			if (runBlockEnd > pos) {
1519				run = data->direct[current];
1520				offset = runBlockEnd
1521					- ((uint32)run.Length() << fVolume->BlockShift());
1522				return fVolume->ValidateBlockRun(run);
1523			}
1524		}
1525
1526		return B_ENTRY_NOT_FOUND;
1527	}
1528	return fVolume->ValidateBlockRun(run);
1529}
1530
1531
1532status_t
1533Inode::ReadAt(off_t pos, uint8* buffer, size_t* _length)
1534{
1535	size_t length = *_length;
1536
1537	// set/check boundaries for pos/length
1538	if (pos < 0)
1539		return B_BAD_VALUE;
1540
1541	InodeReadLocker locker(this);
1542
1543	if (pos >= Size() || length == 0) {
1544		*_length = 0;
1545		return B_NO_ERROR;
1546	}
1547
1548	locker.Unlock();
1549
1550	return file_cache_read(FileCache(), NULL, pos, buffer, _length);
1551}
1552
1553
1554status_t
1555Inode::WriteAt(Transaction& transaction, off_t pos, const uint8* buffer,
1556	size_t* _length)
1557{
1558	InodeReadLocker locker(this);
1559
1560	// update the last modification time in memory, it will be written
1561	// back to the inode, and the index when the file is closed
1562	// TODO: should update the internal last modified time only at this point!
1563	Node().last_modified_time = Node().status_change_time
1564		= HOST_ENDIAN_TO_BFS_INT64(bfs_inode::ToInode(real_time_clock_usecs()));
1565
1566	// TODO: support INODE_LOGGED!
1567
1568	size_t length = *_length;
1569	bool changeSize = (uint64)pos + (uint64)length > (uint64)Size();
1570
1571	// set/check boundaries for pos/length
1572	if (pos < 0)
1573		return B_BAD_VALUE;
1574
1575	locker.Unlock();
1576
1577	// the transaction doesn't have to be started already
1578	if (changeSize && !transaction.IsStarted())
1579		transaction.Start(fVolume, BlockNumber());
1580
1581	WriteLocker writeLocker(fLock);
1582
1583	// Work around possible race condition: Someone might have shrunken the file
1584	// while we had no lock.
1585	if (!transaction.IsStarted()
1586		&& (uint64)pos + (uint64)length > (uint64)Size()) {
1587		writeLocker.Unlock();
1588		transaction.Start(fVolume, BlockNumber());
1589		writeLocker.Lock();
1590	}
1591
1592	off_t oldSize = Size();
1593
1594	if ((uint64)pos + (uint64)length > (uint64)oldSize) {
1595		// let's grow the data stream to the size needed
1596		status_t status = SetFileSize(transaction, pos + length);
1597		if (status != B_OK) {
1598			*_length = 0;
1599			WriteLockInTransaction(transaction);
1600			RETURN_ERROR(status);
1601		}
1602		// TODO: In theory we would need to update the file size
1603		// index here as part of the current transaction - this might just
1604		// be a bit too expensive, but worth a try.
1605
1606		// we need to write back the inode here because it has to
1607		// go into this transaction (we cannot wait until the file
1608		// is closed)
1609		status = WriteBack(transaction);
1610		if (status != B_OK) {
1611			WriteLockInTransaction(transaction);
1612			return status;
1613		}
1614	}
1615
1616	writeLocker.Unlock();
1617
1618	if (oldSize < pos)
1619		FillGapWithZeros(oldSize, pos);
1620
1621	// If we don't want to write anything, we can now return (we may
1622	// just have changed the file size using the position parameter)
1623	if (length == 0)
1624		return B_OK;
1625
1626	status_t status = file_cache_write(FileCache(), NULL, pos, buffer, _length);
1627
1628	if (transaction.IsStarted())
1629		WriteLockInTransaction(transaction);
1630
1631	return status;
1632}
1633
1634
1635/*!	Fills the gap between the old file size and the new file size
1636	with zeros.
1637	It's more or less a copy of Inode::WriteAt() but it can handle
1638	length differences of more than just 4 GB, and it never uses
1639	the log, even if the INODE_LOGGED flag is set.
1640*/
1641status_t
1642Inode::FillGapWithZeros(off_t pos, off_t newSize)
1643{
1644	while (pos < newSize) {
1645		size_t size;
1646		if (newSize > pos + 1024 * 1024 * 1024)
1647			size = 1024 * 1024 * 1024;
1648		else
1649			size = newSize - pos;
1650
1651		status_t status = file_cache_write(FileCache(), NULL, pos, NULL, &size);
1652		if (status < B_OK)
1653			return status;
1654
1655		pos += size;
1656	}
1657
1658	return B_OK;
1659}
1660
1661
1662/*!	Allocates \a length blocks, and clears their contents. Growing
1663	the indirect and double indirect range uses this method.
1664	The allocated block_run is saved in "run"
1665*/
1666status_t
1667Inode::_AllocateBlockArray(Transaction& transaction, block_run& run,
1668	size_t length, bool variableSize)
1669{
1670	if (!run.IsZero())
1671		return B_BAD_VALUE;
1672
1673	status_t status = fVolume->Allocate(transaction, this, length, run,
1674		variableSize ? 1 : length);
1675	if (status != B_OK)
1676		return status;
1677
1678	// make sure those blocks are empty
1679	CachedBlock cached(fVolume);
1680	off_t block = fVolume->ToBlock(run);
1681
1682	for (int32 i = 0; i < run.Length(); i++) {
1683		block_run* runs = (block_run*)cached.SetToWritable(transaction,
1684			block + i, true);
1685		if (runs == NULL)
1686			return B_IO_ERROR;
1687	}
1688	return B_OK;
1689}
1690
1691
1692/*!	Grows the stream to \a size, and fills the direct/indirect/double indirect
1693	ranges with the runs.
1694	This method will also determine the size of the preallocation, if any.
1695*/
1696status_t
1697Inode::_GrowStream(Transaction& transaction, off_t size)
1698{
1699	data_stream* data = &Node().data;
1700
1701	// is the data stream already large enough to hold the new size?
1702	// (can be the case with preallocated blocks)
1703	if (size < data->MaxDirectRange()
1704		|| size < data->MaxIndirectRange()
1705		|| size < data->MaxDoubleIndirectRange()) {
1706		data->size = HOST_ENDIAN_TO_BFS_INT64(size);
1707		return B_OK;
1708	}
1709
1710	// how many bytes are still needed? (unused ranges are always zero)
1711	uint16 minimum = 1;
1712	off_t bytes;
1713	if (data->Size() < data->MaxDoubleIndirectRange()) {
1714		bytes = size - data->MaxDoubleIndirectRange();
1715		// The double indirect range can only handle multiples of
1716		// its base length
1717		minimum = data->double_indirect.Length();
1718	} else if (data->Size() < data->MaxIndirectRange())
1719		bytes = size - data->MaxIndirectRange();
1720	else if (data->Size() < data->MaxDirectRange())
1721		bytes = size - data->MaxDirectRange();
1722	else {
1723		// no preallocation left to be used
1724		bytes = size - data->Size();
1725		if (data->MaxDoubleIndirectRange() > 0)
1726			minimum = data->double_indirect.Length();
1727	}
1728
1729	// do we have enough free blocks on the disk?
1730	off_t blocksNeeded = (bytes + fVolume->BlockSize() - 1)
1731		>> fVolume->BlockShift();
1732	if (blocksNeeded > fVolume->FreeBlocks())
1733		return B_DEVICE_FULL;
1734
1735	off_t blocksRequested = blocksNeeded;
1736		// because of preallocations and partial allocations, the number of
1737		// blocks we need to allocate may be different from the one we request
1738		// from the block allocator
1739
1740	// Should we preallocate some blocks?
1741	// Attributes, attribute directories, and long symlinks usually won't get
1742	// that big, and should stay close to the inode - preallocating could be
1743	// counterproductive.
1744	// Also, if free disk space is tight, don't preallocate.
1745	if (!IsAttribute() && !IsAttributeDirectory() && !IsSymLink()
1746		&& fVolume->FreeBlocks() > 128) {
1747		off_t roundTo = 0;
1748		if (IsFile()) {
1749			// Request preallocated blocks depending on the file size and growth
1750			if (size < 1 * 1024 * 1024 && bytes < 512 * 1024) {
1751				// Preallocate 64 KB for file sizes <1 MB and grow rates <512 KB
1752				roundTo = 65536 >> fVolume->BlockShift();
1753			} else if (size < 32 * 1024 * 1024 && bytes <= 1 * 1024 * 1024) {
1754				// Preallocate 512 KB for file sizes between 1 MB and 32 MB, and
1755				// grow rates smaller than 1 MB
1756				roundTo = (512 * 1024) >> fVolume->BlockShift();
1757			} else {
1758				// Preallocate 1/16 of the file size (ie. 4 MB for 64 MB,
1759				// 64 MB for 1 GB)
1760				roundTo = size >> (fVolume->BlockShift() + 4);
1761			}
1762		} else if (IsIndex()) {
1763			// Always preallocate 64 KB for index directories
1764			roundTo = 65536 >> fVolume->BlockShift();
1765		} else {
1766			// Preallocate only 4 KB - directories only get trimmed when their
1767			// vnode is flushed, which might not happen very often.
1768			roundTo = 4096 >> fVolume->BlockShift();
1769		}
1770		if (roundTo > 1) {
1771			// Round to next "roundTo" block count
1772			blocksRequested = ((blocksNeeded + roundTo) / roundTo) * roundTo;
1773		}
1774	}
1775
1776	while (blocksNeeded > 0) {
1777		// the requested blocks do not need to be returned with a
1778		// single allocation, so we need to iterate until we have
1779		// enough blocks allocated
1780		if (minimum > 1) {
1781			// make sure that "blocks" is a multiple of minimum
1782			blocksRequested = round_up(blocksRequested, minimum);
1783		}
1784
1785		block_run run;
1786		status_t status = fVolume->Allocate(transaction, this, blocksRequested,
1787			run, minimum);
1788		if (status != B_OK)
1789			return status;
1790
1791		// okay, we have the needed blocks, so just distribute them to the
1792		// different ranges of the stream (direct, indirect & double indirect)
1793
1794		blocksNeeded -= run.Length();
1795		// don't preallocate if the first allocation was already too small
1796		blocksRequested = blocksNeeded;
1797
1798		// Direct block range
1799
1800		if (data->Size() <= data->MaxDirectRange()) {
1801			// let's try to put them into the direct block range
1802			int32 free = 0;
1803			for (; free < NUM_DIRECT_BLOCKS; free++) {
1804				if (data->direct[free].IsZero())
1805					break;
1806			}
1807
1808			if (free < NUM_DIRECT_BLOCKS) {
1809				// can we merge the last allocated run with the new one?
1810				int32 last = free - 1;
1811				if (free > 0 && data->direct[last].MergeableWith(run)) {
1812					data->direct[last].length = HOST_ENDIAN_TO_BFS_INT16(
1813						data->direct[last].Length() + run.Length());
1814				} else
1815					data->direct[free] = run;
1816
1817				data->max_direct_range = HOST_ENDIAN_TO_BFS_INT64(
1818					data->MaxDirectRange()
1819					+ run.Length() * fVolume->BlockSize());
1820				data->size = HOST_ENDIAN_TO_BFS_INT64(blocksNeeded > 0
1821					? data->max_direct_range : size);
1822				continue;
1823			}
1824		}
1825
1826		// Indirect block range
1827
1828		if (data->Size() <= data->MaxIndirectRange()
1829			|| !data->MaxIndirectRange()) {
1830			CachedBlock cached(fVolume);
1831			block_run* runs = NULL;
1832			uint32 free = 0;
1833			off_t block;
1834
1835			// if there is no indirect block yet, create one
1836			if (data->indirect.IsZero()) {
1837				status = _AllocateBlockArray(transaction, data->indirect,
1838					NUM_ARRAY_BLOCKS, true);
1839				if (status != B_OK)
1840					return status;
1841
1842				data->max_indirect_range = HOST_ENDIAN_TO_BFS_INT64(
1843					data->MaxDirectRange());
1844				// insert the block_run in the first block
1845				runs = (block_run*)cached.SetTo(data->indirect);
1846			} else {
1847				uint32 numberOfRuns = fVolume->BlockSize() / sizeof(block_run);
1848				block = fVolume->ToBlock(data->indirect);
1849
1850				// search first empty entry
1851				int32 i = 0;
1852				for (; i < data->indirect.Length(); i++) {
1853					if ((runs = (block_run*)cached.SetTo(block + i)) == NULL)
1854						return B_IO_ERROR;
1855
1856					for (free = 0; free < numberOfRuns; free++)
1857						if (runs[free].IsZero())
1858							break;
1859
1860					if (free < numberOfRuns)
1861						break;
1862				}
1863				if (i == data->indirect.Length())
1864					runs = NULL;
1865			}
1866
1867			if (runs != NULL) {
1868				// try to insert the run to the last one - note that this
1869				// doesn't take block borders into account, so it could be
1870				// further optimized
1871				cached.MakeWritable(transaction);
1872
1873				int32 last = free - 1;
1874				if (free > 0 && runs[last].MergeableWith(run)) {
1875					runs[last].length = HOST_ENDIAN_TO_BFS_INT16(
1876						runs[last].Length() + run.Length());
1877				} else
1878					runs[free] = run;
1879
1880				data->max_indirect_range = HOST_ENDIAN_TO_BFS_INT64(
1881					data->MaxIndirectRange()
1882					+ ((uint32)run.Length() << fVolume->BlockShift()));
1883				data->size = HOST_ENDIAN_TO_BFS_INT64(blocksNeeded > 0
1884					? data->MaxIndirectRange() : size);
1885				continue;
1886			}
1887		}
1888
1889		// Double indirect block range
1890
1891		if (data->Size() <= data->MaxDoubleIndirectRange()
1892			|| !data->max_double_indirect_range) {
1893			// We make sure here that we have this minimum allocated, so if
1894			// the allocation succeeds, we don't run into an endless loop.
1895			if (!data->max_double_indirect_range)
1896				minimum = _DoubleIndirectBlockLength();
1897			else
1898				minimum = data->double_indirect.Length();
1899
1900			if ((run.Length() % minimum) != 0) {
1901				// The number of allocated blocks isn't a multiple of 'minimum',
1902				// so we have to change this. This can happen the first time the
1903				// stream grows into the double indirect range.
1904				// First, free the remaining blocks that don't fit into this
1905				// multiple.
1906				int32 rest = run.Length() % minimum;
1907				run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() - rest);
1908
1909				status = fVolume->Free(transaction,
1910					block_run::Run(run.AllocationGroup(),
1911					run.Start() + run.Length(), rest));
1912				if (status != B_OK)
1913					return status;
1914
1915				blocksNeeded += rest;
1916				blocksRequested = round_up(blocksNeeded, minimum);
1917
1918				// Are there any blocks left in the run? If not, allocate
1919				// a new one
1920				if (run.length == 0)
1921					continue;
1922			}
1923
1924			// if there is no double indirect block yet, create one
1925			if (data->double_indirect.IsZero()) {
1926				status = _AllocateBlockArray(transaction,
1927					data->double_indirect, _DoubleIndirectBlockLength());
1928				if (status != B_OK)
1929					return status;
1930
1931				data->max_double_indirect_range = data->max_indirect_range;
1932			}
1933
1934			// calculate the index where to insert the new blocks
1935
1936			int32 runsPerBlock;
1937			int32 directSize;
1938			int32 indirectSize;
1939			get_double_indirect_sizes(data->double_indirect.Length(),
1940				fVolume->BlockSize(), runsPerBlock, directSize, indirectSize);
1941
1942			off_t start = data->MaxDoubleIndirectRange()
1943				- data->MaxIndirectRange();
1944			int32 indirectIndex = start / indirectSize;
1945			int32 index = (start % indirectSize) / directSize;
1946			int32 runsPerArray = runsPerBlock * minimum;
1947
1948			// distribute the blocks to the array and allocate
1949			// new array blocks when needed
1950
1951			CachedBlock cached(fVolume);
1952			CachedBlock cachedDirect(fVolume);
1953			block_run* array = NULL;
1954			uint32 runLength = run.Length();
1955
1956			while (run.length != 0) {
1957				// get the indirect array block
1958				if (array == NULL) {
1959					uint32 block = indirectIndex / runsPerBlock;
1960					if (block >= minimum)
1961						return EFBIG;
1962
1963					array = (block_run*)cached.SetTo(fVolume->ToBlock(
1964						data->double_indirect) + block);
1965					if (array == NULL)
1966						return B_IO_ERROR;
1967				}
1968
1969				do {
1970					// do we need a new array block?
1971					if (array[indirectIndex % runsPerBlock].IsZero()) {
1972						cached.MakeWritable(transaction);
1973
1974						status = _AllocateBlockArray(transaction,
1975							array[indirectIndex % runsPerBlock],
1976							data->double_indirect.Length());
1977						if (status != B_OK)
1978							return status;
1979					}
1980
1981					block_run* runs = (block_run*)cachedDirect.SetToWritable(
1982						transaction, fVolume->ToBlock(array[indirectIndex
1983							% runsPerBlock]) + index / runsPerBlock);
1984					if (runs == NULL)
1985						return B_IO_ERROR;
1986
1987					do {
1988						// insert the block_run into the array
1989						runs[index % runsPerBlock] = run;
1990						runs[index % runsPerBlock].length
1991							= HOST_ENDIAN_TO_BFS_INT16(minimum);
1992
1993						// alter the remaining block_run
1994						run.start = HOST_ENDIAN_TO_BFS_INT16(run.Start()
1995							+ minimum);
1996						run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length()
1997							- minimum);
1998					} while ((++index % runsPerBlock) != 0 && run.length);
1999				} while ((index % runsPerArray) != 0 && run.length);
2000
2001				if (index == runsPerArray)
2002					index = 0;
2003				if (++indirectIndex % runsPerBlock == 0) {
2004					array = NULL;
2005					index = 0;
2006				}
2007			}
2008
2009			data->max_double_indirect_range = HOST_ENDIAN_TO_BFS_INT64(
2010				data->MaxDoubleIndirectRange()
2011				+ (runLength << fVolume->BlockShift()));
2012			data->size = blocksNeeded > 0 ? HOST_ENDIAN_TO_BFS_INT64(
2013				data->max_double_indirect_range) : size;
2014
2015			continue;
2016		}
2017
2018		RETURN_ERROR(EFBIG);
2019	}
2020	// update the size of the data stream
2021	data->size = HOST_ENDIAN_TO_BFS_INT64(size);
2022
2023	return B_OK;
2024}
2025
2026
2027size_t
2028Inode::_DoubleIndirectBlockLength() const
2029{
2030	if (fVolume->BlockSize() > DOUBLE_INDIRECT_ARRAY_SIZE)
2031		return 1;
2032
2033	return DOUBLE_INDIRECT_ARRAY_SIZE / fVolume->BlockSize();
2034}
2035
2036
2037/*!	Frees the statically sized array of the double indirect part of a data
2038	stream.
2039*/
2040status_t
2041Inode::_FreeStaticStreamArray(Transaction& transaction, int32 level,
2042	block_run run, off_t size, off_t offset, off_t& max)
2043{
2044	int32 indirectSize;
2045	if (level == 0) {
2046		indirectSize = double_indirect_max_indirect_size(run.Length(),
2047			fVolume->BlockSize());
2048	} else {
2049		indirectSize = double_indirect_max_direct_size(run.Length(),
2050			fVolume->BlockSize());
2051	}
2052
2053	off_t start;
2054	if (size > offset)
2055		start = size - offset;
2056	else
2057		start = 0;
2058
2059	int32 index = start / indirectSize;
2060	int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run);
2061
2062	CachedBlock cached(fVolume);
2063	off_t blockNumber = fVolume->ToBlock(run);
2064
2065	// set the file offset to the current block run
2066	offset += (off_t)index * indirectSize;
2067
2068	for (int32 i = index / runsPerBlock; i < run.Length(); i++) {
2069		block_run* array = (block_run*)cached.SetToWritable(transaction,
2070			blockNumber + i);
2071		if (array == NULL)
2072			RETURN_ERROR(B_ERROR);
2073
2074		for (index = index % runsPerBlock; index < runsPerBlock; index++) {
2075			if (array[index].IsZero()) {
2076				// we also want to break out of the outer loop
2077				i = run.Length();
2078				break;
2079			}
2080
2081			status_t status = B_OK;
2082			if (level == 0) {
2083				status = _FreeStaticStreamArray(transaction, 1, array[index],
2084					size, offset, max);
2085			} else if (offset >= size)
2086				status = fVolume->Free(transaction, array[index]);
2087			else
2088				max = HOST_ENDIAN_TO_BFS_INT64(offset + indirectSize);
2089
2090			if (status < B_OK)
2091				RETURN_ERROR(status);
2092
2093			if (offset >= size)
2094				array[index].SetTo(0, 0, 0);
2095
2096			offset += indirectSize;
2097		}
2098		index = 0;
2099	}
2100	return B_OK;
2101}
2102
2103
2104/*!	Frees all block_runs in the array which come after the specified size.
2105	It also trims the last block_run that contain the size.
2106	"offset" and "max" are maintained until the last block_run that doesn't
2107	have to be freed - after this, the values won't be correct anymore, but
2108	will still assure correct function for all subsequent calls.
2109	"max" is considered to be in file system byte order.
2110*/
2111status_t
2112Inode::_FreeStreamArray(Transaction& transaction, block_run* array,
2113	uint32 arrayLength, off_t size, off_t& offset, off_t& max)
2114{
2115	PRINT(("FreeStreamArray: arrayLength %lu, size %Ld, offset %Ld, max %Ld\n",
2116		arrayLength, size, offset, max));
2117
2118	off_t newOffset = offset;
2119	uint32 i = 0;
2120	for (; i < arrayLength; i++, offset = newOffset) {
2121		if (array[i].IsZero())
2122			break;
2123
2124		newOffset += (off_t)array[i].Length() << fVolume->BlockShift();
2125		if (newOffset <= size)
2126			continue;
2127
2128		block_run run = array[i];
2129
2130		// determine the block_run to be freed
2131		if (newOffset > size && offset < size) {
2132			// free partial block_run (and update the original block_run)
2133			run.start = HOST_ENDIAN_TO_BFS_INT16(array[i].Start()
2134				+ ((size + fVolume->BlockSize() - 1 - offset)
2135					>> fVolume->BlockShift()));
2136				// round to next block
2137			array[i].length = HOST_ENDIAN_TO_BFS_INT16(run.Start()
2138				- array[i].Start());
2139			run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length()
2140				- array[i].Length());
2141
2142			if (run.length == 0)
2143				continue;
2144
2145			// update maximum range
2146			max = HOST_ENDIAN_TO_BFS_INT64(offset + ((off_t)array[i].Length()
2147				<< fVolume->BlockShift()));
2148		} else {
2149			// free the whole block_run
2150			array[i].SetTo(0, 0, 0);
2151
2152			if ((off_t)BFS_ENDIAN_TO_HOST_INT64(max) > offset)
2153				max = HOST_ENDIAN_TO_BFS_INT64(offset);
2154		}
2155
2156		if (fVolume->Free(transaction, run) < B_OK)
2157			return B_IO_ERROR;
2158	}
2159	return B_OK;
2160}
2161
2162
2163status_t
2164Inode::_ShrinkStream(Transaction& transaction, off_t size)
2165{
2166	data_stream* data = &Node().data;
2167	status_t status;
2168
2169	if (data->MaxDoubleIndirectRange() > size) {
2170		off_t* maxDoubleIndirect = &data->max_double_indirect_range;
2171			// gcc 4 work-around: "error: cannot bind packed field
2172			// 'data->data_stream::max_double_indirect_range' to 'off_t&'"
2173		status = _FreeStaticStreamArray(transaction, 0, data->double_indirect,
2174			size, data->MaxIndirectRange(), *maxDoubleIndirect);
2175		if (status != B_OK)
2176			return status;
2177
2178		if (size <= data->MaxIndirectRange()) {
2179			fVolume->Free(transaction, data->double_indirect);
2180			data->double_indirect.SetTo(0, 0, 0);
2181			data->max_double_indirect_range = 0;
2182		}
2183	}
2184
2185	if (data->MaxIndirectRange() > size) {
2186		CachedBlock cached(fVolume);
2187		off_t block = fVolume->ToBlock(data->indirect);
2188		off_t offset = data->MaxDirectRange();
2189
2190		for (int32 i = 0; i < data->indirect.Length(); i++) {
2191			block_run* array = (block_run*)cached.SetToWritable(transaction,
2192				block + i);
2193			if (array == NULL)
2194				break;
2195
2196			off_t* maxIndirect = &data->max_indirect_range;
2197				// gcc 4 work-around: "error: cannot bind packed field
2198				// 'data->data_stream::max_indirect_range' to 'off_t&'"
2199			if (_FreeStreamArray(transaction, array, fVolume->BlockSize()
2200					/ sizeof(block_run), size, offset, *maxIndirect) != B_OK)
2201				return B_IO_ERROR;
2202		}
2203		if (data->max_direct_range == data->max_indirect_range) {
2204			fVolume->Free(transaction, data->indirect);
2205			data->indirect.SetTo(0, 0, 0);
2206			data->max_indirect_range = 0;
2207		}
2208	}
2209
2210	if (data->MaxDirectRange() > size) {
2211		off_t offset = 0;
2212		off_t *maxDirect = &data->max_direct_range;
2213			// gcc 4 work-around: "error: cannot bind packed field
2214			// 'data->data_stream::max_direct_range' to 'off_t&'"
2215		status = _FreeStreamArray(transaction, data->direct, NUM_DIRECT_BLOCKS,
2216			size, offset, *maxDirect);
2217		if (status < B_OK)
2218			return status;
2219	}
2220
2221	data->size = HOST_ENDIAN_TO_BFS_INT64(size);
2222	return B_OK;
2223}
2224
2225
2226status_t
2227Inode::SetFileSize(Transaction& transaction, off_t size)
2228{
2229	if (size < 0)
2230		return B_BAD_VALUE;
2231
2232	off_t oldSize = Size();
2233
2234	if (size == oldSize)
2235		return B_OK;
2236
2237	T(Resize(this, oldSize, size, false));
2238
2239	// should the data stream grow or shrink?
2240	status_t status;
2241	if (size > oldSize) {
2242		status = _GrowStream(transaction, size);
2243		if (status < B_OK) {
2244			// if the growing of the stream fails, the whole operation
2245			// fails, so we should shrink the stream to its former size
2246			_ShrinkStream(transaction, oldSize);
2247		}
2248	} else
2249		status = _ShrinkStream(transaction, size);
2250
2251	if (status < B_OK)
2252		return status;
2253
2254	file_cache_set_size(FileCache(), size);
2255	file_map_set_size(Map(), size);
2256
2257	return WriteBack(transaction);
2258}
2259
2260
2261status_t
2262Inode::Append(Transaction& transaction, off_t bytes)
2263{
2264	return SetFileSize(transaction, Size() + bytes);
2265}
2266
2267
2268/*!	Checks whether or not this inode's data stream needs to be trimmed
2269	because of an earlier preallocation.
2270	Returns true if there are any blocks to be trimmed.
2271*/
2272bool
2273Inode::NeedsTrimming() const
2274{
2275	// We never trim preallocated index blocks to make them grow as smooth as
2276	// possible. There are only few indices anyway, so this doesn't hurt.
2277	// Also, if an inode is already in deleted state, we don't bother trimming
2278	// it.
2279	if (IsIndex() || IsDeleted()
2280		|| (IsSymLink() && (Flags() & INODE_LONG_SYMLINK) == 0))
2281		return false;
2282
2283	off_t roundedSize = round_up(Size(), fVolume->BlockSize());
2284
2285	return Node().data.MaxDirectRange() > roundedSize
2286		|| Node().data.MaxIndirectRange() > roundedSize
2287		|| Node().data.MaxDoubleIndirectRange() > roundedSize;
2288}
2289
2290
2291status_t
2292Inode::TrimPreallocation(Transaction& transaction)
2293{
2294	T(Resize(this, max_c(Node().data.MaxDirectRange(),
2295		Node().data.MaxIndirectRange()), Size(), true));
2296
2297	status_t status = _ShrinkStream(transaction, Size());
2298	if (status < B_OK)
2299		return status;
2300
2301	return WriteBack(transaction);
2302}
2303
2304
2305//!	Frees the file's data stream and removes all attributes
2306status_t
2307Inode::Free(Transaction& transaction)
2308{
2309	FUNCTION();
2310
2311	// Perhaps there should be an implementation of Inode::ShrinkStream() that
2312	// just frees the data_stream, but doesn't change the inode (since it is
2313	// freed anyway) - that would make an undelete command possible
2314	if (!IsSymLink() || (Flags() & INODE_LONG_SYMLINK) != 0) {
2315		status_t status = SetFileSize(transaction, 0);
2316		if (status < B_OK)
2317			return status;
2318	}
2319
2320	// Free all attributes, and remove their indices
2321	{
2322		// We have to limit the scope of AttributeIterator, so that its
2323		// destructor is not called after the inode is deleted
2324		AttributeIterator iterator(this);
2325
2326		char name[B_FILE_NAME_LENGTH];
2327		uint32 type;
2328		size_t length;
2329		ino_t id;
2330		while (iterator.GetNext(name, &length, &type, &id) == B_OK) {
2331			RemoveAttribute(transaction, name);
2332		}
2333	}
2334
2335	if (WriteBack(transaction) < B_OK)
2336		return B_IO_ERROR;
2337
2338	return fVolume->Free(transaction, BlockRun());
2339}
2340
2341
2342//!	Synchronizes (writes back to disk) the file stream of the inode.
2343status_t
2344Inode::Sync()
2345{
2346	if (FileCache())
2347		return file_cache_sync(FileCache());
2348
2349	// We may also want to flush the attribute's data stream to
2350	// disk here... (do we?)
2351
2352	if (IsSymLink() && (Flags() & INODE_LONG_SYMLINK) == 0)
2353		return B_OK;
2354
2355	InodeReadLocker locker(this);
2356
2357	data_stream* data = &Node().data;
2358	status_t status = B_OK;
2359
2360	// flush direct range
2361
2362	for (int32 i = 0; i < NUM_DIRECT_BLOCKS; i++) {
2363		if (data->direct[i].IsZero())
2364			return B_OK;
2365
2366		status = block_cache_sync_etc(fVolume->BlockCache(),
2367			fVolume->ToBlock(data->direct[i]), data->direct[i].Length());
2368		if (status != B_OK)
2369			return status;
2370	}
2371
2372	// flush indirect range
2373
2374	if (data->max_indirect_range == 0)
2375		return B_OK;
2376
2377	CachedBlock cached(fVolume);
2378	off_t block = fVolume->ToBlock(data->indirect);
2379	int32 count = fVolume->BlockSize() / sizeof(block_run);
2380
2381	for (int32 j = 0; j < data->indirect.Length(); j++) {
2382		block_run* runs = (block_run*)cached.SetTo(block + j);
2383		if (runs == NULL)
2384			break;
2385
2386		for (int32 i = 0; i < count; i++) {
2387			if (runs[i].IsZero())
2388				return B_OK;
2389
2390			status = block_cache_sync_etc(fVolume->BlockCache(),
2391				fVolume->ToBlock(runs[i]), runs[i].Length());
2392			if (status != B_OK)
2393				return status;
2394		}
2395	}
2396
2397	// flush double indirect range
2398
2399	if (data->max_double_indirect_range == 0)
2400		return B_OK;
2401
2402	off_t indirectBlock = fVolume->ToBlock(data->double_indirect);
2403
2404	for (int32 l = 0; l < data->double_indirect.Length(); l++) {
2405		block_run* indirectRuns = (block_run*)cached.SetTo(indirectBlock + l);
2406		if (indirectRuns == NULL)
2407			return B_FILE_ERROR;
2408
2409		CachedBlock directCached(fVolume);
2410
2411		for (int32 k = 0; k < count; k++) {
2412			if (indirectRuns[k].IsZero())
2413				return B_OK;
2414
2415			block = fVolume->ToBlock(indirectRuns[k]);
2416			for (int32 j = 0; j < indirectRuns[k].Length(); j++) {
2417				block_run* runs = (block_run*)directCached.SetTo(block + j);
2418				if (runs == NULL)
2419					return B_FILE_ERROR;
2420
2421				for (int32 i = 0; i < count; i++) {
2422					if (runs[i].IsZero())
2423						return B_OK;
2424
2425					// TODO: combine single block_runs to bigger ones when
2426					// they are adjacent
2427					status = block_cache_sync_etc(fVolume->BlockCache(),
2428						fVolume->ToBlock(runs[i]), runs[i].Length());
2429					if (status != B_OK)
2430						return status;
2431				}
2432			}
2433		}
2434	}
2435	return B_OK;
2436}
2437
2438
2439//	#pragma mark - TransactionListener implementation
2440
2441
2442void
2443Inode::TransactionDone(bool success)
2444{
2445	if (!success) {
2446		// revert any changes made to the cached bfs_inode
2447		UpdateNodeFromDisk();
2448	}
2449}
2450
2451
2452void
2453Inode::RemovedFromTransaction()
2454{
2455	Node().flags &= ~HOST_ENDIAN_TO_BFS_INT32(INODE_IN_TRANSACTION);
2456
2457	// See AddInode() why we do this here
2458	if ((Flags() & INODE_DELETED) != 0)
2459		fVolume->RemovedInodes().Add(this);
2460
2461	rw_lock_write_unlock(&Lock());
2462
2463	if (!fVolume->IsInitializing())
2464		put_vnode(fVolume->FSVolume(), ID());
2465}
2466
2467
2468//	#pragma mark - creation/deletion
2469
2470
2471status_t
2472Inode::Remove(Transaction& transaction, const char* name, ino_t* _id,
2473	bool isDirectory, bool force)
2474{
2475	if (fTree == NULL)
2476		RETURN_ERROR(B_BAD_VALUE);
2477
2478	WriteLockInTransaction(transaction);
2479
2480	// does the file even exist?
2481	off_t id;
2482	if (fTree->Find((uint8*)name, (uint16)strlen(name), &id) < B_OK)
2483		return B_ENTRY_NOT_FOUND;
2484
2485	if (_id)
2486		*_id = id;
2487
2488	Vnode vnode(fVolume, id);
2489	Inode* inode;
2490	status_t status = vnode.Get(&inode);
2491	if (status < B_OK) {
2492		REPORT_ERROR(status);
2493		return B_ENTRY_NOT_FOUND;
2494	}
2495
2496	T(Remove(inode, name));
2497	inode->WriteLockInTransaction(transaction);
2498
2499	// Inode::IsContainer() is true also for indices (furthermore, the S_IFDIR
2500	// bit is set for indices in BFS, not for attribute directories) - but you
2501	// should really be able to do whatever you want with your indices
2502	// without having to remove all files first :)
2503	if (!inode->IsIndex() && !force) {
2504		// if it's not of the correct type, don't delete it!
2505		if (inode->IsContainer() != isDirectory)
2506			return isDirectory ? B_NOT_A_DIRECTORY : B_IS_A_DIRECTORY;
2507
2508		// only delete empty directories
2509		if (isDirectory && !inode->IsEmpty())
2510			return B_DIRECTORY_NOT_EMPTY;
2511	}
2512
2513	// remove_vnode() allows the inode to be accessed until the last put_vnode()
2514	status = remove_vnode(fVolume->FSVolume(), id);
2515	if (status != B_OK)
2516		return status;
2517
2518	if (fTree->Remove(transaction, name, id) != B_OK && !force) {
2519		unremove_vnode(fVolume->FSVolume(), id);
2520		RETURN_ERROR(B_ERROR);
2521	}
2522
2523#ifdef DEBUG
2524	if (fTree->Find((uint8*)name, (uint16)strlen(name), &id) == B_OK) {
2525		DIE(("deleted entry still there"));
2526	}
2527#endif
2528
2529	ContainerContentsChanged(transaction);
2530
2531	// update the inode, so that no one will ever doubt it's deleted :-)
2532	inode->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_DELETED);
2533	inode->Node().flags &= ~HOST_ENDIAN_TO_BFS_INT32(INODE_IN_USE);
2534
2535	// In balance to the Inode::Create() method, the main indices
2536	// are updated here (name, size, & last_modified)
2537
2538	Index index(fVolume);
2539	if (inode->InNameIndex()) {
2540		index.RemoveName(transaction, name, inode);
2541			// If removing from the index fails, it is not regarded as a
2542			// fatal error and will not be reported back!
2543			// Deleted inodes won't be visible in queries anyway.
2544	}
2545
2546	if (inode->InSizeIndex())
2547		index.RemoveSize(transaction, inode);
2548	if (inode->InLastModifiedIndex())
2549		index.RemoveLastModified(transaction, inode);
2550
2551	return inode->WriteBack(transaction);
2552}
2553
2554
2555/*!	Creates the inode with the specified \a parent directory, and automatically
2556	adds the created inode to that parent directory. If an attribute directory
2557	is created, it will also automatically  be added to the \a parent inode as
2558	such. However, the indices root node, and the regular root node won't be
2559	added to the superblock.
2560	It will also create the initial B+tree for the inode if it's a directory
2561	of any kind.
2562	\a name may be \c NULL, but only if no \a parent is given.
2563	If the "_id" or "_inode" variable is given and non-NULL to store the
2564	inode's ID, the inode stays locked - you have to call put_vnode() if you
2565	don't use it anymore.
2566	If the node already exists, this method will fail if \c O_EXCL is set, or
2567	it's a directory or a symlink. Otherwise, it will just be returned.
2568	If \c O_TRUNC has been specified, the file will also be truncated.
2569*/
2570status_t
2571Inode::Create(Transaction& transaction, Inode* parent, const char* name,
2572	int32 mode, int openMode, uint32 type, bool* _created, ino_t* _id,
2573	Inode** _inode, fs_vnode_ops* vnodeOps, uint32 publishFlags)
2574{
2575	FUNCTION_START(("name = %s, mode = %ld\n", name, mode));
2576
2577	block_run parentRun = parent ? parent->BlockRun() : block_run::Run(0, 0, 0);
2578	Volume* volume = transaction.GetVolume();
2579	BPlusTree* tree = NULL;
2580
2581	if (parent != NULL && (mode & S_ATTR_DIR) == 0 && parent->IsContainer()) {
2582		// check if the file already exists in the directory
2583		tree = parent->Tree();
2584	}
2585
2586	if (parent != NULL) {
2587		// the parent directory is locked during the whole inode creation
2588		parent->WriteLockInTransaction(transaction);
2589	}
2590
2591	if (parent != NULL && !volume->IsInitializing() && parent->IsContainer()) {
2592		// don't create anything in removed directories
2593		bool removed;
2594		if (get_vnode_removed(volume->FSVolume(), parent->ID(), &removed)
2595				== B_OK && removed) {
2596			RETURN_ERROR(B_ENTRY_NOT_FOUND);
2597		}
2598	}
2599
2600	if (tree != NULL) {
2601		// Does the file already exist?
2602		off_t offset;
2603		if (tree->Find((uint8*)name, (uint16)strlen(name), &offset) == B_OK) {
2604			// Return if the file should be a directory/link or opened in
2605			// exclusive mode
2606			if (S_ISDIR(mode) || S_ISLNK(mode) || (openMode & O_EXCL) != 0)
2607				return B_FILE_EXISTS;
2608
2609			Vnode vnode(volume, offset);
2610			Inode* inode;
2611			status_t status = vnode.Get(&inode);
2612			if (status != B_OK) {
2613				REPORT_ERROR(status);
2614				return B_ENTRY_NOT_FOUND;
2615			}
2616
2617			if (inode->IsDirectory() && (openMode & O_RWMASK) != O_RDONLY)
2618				return B_IS_A_DIRECTORY;
2619			if ((openMode & O_DIRECTORY) != 0 && !inode->IsDirectory())
2620				return B_NOT_A_DIRECTORY;
2621
2622			// we want to open the file, so we should have the rights to do so
2623			if (inode->CheckPermissions(open_mode_to_access(openMode)
2624					| ((openMode & O_TRUNC) != 0 ? W_OK : 0)) != B_OK)
2625				return B_NOT_ALLOWED;
2626
2627			if ((openMode & O_TRUNC) != 0) {
2628				// truncate the existing file
2629				inode->WriteLockInTransaction(transaction);
2630
2631				status_t status = inode->SetFileSize(transaction, 0);
2632				if (status == B_OK)
2633					status = inode->WriteBack(transaction);
2634
2635				if (status != B_OK)
2636					return status;
2637			}
2638
2639			if (_created)
2640				*_created = false;
2641			if (_id)
2642				*_id = inode->ID();
2643			if (_inode)
2644				*_inode = inode;
2645
2646			// Only keep the vnode in memory if the _id or _inode pointer is
2647			// provided
2648			if (_id != NULL || _inode != NULL)
2649				vnode.Keep();
2650
2651			return B_OK;
2652		}
2653	} else if (parent != NULL && (mode & S_ATTR_DIR) == 0) {
2654		return B_BAD_VALUE;
2655	} else if ((openMode & O_DIRECTORY) != 0) {
2656		// TODO: we might need to return B_NOT_A_DIRECTORY here
2657		return B_ENTRY_NOT_FOUND;
2658	}
2659
2660	status_t status;
2661
2662	// do we have the power to create new files at all?
2663	if (parent != NULL && (status = parent->CheckPermissions(W_OK)) != B_OK)
2664		RETURN_ERROR(status);
2665
2666	// allocate space for the new inode
2667	InodeAllocator allocator(transaction);
2668	block_run run;
2669	Inode* inode;
2670	status = allocator.New(&parentRun, mode, run, vnodeOps, &inode);
2671	if (status < B_OK)
2672		return status;
2673
2674	T(Create(inode, parent, name, mode, openMode, type));
2675
2676	// Initialize the parts of the bfs_inode structure that
2677	// InodeAllocator::New() hasn't touched yet
2678
2679	bfs_inode* node = &inode->Node();
2680
2681	if (parent == NULL) {
2682		// we set the parent to itself in this case
2683		// (only happens for the root and indices node)
2684		node->parent = run;
2685	} else
2686		node->parent = parentRun;
2687
2688	node->uid = HOST_ENDIAN_TO_BFS_INT32(geteuid());
2689	node->gid = HOST_ENDIAN_TO_BFS_INT32(parent
2690		? parent->Node().GroupID() : getegid());
2691		// the group ID is inherited from the parent, if available
2692
2693	node->type = HOST_ENDIAN_TO_BFS_INT32(type);
2694
2695	inode->WriteBack(transaction);
2696		// make sure the initialized node is available to others
2697
2698	// only add the name to regular files, directories, or symlinks
2699	// don't add it to attributes, or indices
2700	if (tree && inode->IsRegularNode()
2701		&& inode->SetName(transaction, name) != B_OK)
2702		return B_ERROR;
2703
2704	// Initialize b+tree if it's a directory (and add "." & ".." if it's
2705	// a standard directory for files - not for attributes or indices)
2706	if (inode->IsContainer()) {
2707		status = allocator.CreateTree();
2708		if (status != B_OK)
2709			return status;
2710	}
2711
2712	// Add a link to the inode from the parent, depending on its type
2713	// (the vnode is not published yet, so it is safe to make the inode
2714	// accessable to the file system here)
2715	if (tree != NULL) {
2716		status = tree->Insert(transaction, name, inode->ID());
2717	} else if (parent != NULL && (mode & S_ATTR_DIR) != 0) {
2718		parent->Attributes() = run;
2719		status = parent->WriteBack(transaction);
2720	}
2721
2722	// Note, we only care if the inode could be made accessable for the
2723	// two cases above; the root node or the indices root node must
2724	// handle this case on their own (or other cases where "parent" is
2725	// NULL)
2726	if (status != B_OK)
2727		RETURN_ERROR(status);
2728
2729	// Update the main indices (name, size & last_modified)
2730	// (live queries might want to access us after this)
2731
2732	Index index(volume);
2733	if (inode->InNameIndex() && name != NULL) {
2734		// the name index only contains regular files
2735		// (but not the root node where name == NULL)
2736		status = index.InsertName(transaction, name, inode);
2737		if (status != B_OK && status != B_BAD_INDEX) {
2738			// We have to remove the node from the parent at this point,
2739			// because the InodeAllocator destructor can't handle this
2740			// case (and if it fails, we can't do anything about it...)
2741			if (tree)
2742				tree->Remove(transaction, name, inode->ID());
2743			else if (parent != NULL && (mode & S_ATTR_DIR) != 0)
2744				parent->Node().attributes.SetTo(0, 0, 0);
2745
2746			RETURN_ERROR(status);
2747		}
2748	}
2749
2750	if (parent != NULL && parent->IsContainer())
2751		parent->ContainerContentsChanged(transaction);
2752
2753	inode->UpdateOldLastModified();
2754
2755	// The "size" & "last_modified" indices don't contain directories.
2756	// If adding to these indices fails, the inode creation will not be
2757	// harmed; they are considered less important than the "name" index.
2758	if (inode->InSizeIndex())
2759		index.InsertSize(transaction, inode);
2760	if (inode->InLastModifiedIndex())
2761		index.InsertLastModified(transaction, inode);
2762
2763	if (inode->NeedsFileCache()) {
2764		inode->SetFileCache(file_cache_create(volume->ID(), inode->ID(),
2765			inode->Size()));
2766		inode->SetMap(file_map_create(volume->ID(), inode->ID(),
2767			inode->Size()));
2768
2769		if (inode->FileCache() == NULL || inode->Map() == NULL)
2770			return B_NO_MEMORY;
2771	}
2772
2773	// Everything worked well until this point, we have a fully
2774	// initialized inode, and we want to keep it
2775	allocator.Keep(vnodeOps, publishFlags);
2776
2777	if (_created)
2778		*_created = true;
2779	if (_id != NULL)
2780		*_id = inode->ID();
2781	if (_inode != NULL)
2782		*_inode = inode;
2783
2784	// if either _id or _inode is passed, we will keep the inode locked
2785	if (_id == NULL && _inode == NULL)
2786		put_vnode(volume->FSVolume(), inode->ID());
2787
2788	return B_OK;
2789}
2790
2791
2792//	#pragma mark - AttributeIterator
2793
2794
2795AttributeIterator::AttributeIterator(Inode* inode)
2796	:
2797	fCurrentSmallData(0),
2798	fInode(inode),
2799	fAttributes(NULL),
2800	fIterator(NULL),
2801	fBuffer(NULL)
2802{
2803	inode->_AddIterator(this);
2804}
2805
2806
2807AttributeIterator::~AttributeIterator()
2808{
2809	if (fAttributes)
2810		put_vnode(fAttributes->GetVolume()->FSVolume(), fAttributes->ID());
2811
2812	delete fIterator;
2813	fInode->_RemoveIterator(this);
2814}
2815
2816
2817status_t
2818AttributeIterator::Rewind()
2819{
2820	fCurrentSmallData = 0;
2821
2822	if (fIterator != NULL)
2823		fIterator->Rewind();
2824
2825	return B_OK;
2826}
2827
2828
2829status_t
2830AttributeIterator::GetNext(char* name, size_t* _length, uint32* _type,
2831	ino_t* _id)
2832{
2833	// read attributes out of the small data section
2834
2835	if (fCurrentSmallData >= 0) {
2836		NodeGetter nodeGetter(fInode->GetVolume(), fInode);
2837		const bfs_inode* node = nodeGetter.Node();
2838		const small_data* item = ((bfs_inode*)node)->SmallDataStart();
2839
2840		RecursiveLocker _(&fInode->SmallDataLock());
2841
2842		int32 index = 0;
2843		for (; !item->IsLast(node); item = item->Next(), index++) {
2844			if (item->NameSize() == FILE_NAME_NAME_LENGTH
2845				&& *item->Name() == FILE_NAME_NAME)
2846				continue;
2847
2848			if (index >= fCurrentSmallData)
2849				break;
2850		}
2851
2852		if (!item->IsLast(node)) {
2853			strncpy(name, item->Name(), B_FILE_NAME_LENGTH);
2854			*_type = item->Type();
2855			*_length = item->NameSize();
2856			*_id = (ino_t)index;
2857
2858			fCurrentSmallData = index + 1;
2859			return B_OK;
2860		}
2861
2862		// stop traversing the small_data section
2863		fCurrentSmallData = -1;
2864	}
2865
2866	// read attributes out of the attribute directory
2867
2868	if (fInode->Attributes().IsZero())
2869		return B_ENTRY_NOT_FOUND;
2870
2871	Volume* volume = fInode->GetVolume();
2872
2873	// if you haven't yet access to the attributes directory, get it
2874	if (fAttributes == NULL) {
2875		if (get_vnode(volume->FSVolume(), volume->ToVnode(fInode->Attributes()),
2876				(void**)&fAttributes) != B_OK) {
2877			FATAL(("get_vnode() failed in AttributeIterator::GetNext(ino_t"
2878				" = %" B_PRIdINO ",name = \"%s\")\n", fInode->ID(), name));
2879			return B_ENTRY_NOT_FOUND;
2880		}
2881
2882		BPlusTree* tree = fAttributes->Tree();
2883		if (tree == NULL
2884			|| (fIterator = new TreeIterator(tree)) == NULL) {
2885			FATAL(("could not get tree in AttributeIterator::GetNext(ino_t"
2886				" = %" B_PRIdINO ",name = \"%s\")\n", fInode->ID(), name));
2887			return B_ENTRY_NOT_FOUND;
2888		}
2889	}
2890
2891	uint16 length;
2892	ino_t id;
2893	status_t status = fIterator->GetNextEntry(name, &length,
2894		B_FILE_NAME_LENGTH, &id);
2895	if (status < B_OK)
2896		return status;
2897
2898	Vnode vnode(volume, id);
2899	Inode* attribute;
2900	if ((status = vnode.Get(&attribute)) == B_OK) {
2901		*_type = attribute->Type();
2902		*_length = length;
2903		*_id = id;
2904	}
2905
2906	return status;
2907}
2908
2909
2910void
2911AttributeIterator::Update(uint16 index, int8 change)
2912{
2913	// fCurrentSmallData points already to the next item
2914	if (index < fCurrentSmallData)
2915		fCurrentSmallData += change;
2916}
2917
2918