1/*
2 * Copyright 2001-2015, Axel D��rfler, axeld@pinc-software.de.
3 * This file may be used under the terms of the MIT License.
4 */
5#ifndef B_PLUS_TREE_H
6#define B_PLUS_TREE_H
7
8
9#include "bfs.h"
10
11#include "Utility.h"
12
13#if !_BOOT_MODE
14#include "Journal.h"
15class Inode;
16#else
17#define Inode BFS::Stream
18
19namespace BFS {
20
21class Stream;
22class Transaction;
23class TransactionListener {};
24#endif // _BOOT_MODE
25
26//	#pragma mark - on-disk structures
27
28struct bplustree_node;
29
30#define BPLUSTREE_NULL			-1LL
31#define BPLUSTREE_FREE			-2LL
32
33struct bplustree_header {
34	uint32		magic;
35	uint32		node_size;
36	uint32		max_number_of_levels;
37	uint32		data_type;
38	int64		root_node_pointer;
39	int64		free_node_pointer;
40	int64		maximum_size;
41
42	uint32 Magic() const { return BFS_ENDIAN_TO_HOST_INT32(magic); }
43	uint32 NodeSize() const { return BFS_ENDIAN_TO_HOST_INT32(node_size); }
44	uint32 DataType() const { return BFS_ENDIAN_TO_HOST_INT32(data_type); }
45	off_t RootNode() const
46		{ return BFS_ENDIAN_TO_HOST_INT64(root_node_pointer); }
47	off_t FreeNode() const
48		{ return BFS_ENDIAN_TO_HOST_INT64(free_node_pointer); }
49	off_t MaximumSize() const { return BFS_ENDIAN_TO_HOST_INT64(maximum_size); }
50	uint32 MaxNumberOfLevels() const
51		{ return BFS_ENDIAN_TO_HOST_INT32(max_number_of_levels); }
52
53	inline bool CheckNode(const bplustree_node* node) const;
54	inline bool IsValidLink(off_t link) const;
55	bool IsValid() const;
56} _PACKED;
57
58#define BPLUSTREE_MAGIC 			0x69f6c2e8
59#define BPLUSTREE_NODE_SIZE 		1024
60#define BPLUSTREE_MAX_KEY_LENGTH	256
61#define BPLUSTREE_MIN_KEY_LENGTH	1
62
63enum bplustree_types {
64	BPLUSTREE_STRING_TYPE	= 0,
65	BPLUSTREE_INT32_TYPE	= 1,
66	BPLUSTREE_UINT32_TYPE	= 2,
67	BPLUSTREE_INT64_TYPE	= 3,
68	BPLUSTREE_UINT64_TYPE	= 4,
69	BPLUSTREE_FLOAT_TYPE	= 5,
70	BPLUSTREE_DOUBLE_TYPE	= 6
71};
72
73
74struct duplicate_array;
75
76
77template <typename T>
78struct  __attribute__((packed)) Unaligned {
79	T value;
80
81	Unaligned<T>& operator=(const T& newValue)
82	{
83		value = newValue; return *this;
84	}
85	operator T() const { return value; }
86};
87
88
89struct bplustree_node {
90			int64				left_link;
91			int64				right_link;
92			int64				overflow_link;
93			uint16				all_key_count;
94			uint16				all_key_length;
95
96			off_t				LeftLink() const
97									{ return BFS_ENDIAN_TO_HOST_INT64(
98										left_link); }
99			off_t				RightLink() const
100									{ return BFS_ENDIAN_TO_HOST_INT64(
101										right_link); }
102			off_t				OverflowLink() const
103									{ return BFS_ENDIAN_TO_HOST_INT64(
104										overflow_link); }
105			uint16				NumKeys() const
106									{ return BFS_ENDIAN_TO_HOST_INT16(
107										all_key_count); }
108			uint16				AllKeyLength() const
109									{ return BFS_ENDIAN_TO_HOST_INT16(
110										all_key_length); }
111
112	inline	Unaligned<uint16>*	KeyLengths() const;
113	inline	Unaligned<off_t>*   Values() const;
114	inline	uint8*				Keys() const;
115	inline	int32				Used() const;
116			uint8*				KeyAt(int32 index, uint16* keyLength) const;
117
118	inline	bool				IsLeaf() const;
119
120			void				Initialize();
121			uint8				CountDuplicates(off_t offset,
122									bool isFragment) const;
123			off_t				DuplicateAt(off_t offset, bool isFragment,
124									int8 index) const;
125			uint32				FragmentsUsed(uint32 nodeSize) const;
126	inline	duplicate_array*	FragmentAt(int8 index) const;
127	inline	duplicate_array*	DuplicateArray() const;
128
129	static inline uint8			LinkType(off_t link);
130	static inline off_t			MakeLink(uint8 type, off_t link,
131									uint32 fragmentIndex = 0);
132	static inline bool			IsDuplicate(off_t link);
133	static inline off_t			FragmentOffset(off_t link);
134	static inline uint32		FragmentIndex(off_t link);
135	static inline uint32		MaxFragments(uint32 nodeSize);
136
137			status_t			CheckIntegrity(uint32 nodeSize) const;
138} _PACKED;
139
140//#define BPLUSTREE_NODE 0
141#define BPLUSTREE_DUPLICATE_NODE 2
142#define BPLUSTREE_DUPLICATE_FRAGMENT 3
143
144#define NUM_FRAGMENT_VALUES 7
145#define NUM_DUPLICATE_VALUES 125
146
147//**************************************
148
149enum bplustree_traversing {
150	BPLUSTREE_FORWARD = 1,
151	BPLUSTREE_BACKWARD = -1,
152
153	BPLUSTREE_BEGIN = 0,
154	BPLUSTREE_END = 1
155};
156
157
158//	#pragma mark - in-memory structures
159
160
161class BPlusTree;
162struct TreeCheck;
163class TreeIterator;
164
165
166#if !_BOOT_MODE
167// needed for searching (utilizing a stack)
168struct node_and_key {
169	off_t	nodeOffset;
170	uint16	keyIndex;
171};
172#endif // !_BOOT_MODE
173
174
175class CachedNode {
176public:
177	CachedNode(BPlusTree* tree)
178		:
179		fTree(tree),
180		fNode(NULL),
181		fOffset(0),
182		fBlockNumber(0),
183		fWritable(false)
184	{
185#if _BOOT_MODE
186		fBlock = NULL;
187#endif
188	}
189
190	CachedNode(BPlusTree* tree, off_t offset, bool check = true)
191		:
192		fTree(tree),
193		fNode(NULL),
194		fOffset(0),
195		fBlockNumber(0),
196		fWritable(false)
197	{
198#if _BOOT_MODE
199		fBlock = NULL;
200#endif
201		SetTo(offset, check);
202	}
203
204	~CachedNode()
205	{
206		Unset();
207#if _BOOT_MODE
208		free(fBlock);
209#endif
210	}
211
212			const bplustree_node* SetTo(off_t offset, bool check = true);
213			status_t			SetTo(off_t offset,
214									const bplustree_node** _node,
215									bool check = true);
216
217			const bplustree_header* SetToHeader();
218			void				Unset();
219
220#if !_BOOT_MODE
221			bplustree_node*		SetToWritable(Transaction& transaction,
222									off_t offset, bool check = true);
223			bplustree_node*		MakeWritable(Transaction& transaction);
224			bplustree_header*	SetToWritableHeader(Transaction& transaction);
225
226			void				UnsetUnchanged(Transaction& transaction);
227
228			status_t			Free(Transaction& transaction, off_t offset);
229			status_t			Allocate(Transaction& transaction,
230									bplustree_node** _node, off_t* _offset);
231#endif // !_BOOT_MODE
232
233			bool				IsWritable() const { return fWritable; }
234			bplustree_node*		Node() const { return fNode; }
235
236protected:
237			bplustree_node*		InternalSetTo(Transaction* transaction,
238									off_t offset);
239
240			BPlusTree*			fTree;
241			bplustree_node*		fNode;
242			off_t				fOffset;
243			off_t				fBlockNumber;
244			bool				fWritable;
245#if _BOOT_MODE
246			uint8*				fBlock;
247#endif
248};
249
250
251class BPlusTree : public TransactionListener {
252public:
253#if !_BOOT_MODE
254								BPlusTree(Transaction& transaction,
255									Inode* stream,
256									int32 nodeSize = BPLUSTREE_NODE_SIZE);
257#endif
258								BPlusTree(Inode* stream);
259								BPlusTree();
260								~BPlusTree();
261
262#if !_BOOT_MODE
263			status_t			SetTo(Transaction& transaction, Inode* stream,
264									int32 nodeSize = BPLUSTREE_NODE_SIZE);
265#endif
266			status_t			SetTo(Inode* stream);
267			status_t			SetStream(Inode* stream);
268
269			status_t			InitCheck();
270
271			size_t				NodeSize() const { return fNodeSize; }
272			Inode*				Stream() const { return fStream; }
273
274#if !_BOOT_MODE
275			status_t			Validate(bool repair, bool& _errorsFound);
276			status_t			MakeEmpty();
277
278			status_t			Remove(Transaction& transaction,
279									const uint8* key, uint16 keyLength,
280									off_t value);
281			status_t			Insert(Transaction& transaction,
282									const uint8* key, uint16 keyLength,
283									off_t value);
284
285			status_t			Remove(Transaction& transaction, const char* key,
286									off_t value);
287			status_t			Insert(Transaction& transaction, const char* key,
288									off_t value);
289			status_t			Insert(Transaction& transaction, int32 key,
290									off_t value);
291			status_t			Insert(Transaction& transaction, uint32 key,
292									off_t value);
293			status_t			Insert(Transaction& transaction, int64 key,
294									off_t value);
295			status_t			Insert(Transaction& transaction, uint64 key,
296									off_t value);
297			status_t			Insert(Transaction& transaction, float key,
298									off_t value);
299			status_t			Insert(Transaction& transaction, double key,
300									off_t value);
301
302			status_t			Replace(Transaction& transaction,
303									const uint8* key, uint16 keyLength,
304									off_t value);
305#endif // !_BOOT_MODE
306
307			status_t			Find(const uint8* key, uint16 keyLength,
308									off_t* value);
309
310#if !_BOOT_MODE
311	static	int32				TypeCodeToKeyType(type_code code);
312	static	int32				ModeToKeyType(mode_t mode);
313
314protected:
315	virtual void				TransactionDone(bool success);
316	virtual void				RemovedFromTransaction();
317#endif // !_BOOT_MODE
318
319private:
320								BPlusTree(const BPlusTree& other);
321								BPlusTree& operator=(const BPlusTree& other);
322									// no implementation
323
324			int32				_CompareKeys(const void* key1, int keylength1,
325									const void* key2, int keylength2);
326			status_t			_FindKey(const bplustree_node* node,
327									const uint8* key, uint16 keyLength,
328									uint16* index = NULL, off_t* next = NULL);
329#if !_BOOT_MODE
330			status_t			_SeekDown(Stack<node_and_key>& stack,
331									const uint8* key, uint16 keyLength);
332
333			status_t			_FindFreeDuplicateFragment(
334									Transaction& transaction,
335									const bplustree_node* node,
336									CachedNode& cached, off_t* _offset,
337									bplustree_node** _fragment, uint32* _index);
338			status_t			_InsertDuplicate(Transaction& transaction,
339									CachedNode& cached,
340									const bplustree_node* node, uint16 index,
341									off_t value);
342			void				_InsertKey(bplustree_node* node, uint16 index,
343									uint8* key, uint16 keyLength, off_t value);
344			status_t			_SplitNode(bplustree_node* node,
345									off_t nodeOffset, bplustree_node* other,
346									off_t otherOffset, uint16* _keyIndex,
347									uint8* key, uint16* _keyLength,
348									off_t* _value);
349
350			status_t			_RemoveDuplicate(Transaction& transaction,
351									const bplustree_node* node,
352									CachedNode& cached, uint16 keyIndex,
353									off_t value);
354			void				_RemoveKey(bplustree_node* node, uint16 index);
355
356			void				_UpdateIterators(off_t offset, off_t nextOffset,
357									uint16 keyIndex, uint16 splitAt,
358									int8 change);
359			void				_AddIterator(TreeIterator* iterator);
360			void				_RemoveIterator(TreeIterator* iterator);
361
362			status_t			_ValidateChildren(TreeCheck& check,
363									uint32 level, off_t offset,
364									const uint8* largestKey, uint16 keyLength,
365									const bplustree_node* parent);
366			status_t			_ValidateChild(TreeCheck& check,
367									CachedNode& cached, uint32 level,
368									off_t offset, off_t lastOffset,
369									off_t nextOffset, const uint8* key,
370									uint16 keyLength);
371#endif // !_BOOT_MODE
372
373private:
374			friend class TreeIterator;
375			friend class CachedNode;
376			friend struct TreeCheck;
377
378			Inode*				fStream;
379			bplustree_header	fHeader;
380			int32				fNodeSize;
381			bool				fAllowDuplicates;
382			bool				fInTransaction;
383			status_t			fStatus;
384
385#if !_BOOT_MODE
386			mutex				fIteratorLock;
387			SinglyLinkedList<TreeIterator> fIterators;
388#endif
389};
390
391
392//	#pragma mark - helper classes/functions
393
394
395class TreeIterator : public SinglyLinkedListLinkImpl<TreeIterator> {
396public:
397								TreeIterator(BPlusTree* tree);
398								~TreeIterator();
399
400			status_t			Goto(int8 to);
401			status_t			Traverse(int8 direction, void* key,
402									uint16* keyLength, uint16 maxLength,
403									off_t* value, uint16* duplicate = NULL);
404			status_t			Find(const uint8* key, uint16 keyLength);
405
406			status_t			Rewind();
407			status_t			GetNextEntry(void* key, uint16* keyLength,
408									uint16 maxLength, off_t* value,
409									uint16* duplicate = NULL);
410			status_t			GetPreviousEntry(void* key, uint16* keyLength,
411									uint16 maxLength, off_t* value,
412									uint16* duplicate = NULL);
413			void				SkipDuplicates();
414
415			BPlusTree*			Tree() const { return fTree; }
416
417#ifdef DEBUG
418			void				Dump();
419#endif
420
421private:
422			friend class BPlusTree;
423
424			// called by BPlusTree
425			void				Update(off_t offset, off_t nextOffset,
426									uint16 keyIndex, uint16 splitAt,
427									int8 change);
428			void				Stop();
429
430private:
431			BPlusTree*			fTree;
432			off_t				fCurrentNodeOffset;
433									// traverse position
434			int32				fCurrentKey;
435			off_t				fDuplicateNode;
436			uint16				fDuplicate;
437			uint16				fNumDuplicates;
438			bool				fIsFragment;
439};
440
441
442//	#pragma mark - BPlusTree's inline functions
443//	(most of them may not be needed)
444
445
446#if !_BOOT_MODE
447inline status_t
448BPlusTree::Remove(Transaction& transaction, const char* key, off_t value)
449{
450	if (fHeader.DataType() != BPLUSTREE_STRING_TYPE)
451		return B_BAD_TYPE;
452	return Remove(transaction, (uint8*)key, strlen(key), value);
453}
454
455
456inline status_t
457BPlusTree::Insert(Transaction& transaction, const char* key, off_t value)
458{
459	if (fHeader.DataType() != BPLUSTREE_STRING_TYPE)
460		return B_BAD_TYPE;
461	return Insert(transaction, (uint8*)key, strlen(key), value);
462}
463
464
465inline status_t
466BPlusTree::Insert(Transaction& transaction, int32 key, off_t value)
467{
468	if (fHeader.DataType() != BPLUSTREE_INT32_TYPE)
469		return B_BAD_TYPE;
470	return Insert(transaction, (uint8*)&key, sizeof(key), value);
471}
472
473
474inline status_t
475BPlusTree::Insert(Transaction& transaction, uint32 key, off_t value)
476{
477	if (fHeader.DataType() != BPLUSTREE_UINT32_TYPE)
478		return B_BAD_TYPE;
479	return Insert(transaction, (uint8*)&key, sizeof(key), value);
480}
481
482
483inline status_t
484BPlusTree::Insert(Transaction& transaction, int64 key, off_t value)
485{
486	if (fHeader.DataType() != BPLUSTREE_INT64_TYPE)
487		return B_BAD_TYPE;
488	return Insert(transaction, (uint8*)&key, sizeof(key), value);
489}
490
491
492inline status_t
493BPlusTree::Insert(Transaction& transaction, uint64 key, off_t value)
494{
495	if (fHeader.DataType() != BPLUSTREE_UINT64_TYPE)
496		return B_BAD_TYPE;
497	return Insert(transaction, (uint8*)&key, sizeof(key), value);
498}
499
500
501inline status_t
502BPlusTree::Insert(Transaction& transaction, float key, off_t value)
503{
504	if (fHeader.DataType() != BPLUSTREE_FLOAT_TYPE)
505		return B_BAD_TYPE;
506	return Insert(transaction, (uint8*)&key, sizeof(key), value);
507}
508
509
510inline status_t
511BPlusTree::Insert(Transaction& transaction, double key, off_t value)
512{
513	if (fHeader.DataType() != BPLUSTREE_DOUBLE_TYPE)
514		return B_BAD_TYPE;
515	return Insert(transaction, (uint8*)&key, sizeof(key), value);
516}
517#endif // !_BOOT_MODE
518
519
520//	#pragma mark - TreeIterator inline functions
521
522
523inline status_t
524TreeIterator::Rewind()
525{
526	return Goto(BPLUSTREE_BEGIN);
527}
528
529
530inline status_t
531TreeIterator::GetNextEntry(void* key, uint16* keyLength, uint16 maxLength,
532	off_t* value, uint16* duplicate)
533{
534	return Traverse(BPLUSTREE_FORWARD, key, keyLength, maxLength, value,
535		duplicate);
536}
537
538
539inline status_t
540TreeIterator::GetPreviousEntry(void* key, uint16* keyLength, uint16 maxLength,
541	off_t* value, uint16* duplicate)
542{
543	return Traverse(BPLUSTREE_BACKWARD, key, keyLength, maxLength, value,
544		duplicate);
545}
546
547
548//	#pragma mark - bplustree_header inline functions
549
550
551inline bool
552bplustree_header::CheckNode(const bplustree_node* node) const
553{
554	// sanity checks (links, all_key_count)
555	return IsValidLink(node->LeftLink())
556		&& IsValidLink(node->RightLink())
557		&& IsValidLink(node->OverflowLink())
558		&& (int8*)node->Values() + node->NumKeys() * sizeof(off_t)
559				<= (int8*)node + NodeSize();
560}
561
562
563inline bool
564bplustree_header::IsValidLink(off_t link) const
565{
566	return link == BPLUSTREE_NULL
567		|| (link > 0 && link <= MaximumSize() - NodeSize());
568}
569
570
571//	#pragma mark - bplustree_node inline functions
572
573
574inline Unaligned<uint16>*
575bplustree_node::KeyLengths() const
576{
577	return (Unaligned<uint16>*)(((char*)this) + key_align(sizeof(bplustree_node)
578		+ AllKeyLength()));
579}
580
581
582inline Unaligned<off_t>*
583bplustree_node::Values() const
584{
585	return (Unaligned<off_t>*)(
586		(char*)KeyLengths() + NumKeys() * sizeof(uint16));
587}
588
589
590inline uint8*
591bplustree_node::Keys() const
592{
593	return (uint8*)this + sizeof(bplustree_node);
594}
595
596
597inline int32
598bplustree_node::Used() const
599{
600	return key_align(sizeof(bplustree_node) + AllKeyLength()) + NumKeys()
601		* (sizeof(uint16) + sizeof(off_t));
602}
603
604
605inline bool
606bplustree_node::IsLeaf() const
607{
608	return OverflowLink() == BPLUSTREE_NULL;
609}
610
611
612inline duplicate_array*
613bplustree_node::FragmentAt(int8 index) const
614{
615	return (duplicate_array*)((off_t*)this + index * (NUM_FRAGMENT_VALUES + 1));
616}
617
618
619inline duplicate_array*
620bplustree_node::DuplicateArray() const
621{
622	return (duplicate_array*)&overflow_link;
623}
624
625
626inline uint8
627bplustree_node::LinkType(off_t link)
628{
629	return *(uint64*)&link >> 62;
630}
631
632
633inline off_t
634bplustree_node::MakeLink(uint8 type, off_t link, uint32 fragmentIndex)
635{
636	return ((off_t)type << 62) | (link & 0x3ffffffffffffc00LL)
637		| (fragmentIndex & 0x3ff);
638}
639
640
641inline bool
642bplustree_node::IsDuplicate(off_t link)
643{
644	return (LinkType(link)
645		& (BPLUSTREE_DUPLICATE_NODE | BPLUSTREE_DUPLICATE_FRAGMENT)) > 0;
646}
647
648
649inline off_t
650bplustree_node::FragmentOffset(off_t link)
651{
652	return link & 0x3ffffffffffffc00LL;
653}
654
655
656inline uint32
657bplustree_node::FragmentIndex(off_t link)
658{
659	return (uint32)(link & 0x3ff);
660}
661
662
663inline uint32
664bplustree_node::MaxFragments(uint32 nodeSize)
665{
666	return nodeSize / ((NUM_FRAGMENT_VALUES + 1) * sizeof(off_t));
667}
668
669
670#if _BOOT_MODE
671} // namespace BFS
672
673#undef Inode
674#endif
675
676#endif	// B_PLUS_TREE_H
677