1#ifndef B_PLUS_TREE_H
2#define B_PLUS_TREE_H
3/* BPlusTree - BFS B+Tree implementation
4**
5** Initial version by Axel D��rfler, axeld@pinc-software.de
6** Roughly based on 'btlib' written by Marcus J. Ranum
7**
8** Copyright (c) 2001-2004 pinc Software. All Rights Reserved.
9** This file may be used under the terms of the OpenBeOS License.
10*/
11
12
13#include "bfs.h"
14#include "Journal.h"
15#include "Chain.h"
16
17#include <string.h>
18
19
20//****************** on-disk structures ********************
21
22#define BPLUSTREE_NULL			-1LL
23#define BPLUSTREE_FREE			-2LL
24
25struct bplustree_header {
26	uint32		magic;
27	uint32		node_size;
28	uint32		max_number_of_levels;
29	uint32		data_type;
30	off_t		root_node_pointer;
31	off_t		free_node_pointer;
32	off_t		maximum_size;
33
34	uint32 Magic() const { return BFS_ENDIAN_TO_HOST_INT32(magic); }
35	uint32 NodeSize() const { return BFS_ENDIAN_TO_HOST_INT32(node_size); }
36	uint32 DataType() const { return BFS_ENDIAN_TO_HOST_INT32(data_type); }
37	off_t RootNode() const { return BFS_ENDIAN_TO_HOST_INT64(root_node_pointer); }
38	off_t FreeNode() const { return BFS_ENDIAN_TO_HOST_INT64(free_node_pointer); }
39	off_t MaximumSize() const { return BFS_ENDIAN_TO_HOST_INT64(maximum_size); }
40	uint32 MaxNumberOfLevels() const { return BFS_ENDIAN_TO_HOST_INT32(max_number_of_levels); }
41
42	inline bool IsValidLink(off_t link);
43} _PACKED;
44
45#define BPLUSTREE_MAGIC 			0x69f6c2e8
46#define BPLUSTREE_NODE_SIZE 		1024
47#define BPLUSTREE_MAX_KEY_LENGTH	256
48#define BPLUSTREE_MIN_KEY_LENGTH	1
49
50enum bplustree_types {
51	BPLUSTREE_STRING_TYPE	= 0,
52	BPLUSTREE_INT32_TYPE	= 1,
53	BPLUSTREE_UINT32_TYPE	= 2,
54	BPLUSTREE_INT64_TYPE	= 3,
55	BPLUSTREE_UINT64_TYPE	= 4,
56	BPLUSTREE_FLOAT_TYPE	= 5,
57	BPLUSTREE_DOUBLE_TYPE	= 6
58};
59
60struct sorted_array;
61typedef sorted_array duplicate_array;
62
63struct bplustree_node {
64	off_t	left_link;
65	off_t	right_link;
66	off_t	overflow_link;
67	uint16	all_key_count;
68	uint16	all_key_length;
69
70	off_t LeftLink() const { return BFS_ENDIAN_TO_HOST_INT64(left_link); }
71	off_t RightLink() const { return BFS_ENDIAN_TO_HOST_INT64(right_link); }
72	off_t OverflowLink() const { return BFS_ENDIAN_TO_HOST_INT64(overflow_link); }
73	uint16 NumKeys() const { return BFS_ENDIAN_TO_HOST_INT16(all_key_count); }
74	uint16 AllKeyLength() const { return BFS_ENDIAN_TO_HOST_INT16(all_key_length); }
75
76	inline uint16 *KeyLengths() const;
77	inline off_t *Values() const;
78	inline uint8 *Keys() const;
79	inline int32 Used() const;
80	uint8 *KeyAt(int32 index, uint16 *keyLength) const;
81
82	inline bool IsLeaf() const;
83
84	void Initialize();
85	uint8 CountDuplicates(off_t offset, bool isFragment) const;
86	off_t DuplicateAt(off_t offset, bool isFragment, int8 index) const;
87	int32 FragmentsUsed(uint32 nodeSize);
88	inline duplicate_array *FragmentAt(int8 index);
89	inline duplicate_array *DuplicateArray();
90
91	static inline uint8 LinkType(off_t link);
92	static inline off_t MakeLink(uint8 type, off_t link, uint32 fragmentIndex = 0);
93	static inline bool IsDuplicate(off_t link);
94	static inline off_t FragmentOffset(off_t link);
95	static inline uint32 FragmentIndex(off_t link);
96
97#ifdef DEBUG
98	void CheckIntegrity(uint32 nodeSize);
99#endif
100} _PACKED;
101
102//#define BPLUSTREE_NODE 0
103#define BPLUSTREE_DUPLICATE_NODE 2
104#define BPLUSTREE_DUPLICATE_FRAGMENT 3
105
106#define NUM_FRAGMENT_VALUES 7
107#define NUM_DUPLICATE_VALUES 125
108
109//**************************************
110
111enum bplustree_traversing {
112	BPLUSTREE_FORWARD = 1,
113	BPLUSTREE_BACKWARD = -1,
114
115	BPLUSTREE_BEGIN = 0,
116	BPLUSTREE_END = 1
117};
118
119
120//****************** in-memory structures ********************
121
122template<class T> class Stack;
123class BPlusTree;
124class TreeIterator;
125class CachedNode;
126class Inode;
127
128// needed for searching (utilizing a stack)
129struct node_and_key {
130	off_t	nodeOffset;
131	uint16	keyIndex;
132};
133
134
135//***** Cache handling *****
136
137class CachedNode {
138	public:
139		CachedNode(BPlusTree *tree)
140			:
141			fTree(tree),
142			fNode(NULL),
143			fBlock(NULL)
144		{
145		}
146
147		CachedNode(BPlusTree *tree, off_t offset, bool check = true)
148			:
149			fTree(tree),
150			fNode(NULL),
151			fBlock(NULL)
152		{
153			SetTo(offset, check);
154		}
155
156		~CachedNode()
157		{
158			Unset();
159		}
160
161		bplustree_node *SetTo(off_t offset, bool check = true);
162		bplustree_header *SetToHeader();
163		void Unset();
164
165		status_t Free(Transaction *transaction, off_t offset);
166		status_t Allocate(Transaction *transaction, bplustree_node **node, off_t *offset);
167		status_t WriteBack(Transaction *transaction);
168
169		bplustree_node *Node() const { return fNode; }
170
171	protected:
172		bplustree_node	*InternalSetTo(off_t offset);
173
174		BPlusTree		*fTree;
175		bplustree_node	*fNode;
176		uint8			*fBlock;
177		off_t			fBlockNumber;
178};
179
180
181//******** B+tree class *********
182
183class BPlusTree {
184	public:
185		BPlusTree(Transaction *transaction, Inode *stream, int32 nodeSize = BPLUSTREE_NODE_SIZE);
186		BPlusTree(Inode *stream);
187		BPlusTree();
188		~BPlusTree();
189
190		status_t	SetTo(Transaction *transaction, Inode *stream, int32 nodeSize = BPLUSTREE_NODE_SIZE);
191		status_t	SetTo(Inode *stream);
192		status_t	SetStream(Inode *stream);
193
194		status_t	InitCheck();
195		status_t	Validate();
196
197		status_t	Remove(Transaction *transaction, const uint8 *key, uint16 keyLength, off_t value);
198		status_t	Insert(Transaction *transaction, const uint8 *key, uint16 keyLength, off_t value);
199
200		status_t	Remove(Transaction *transaction, const char *key, off_t value);
201		status_t	Insert(Transaction *transaction, const char *key, off_t value);
202		status_t	Insert(Transaction *transaction, int32 key, off_t value);
203		status_t	Insert(Transaction *transaction, uint32 key, off_t value);
204		status_t	Insert(Transaction *transaction, int64 key, off_t value);
205		status_t	Insert(Transaction *transaction, uint64 key, off_t value);
206		status_t	Insert(Transaction *transaction, float key, off_t value);
207		status_t	Insert(Transaction *transaction, double key, off_t value);
208
209		status_t	Replace(Transaction *transaction, const uint8 *key, uint16 keyLength, off_t value);
210		status_t	Find(const uint8 *key, uint16 keyLength, off_t *value);
211
212		static int32 TypeCodeToKeyType(type_code code);
213		static int32 ModeToKeyType(mode_t mode);
214
215	private:
216		BPlusTree(const BPlusTree &);
217		BPlusTree &operator=(const BPlusTree &);
218			// no implementation
219
220		int32		CompareKeys(const void *key1, int keylength1, const void *key2, int keylength2);
221		status_t	FindKey(bplustree_node *node, const uint8 *key, uint16 keyLength,
222						uint16 *index = NULL, off_t *next = NULL);
223		status_t	SeekDown(Stack<node_and_key> &stack, const uint8 *key, uint16 keyLength);
224
225		status_t	FindFreeDuplicateFragment(bplustree_node *node, CachedNode *cached,
226						off_t *_offset, bplustree_node **_fragment, uint32 *_index);
227		status_t	InsertDuplicate(Transaction *transaction, CachedNode *cached,
228						bplustree_node *node, uint16 index, off_t value);
229		void		InsertKey(bplustree_node *node, uint16 index, uint8 *key, uint16 keyLength,
230						off_t value);
231		status_t	SplitNode(bplustree_node *node, off_t nodeOffset, bplustree_node *other,
232						off_t otherOffset, uint16 *_keyIndex, uint8 *key, uint16 *_keyLength,
233						off_t *_value);
234
235		status_t	RemoveDuplicate(Transaction *transaction, bplustree_node *node,
236						CachedNode *cached, uint16 keyIndex, off_t value);
237		void		RemoveKey(bplustree_node *node, uint16 index);
238
239		void		UpdateIterators(off_t offset, off_t nextOffset, uint16 keyIndex,
240						uint16 splitAt, int8 change);
241		void		AddIterator(TreeIterator *iterator);
242		void		RemoveIterator(TreeIterator *iterator);
243
244	private:
245		friend struct TreeIterator;
246		friend class CachedNode;
247
248		Inode		*fStream;
249		bplustree_header *fHeader;
250		CachedNode	fCachedHeader;
251		int32		fNodeSize;
252		bool		fAllowDuplicates;
253		status_t	fStatus;
254		SimpleLock	fIteratorLock;
255		Chain<TreeIterator> fIterators;
256};
257
258
259//***** helper classes/functions *****
260
261extern int32 compareKeys(type_code type, const void *key1, int keyLength1,
262				const void *key2, int keyLength2);
263
264class TreeIterator {
265	public:
266		TreeIterator(BPlusTree *tree);
267		~TreeIterator();
268
269		status_t	Goto(int8 to);
270		status_t	Traverse(int8 direction, void *key, uint16 *keyLength, uint16 maxLength,
271						off_t *value, uint16 *duplicate = NULL);
272		status_t	Find(const uint8 *key, uint16 keyLength);
273
274		status_t	Rewind();
275		status_t	GetNextEntry(void *key, uint16 *keyLength, uint16 maxLength,
276						off_t *value, uint16 *duplicate = NULL);
277		status_t	GetPreviousEntry(void *key, uint16 *keyLength, uint16 maxLength,
278						off_t *value, uint16 *duplicate = NULL);
279		void		SkipDuplicates();
280
281#ifdef DEBUG
282		void		Dump();
283#endif
284
285	private:
286		BPlusTree	*fTree;
287
288		off_t		fCurrentNodeOffset;	// traverse position
289		int32		fCurrentKey;
290		off_t		fDuplicateNode;
291		uint16		fDuplicate, fNumDuplicates;
292		bool		fIsFragment;
293
294	private:
295		friend class Chain<TreeIterator>;
296		friend class BPlusTree;
297
298		void Update(off_t offset, off_t nextOffset, uint16 keyIndex, uint16 splitAt, int8 change);
299		void Stop();
300		TreeIterator *fNext;
301};
302
303// BPlusTree's inline functions (most of them may not be needed)
304
305inline status_t
306BPlusTree::Remove(Transaction *transaction, const char *key, off_t value)
307{
308	if (fHeader->data_type != BPLUSTREE_STRING_TYPE)
309		return B_BAD_TYPE;
310	return Remove(transaction, (uint8 *)key, strlen(key), value);
311}
312
313inline status_t
314BPlusTree::Insert(Transaction *transaction, const char *key, off_t value)
315{
316	if (fHeader->data_type != BPLUSTREE_STRING_TYPE)
317		return B_BAD_TYPE;
318	return Insert(transaction, (uint8 *)key, strlen(key), value);
319}
320
321inline status_t
322BPlusTree::Insert(Transaction *transaction, int32 key, off_t value)
323{
324	if (fHeader->data_type != BPLUSTREE_INT32_TYPE)
325		return B_BAD_TYPE;
326	return Insert(transaction, (uint8 *)&key, sizeof(key), value);
327}
328
329inline status_t
330BPlusTree::Insert(Transaction *transaction, uint32 key, off_t value)
331{
332	if (fHeader->data_type != BPLUSTREE_UINT32_TYPE)
333		return B_BAD_TYPE;
334	return Insert(transaction, (uint8 *)&key, sizeof(key), value);
335}
336
337inline status_t
338BPlusTree::Insert(Transaction *transaction, int64 key, off_t value)
339{
340	if (fHeader->data_type != BPLUSTREE_INT64_TYPE)
341		return B_BAD_TYPE;
342	return Insert(transaction, (uint8 *)&key, sizeof(key), value);
343}
344
345inline status_t
346BPlusTree::Insert(Transaction *transaction, uint64 key, off_t value)
347{
348	if (fHeader->data_type != BPLUSTREE_UINT64_TYPE)
349		return B_BAD_TYPE;
350	return Insert(transaction, (uint8 *)&key, sizeof(key), value);
351}
352
353inline status_t
354BPlusTree::Insert(Transaction *transaction, float key, off_t value)
355{
356	if (fHeader->data_type != BPLUSTREE_FLOAT_TYPE)
357		return B_BAD_TYPE;
358	return Insert(transaction, (uint8 *)&key, sizeof(key), value);
359}
360
361inline status_t
362BPlusTree::Insert(Transaction *transaction, double key, off_t value)
363{
364	if (fHeader->data_type != BPLUSTREE_DOUBLE_TYPE)
365		return B_BAD_TYPE;
366	return Insert(transaction, (uint8 *)&key, sizeof(key), value);
367}
368
369
370/************************ TreeIterator inline functions ************************/
371//	#pragma mark -
372
373inline status_t
374TreeIterator::Rewind()
375{
376	return Goto(BPLUSTREE_BEGIN);
377}
378
379inline status_t
380TreeIterator::GetNextEntry(void *key, uint16 *keyLength, uint16 maxLength,
381	off_t *value, uint16 *duplicate)
382{
383	return Traverse(BPLUSTREE_FORWARD, key, keyLength, maxLength, value, duplicate);
384}
385
386inline status_t
387TreeIterator::GetPreviousEntry(void *key, uint16 *keyLength, uint16 maxLength,
388	off_t *value, uint16 *duplicate)
389{
390	return Traverse(BPLUSTREE_BACKWARD, key, keyLength, maxLength, value, duplicate);
391}
392
393/************************ bplustree_header inline functions ************************/
394//	#pragma mark -
395
396
397inline bool
398bplustree_header::IsValidLink(off_t link)
399{
400	return link == BPLUSTREE_NULL || (link > 0 && link <= MaximumSize() - NodeSize());
401}
402
403
404/************************ bplustree_node inline functions ************************/
405//	#pragma mark -
406
407
408inline uint16 *
409bplustree_node::KeyLengths() const
410{
411	return (uint16 *)(((char *)this) + round_up(sizeof(bplustree_node) + AllKeyLength()));
412}
413
414
415inline off_t *
416bplustree_node::Values() const
417{
418	return (off_t *)((char *)KeyLengths() + NumKeys() * sizeof(uint16));
419}
420
421
422inline uint8 *
423bplustree_node::Keys() const
424{
425	return (uint8 *)this + sizeof(bplustree_node);
426}
427
428
429inline int32
430bplustree_node::Used() const
431{
432	return round_up(sizeof(bplustree_node) + AllKeyLength()) + NumKeys() * (sizeof(uint16) + sizeof(off_t));
433}
434
435
436inline bool
437bplustree_node::IsLeaf() const
438{
439	return OverflowLink() == BPLUSTREE_NULL;
440}
441
442
443inline duplicate_array *
444bplustree_node::FragmentAt(int8 index)
445{
446	return (duplicate_array *)((off_t *)this + index * (NUM_FRAGMENT_VALUES + 1));
447}
448
449
450inline duplicate_array *
451bplustree_node::DuplicateArray()
452{
453	return (duplicate_array *)&this->overflow_link;
454}
455
456
457inline uint8
458bplustree_node::LinkType(off_t link)
459{
460	return *(uint64 *)&link >> 62;
461}
462
463
464inline off_t
465bplustree_node::MakeLink(uint8 type, off_t link, uint32 fragmentIndex)
466{
467	return ((off_t)type << 62) | (link & 0x3ffffffffffffc00LL) | (fragmentIndex & 0x3ff);
468}
469
470
471inline bool
472bplustree_node::IsDuplicate(off_t link)
473{
474	return (LinkType(link) & (BPLUSTREE_DUPLICATE_NODE | BPLUSTREE_DUPLICATE_FRAGMENT)) > 0;
475}
476
477
478inline off_t
479bplustree_node::FragmentOffset(off_t link)
480{
481	return link & 0x3ffffffffffffc00LL;
482}
483
484
485inline uint32
486bplustree_node::FragmentIndex(off_t link)
487{
488	return (uint32)(link & 0x3ff);
489}
490
491#endif	/* B_PLUS_TREE_H */
492