1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (c) 2017 Christoph Hellwig.
4 */
5
6#include "xfs.h"
7#include "xfs_shared.h"
8#include "xfs_format.h"
9#include "xfs_bit.h"
10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h"
12#include "xfs_mount.h"
13#include "xfs_inode.h"
14#include "xfs_trace.h"
15
16/*
17 * In-core extent record layout:
18 *
19 * +-------+----------------------------+
20 * | 00:53 | all 54 bits of startoff    |
21 * | 54:63 | low 10 bits of startblock  |
22 * +-------+----------------------------+
23 * | 00:20 | all 21 bits of length      |
24 * |    21 | unwritten extent bit       |
25 * | 22:63 | high 42 bits of startblock |
26 * +-------+----------------------------+
27 */
28#define XFS_IEXT_STARTOFF_MASK		xfs_mask64lo(BMBT_STARTOFF_BITLEN)
29#define XFS_IEXT_LENGTH_MASK		xfs_mask64lo(BMBT_BLOCKCOUNT_BITLEN)
30#define XFS_IEXT_STARTBLOCK_MASK	xfs_mask64lo(BMBT_STARTBLOCK_BITLEN)
31
32struct xfs_iext_rec {
33	uint64_t			lo;
34	uint64_t			hi;
35};
36
37/*
38 * Given that the length can't be a zero, only an empty hi value indicates an
39 * unused record.
40 */
41static bool xfs_iext_rec_is_empty(struct xfs_iext_rec *rec)
42{
43	return rec->hi == 0;
44}
45
46static inline void xfs_iext_rec_clear(struct xfs_iext_rec *rec)
47{
48	rec->lo = 0;
49	rec->hi = 0;
50}
51
52static void
53xfs_iext_set(
54	struct xfs_iext_rec	*rec,
55	struct xfs_bmbt_irec	*irec)
56{
57	ASSERT((irec->br_startoff & ~XFS_IEXT_STARTOFF_MASK) == 0);
58	ASSERT((irec->br_blockcount & ~XFS_IEXT_LENGTH_MASK) == 0);
59	ASSERT((irec->br_startblock & ~XFS_IEXT_STARTBLOCK_MASK) == 0);
60
61	rec->lo = irec->br_startoff & XFS_IEXT_STARTOFF_MASK;
62	rec->hi = irec->br_blockcount & XFS_IEXT_LENGTH_MASK;
63
64	rec->lo |= (irec->br_startblock << 54);
65	rec->hi |= ((irec->br_startblock & ~xfs_mask64lo(10)) << (22 - 10));
66
67	if (irec->br_state == XFS_EXT_UNWRITTEN)
68		rec->hi |= (1 << 21);
69}
70
71static void
72xfs_iext_get(
73	struct xfs_bmbt_irec	*irec,
74	struct xfs_iext_rec	*rec)
75{
76	irec->br_startoff = rec->lo & XFS_IEXT_STARTOFF_MASK;
77	irec->br_blockcount = rec->hi & XFS_IEXT_LENGTH_MASK;
78
79	irec->br_startblock = rec->lo >> 54;
80	irec->br_startblock |= (rec->hi & xfs_mask64hi(42)) >> (22 - 10);
81
82	if (rec->hi & (1 << 21))
83		irec->br_state = XFS_EXT_UNWRITTEN;
84	else
85		irec->br_state = XFS_EXT_NORM;
86}
87
88enum {
89	NODE_SIZE	= 256,
90	KEYS_PER_NODE	= NODE_SIZE / (sizeof(uint64_t) + sizeof(void *)),
91	RECS_PER_LEAF	= (NODE_SIZE - (2 * sizeof(struct xfs_iext_leaf *))) /
92				sizeof(struct xfs_iext_rec),
93};
94
95/*
96 * In-core extent btree block layout:
97 *
98 * There are two types of blocks in the btree: leaf and inner (non-leaf) blocks.
99 *
100 * The leaf blocks are made up by %KEYS_PER_NODE extent records, which each
101 * contain the startoffset, blockcount, startblock and unwritten extent flag.
102 * See above for the exact format, followed by pointers to the previous and next
103 * leaf blocks (if there are any).
104 *
105 * The inner (non-leaf) blocks first contain KEYS_PER_NODE lookup keys, followed
106 * by an equal number of pointers to the btree blocks at the next lower level.
107 *
108 *		+-------+-------+-------+-------+-------+----------+----------+
109 * Leaf:	| rec 1 | rec 2 | rec 3 | rec 4 | rec N | prev-ptr | next-ptr |
110 *		+-------+-------+-------+-------+-------+----------+----------+
111 *
112 *		+-------+-------+-------+-------+-------+-------+------+-------+
113 * Inner:	| key 1 | key 2 | key 3 | key N | ptr 1 | ptr 2 | ptr3 | ptr N |
114 *		+-------+-------+-------+-------+-------+-------+------+-------+
115 */
116struct xfs_iext_node {
117	uint64_t		keys[KEYS_PER_NODE];
118#define XFS_IEXT_KEY_INVALID	(1ULL << 63)
119	void			*ptrs[KEYS_PER_NODE];
120};
121
122struct xfs_iext_leaf {
123	struct xfs_iext_rec	recs[RECS_PER_LEAF];
124	struct xfs_iext_leaf	*prev;
125	struct xfs_iext_leaf	*next;
126};
127
128inline xfs_extnum_t xfs_iext_count(struct xfs_ifork *ifp)
129{
130	return ifp->if_bytes / sizeof(struct xfs_iext_rec);
131}
132
133static inline int xfs_iext_max_recs(struct xfs_ifork *ifp)
134{
135	if (ifp->if_height == 1)
136		return xfs_iext_count(ifp);
137	return RECS_PER_LEAF;
138}
139
140static inline struct xfs_iext_rec *cur_rec(struct xfs_iext_cursor *cur)
141{
142	return &cur->leaf->recs[cur->pos];
143}
144
145static inline bool xfs_iext_valid(struct xfs_ifork *ifp,
146		struct xfs_iext_cursor *cur)
147{
148	if (!cur->leaf)
149		return false;
150	if (cur->pos < 0 || cur->pos >= xfs_iext_max_recs(ifp))
151		return false;
152	if (xfs_iext_rec_is_empty(cur_rec(cur)))
153		return false;
154	return true;
155}
156
157static void *
158xfs_iext_find_first_leaf(
159	struct xfs_ifork	*ifp)
160{
161	struct xfs_iext_node	*node = ifp->if_data;
162	int			height;
163
164	if (!ifp->if_height)
165		return NULL;
166
167	for (height = ifp->if_height; height > 1; height--) {
168		node = node->ptrs[0];
169		ASSERT(node);
170	}
171
172	return node;
173}
174
175static void *
176xfs_iext_find_last_leaf(
177	struct xfs_ifork	*ifp)
178{
179	struct xfs_iext_node	*node = ifp->if_data;
180	int			height, i;
181
182	if (!ifp->if_height)
183		return NULL;
184
185	for (height = ifp->if_height; height > 1; height--) {
186		for (i = 1; i < KEYS_PER_NODE; i++)
187			if (!node->ptrs[i])
188				break;
189		node = node->ptrs[i - 1];
190		ASSERT(node);
191	}
192
193	return node;
194}
195
196void
197xfs_iext_first(
198	struct xfs_ifork	*ifp,
199	struct xfs_iext_cursor	*cur)
200{
201	cur->pos = 0;
202	cur->leaf = xfs_iext_find_first_leaf(ifp);
203}
204
205void
206xfs_iext_last(
207	struct xfs_ifork	*ifp,
208	struct xfs_iext_cursor	*cur)
209{
210	int			i;
211
212	cur->leaf = xfs_iext_find_last_leaf(ifp);
213	if (!cur->leaf) {
214		cur->pos = 0;
215		return;
216	}
217
218	for (i = 1; i < xfs_iext_max_recs(ifp); i++) {
219		if (xfs_iext_rec_is_empty(&cur->leaf->recs[i]))
220			break;
221	}
222	cur->pos = i - 1;
223}
224
225void
226xfs_iext_next(
227	struct xfs_ifork	*ifp,
228	struct xfs_iext_cursor	*cur)
229{
230	if (!cur->leaf) {
231		ASSERT(cur->pos <= 0 || cur->pos >= RECS_PER_LEAF);
232		xfs_iext_first(ifp, cur);
233		return;
234	}
235
236	ASSERT(cur->pos >= 0);
237	ASSERT(cur->pos < xfs_iext_max_recs(ifp));
238
239	cur->pos++;
240	if (ifp->if_height > 1 && !xfs_iext_valid(ifp, cur) &&
241	    cur->leaf->next) {
242		cur->leaf = cur->leaf->next;
243		cur->pos = 0;
244	}
245}
246
247void
248xfs_iext_prev(
249	struct xfs_ifork	*ifp,
250	struct xfs_iext_cursor	*cur)
251{
252	if (!cur->leaf) {
253		ASSERT(cur->pos <= 0 || cur->pos >= RECS_PER_LEAF);
254		xfs_iext_last(ifp, cur);
255		return;
256	}
257
258	ASSERT(cur->pos >= 0);
259	ASSERT(cur->pos <= RECS_PER_LEAF);
260
261recurse:
262	do {
263		cur->pos--;
264		if (xfs_iext_valid(ifp, cur))
265			return;
266	} while (cur->pos > 0);
267
268	if (ifp->if_height > 1 && cur->leaf->prev) {
269		cur->leaf = cur->leaf->prev;
270		cur->pos = RECS_PER_LEAF;
271		goto recurse;
272	}
273}
274
275static inline int
276xfs_iext_key_cmp(
277	struct xfs_iext_node	*node,
278	int			n,
279	xfs_fileoff_t		offset)
280{
281	if (node->keys[n] > offset)
282		return 1;
283	if (node->keys[n] < offset)
284		return -1;
285	return 0;
286}
287
288static inline int
289xfs_iext_rec_cmp(
290	struct xfs_iext_rec	*rec,
291	xfs_fileoff_t		offset)
292{
293	uint64_t		rec_offset = rec->lo & XFS_IEXT_STARTOFF_MASK;
294	uint32_t		rec_len = rec->hi & XFS_IEXT_LENGTH_MASK;
295
296	if (rec_offset > offset)
297		return 1;
298	if (rec_offset + rec_len <= offset)
299		return -1;
300	return 0;
301}
302
303static void *
304xfs_iext_find_level(
305	struct xfs_ifork	*ifp,
306	xfs_fileoff_t		offset,
307	int			level)
308{
309	struct xfs_iext_node	*node = ifp->if_data;
310	int			height, i;
311
312	if (!ifp->if_height)
313		return NULL;
314
315	for (height = ifp->if_height; height > level; height--) {
316		for (i = 1; i < KEYS_PER_NODE; i++)
317			if (xfs_iext_key_cmp(node, i, offset) > 0)
318				break;
319
320		node = node->ptrs[i - 1];
321		if (!node)
322			break;
323	}
324
325	return node;
326}
327
328static int
329xfs_iext_node_pos(
330	struct xfs_iext_node	*node,
331	xfs_fileoff_t		offset)
332{
333	int			i;
334
335	for (i = 1; i < KEYS_PER_NODE; i++) {
336		if (xfs_iext_key_cmp(node, i, offset) > 0)
337			break;
338	}
339
340	return i - 1;
341}
342
343static int
344xfs_iext_node_insert_pos(
345	struct xfs_iext_node	*node,
346	xfs_fileoff_t		offset)
347{
348	int			i;
349
350	for (i = 0; i < KEYS_PER_NODE; i++) {
351		if (xfs_iext_key_cmp(node, i, offset) > 0)
352			return i;
353	}
354
355	return KEYS_PER_NODE;
356}
357
358static int
359xfs_iext_node_nr_entries(
360	struct xfs_iext_node	*node,
361	int			start)
362{
363	int			i;
364
365	for (i = start; i < KEYS_PER_NODE; i++) {
366		if (node->keys[i] == XFS_IEXT_KEY_INVALID)
367			break;
368	}
369
370	return i;
371}
372
373static int
374xfs_iext_leaf_nr_entries(
375	struct xfs_ifork	*ifp,
376	struct xfs_iext_leaf	*leaf,
377	int			start)
378{
379	int			i;
380
381	for (i = start; i < xfs_iext_max_recs(ifp); i++) {
382		if (xfs_iext_rec_is_empty(&leaf->recs[i]))
383			break;
384	}
385
386	return i;
387}
388
389static inline uint64_t
390xfs_iext_leaf_key(
391	struct xfs_iext_leaf	*leaf,
392	int			n)
393{
394	return leaf->recs[n].lo & XFS_IEXT_STARTOFF_MASK;
395}
396
397static inline void *
398xfs_iext_alloc_node(
399	int	size)
400{
401	return kzalloc(size, GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL);
402}
403
404static void
405xfs_iext_grow(
406	struct xfs_ifork	*ifp)
407{
408	struct xfs_iext_node	*node = xfs_iext_alloc_node(NODE_SIZE);
409	int			i;
410
411	if (ifp->if_height == 1) {
412		struct xfs_iext_leaf *prev = ifp->if_data;
413
414		node->keys[0] = xfs_iext_leaf_key(prev, 0);
415		node->ptrs[0] = prev;
416	} else  {
417		struct xfs_iext_node *prev = ifp->if_data;
418
419		ASSERT(ifp->if_height > 1);
420
421		node->keys[0] = prev->keys[0];
422		node->ptrs[0] = prev;
423	}
424
425	for (i = 1; i < KEYS_PER_NODE; i++)
426		node->keys[i] = XFS_IEXT_KEY_INVALID;
427
428	ifp->if_data = node;
429	ifp->if_height++;
430}
431
432static void
433xfs_iext_update_node(
434	struct xfs_ifork	*ifp,
435	xfs_fileoff_t		old_offset,
436	xfs_fileoff_t		new_offset,
437	int			level,
438	void			*ptr)
439{
440	struct xfs_iext_node	*node = ifp->if_data;
441	int			height, i;
442
443	for (height = ifp->if_height; height > level; height--) {
444		for (i = 0; i < KEYS_PER_NODE; i++) {
445			if (i > 0 && xfs_iext_key_cmp(node, i, old_offset) > 0)
446				break;
447			if (node->keys[i] == old_offset)
448				node->keys[i] = new_offset;
449		}
450		node = node->ptrs[i - 1];
451		ASSERT(node);
452	}
453
454	ASSERT(node == ptr);
455}
456
457static struct xfs_iext_node *
458xfs_iext_split_node(
459	struct xfs_iext_node	**nodep,
460	int			*pos,
461	int			*nr_entries)
462{
463	struct xfs_iext_node	*node = *nodep;
464	struct xfs_iext_node	*new = xfs_iext_alloc_node(NODE_SIZE);
465	const int		nr_move = KEYS_PER_NODE / 2;
466	int			nr_keep = nr_move + (KEYS_PER_NODE & 1);
467	int			i = 0;
468
469	/* for sequential append operations just spill over into the new node */
470	if (*pos == KEYS_PER_NODE) {
471		*nodep = new;
472		*pos = 0;
473		*nr_entries = 0;
474		goto done;
475	}
476
477
478	for (i = 0; i < nr_move; i++) {
479		new->keys[i] = node->keys[nr_keep + i];
480		new->ptrs[i] = node->ptrs[nr_keep + i];
481
482		node->keys[nr_keep + i] = XFS_IEXT_KEY_INVALID;
483		node->ptrs[nr_keep + i] = NULL;
484	}
485
486	if (*pos >= nr_keep) {
487		*nodep = new;
488		*pos -= nr_keep;
489		*nr_entries = nr_move;
490	} else {
491		*nr_entries = nr_keep;
492	}
493done:
494	for (; i < KEYS_PER_NODE; i++)
495		new->keys[i] = XFS_IEXT_KEY_INVALID;
496	return new;
497}
498
499static void
500xfs_iext_insert_node(
501	struct xfs_ifork	*ifp,
502	uint64_t		offset,
503	void			*ptr,
504	int			level)
505{
506	struct xfs_iext_node	*node, *new;
507	int			i, pos, nr_entries;
508
509again:
510	if (ifp->if_height < level)
511		xfs_iext_grow(ifp);
512
513	new = NULL;
514	node = xfs_iext_find_level(ifp, offset, level);
515	pos = xfs_iext_node_insert_pos(node, offset);
516	nr_entries = xfs_iext_node_nr_entries(node, pos);
517
518	ASSERT(pos >= nr_entries || xfs_iext_key_cmp(node, pos, offset) != 0);
519	ASSERT(nr_entries <= KEYS_PER_NODE);
520
521	if (nr_entries == KEYS_PER_NODE)
522		new = xfs_iext_split_node(&node, &pos, &nr_entries);
523
524	/*
525	 * Update the pointers in higher levels if the first entry changes
526	 * in an existing node.
527	 */
528	if (node != new && pos == 0 && nr_entries > 0)
529		xfs_iext_update_node(ifp, node->keys[0], offset, level, node);
530
531	for (i = nr_entries; i > pos; i--) {
532		node->keys[i] = node->keys[i - 1];
533		node->ptrs[i] = node->ptrs[i - 1];
534	}
535	node->keys[pos] = offset;
536	node->ptrs[pos] = ptr;
537
538	if (new) {
539		offset = new->keys[0];
540		ptr = new;
541		level++;
542		goto again;
543	}
544}
545
546static struct xfs_iext_leaf *
547xfs_iext_split_leaf(
548	struct xfs_iext_cursor	*cur,
549	int			*nr_entries)
550{
551	struct xfs_iext_leaf	*leaf = cur->leaf;
552	struct xfs_iext_leaf	*new = xfs_iext_alloc_node(NODE_SIZE);
553	const int		nr_move = RECS_PER_LEAF / 2;
554	int			nr_keep = nr_move + (RECS_PER_LEAF & 1);
555	int			i;
556
557	/* for sequential append operations just spill over into the new node */
558	if (cur->pos == RECS_PER_LEAF) {
559		cur->leaf = new;
560		cur->pos = 0;
561		*nr_entries = 0;
562		goto done;
563	}
564
565	for (i = 0; i < nr_move; i++) {
566		new->recs[i] = leaf->recs[nr_keep + i];
567		xfs_iext_rec_clear(&leaf->recs[nr_keep + i]);
568	}
569
570	if (cur->pos >= nr_keep) {
571		cur->leaf = new;
572		cur->pos -= nr_keep;
573		*nr_entries = nr_move;
574	} else {
575		*nr_entries = nr_keep;
576	}
577done:
578	if (leaf->next)
579		leaf->next->prev = new;
580	new->next = leaf->next;
581	new->prev = leaf;
582	leaf->next = new;
583	return new;
584}
585
586static void
587xfs_iext_alloc_root(
588	struct xfs_ifork	*ifp,
589	struct xfs_iext_cursor	*cur)
590{
591	ASSERT(ifp->if_bytes == 0);
592
593	ifp->if_data = xfs_iext_alloc_node(sizeof(struct xfs_iext_rec));
594	ifp->if_height = 1;
595
596	/* now that we have a node step into it */
597	cur->leaf = ifp->if_data;
598	cur->pos = 0;
599}
600
601static void
602xfs_iext_realloc_root(
603	struct xfs_ifork	*ifp,
604	struct xfs_iext_cursor	*cur)
605{
606	int64_t new_size = ifp->if_bytes + sizeof(struct xfs_iext_rec);
607	void *new;
608
609	/* account for the prev/next pointers */
610	if (new_size / sizeof(struct xfs_iext_rec) == RECS_PER_LEAF)
611		new_size = NODE_SIZE;
612
613	new = krealloc(ifp->if_data, new_size,
614			GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL);
615	memset(new + ifp->if_bytes, 0, new_size - ifp->if_bytes);
616	ifp->if_data = new;
617	cur->leaf = new;
618}
619
620/*
621 * Increment the sequence counter on extent tree changes. If we are on a COW
622 * fork, this allows the writeback code to skip looking for a COW extent if the
623 * COW fork hasn't changed. We use WRITE_ONCE here to ensure the update to the
624 * sequence counter is seen before the modifications to the extent tree itself
625 * take effect.
626 */
627static inline void xfs_iext_inc_seq(struct xfs_ifork *ifp)
628{
629	WRITE_ONCE(ifp->if_seq, READ_ONCE(ifp->if_seq) + 1);
630}
631
632void
633xfs_iext_insert_raw(
634	struct xfs_ifork	*ifp,
635	struct xfs_iext_cursor	*cur,
636	struct xfs_bmbt_irec	*irec)
637{
638	xfs_fileoff_t		offset = irec->br_startoff;
639	struct xfs_iext_leaf	*new = NULL;
640	int			nr_entries, i;
641
642	xfs_iext_inc_seq(ifp);
643
644	if (ifp->if_height == 0)
645		xfs_iext_alloc_root(ifp, cur);
646	else if (ifp->if_height == 1)
647		xfs_iext_realloc_root(ifp, cur);
648
649	nr_entries = xfs_iext_leaf_nr_entries(ifp, cur->leaf, cur->pos);
650	ASSERT(nr_entries <= RECS_PER_LEAF);
651	ASSERT(cur->pos >= nr_entries ||
652	       xfs_iext_rec_cmp(cur_rec(cur), irec->br_startoff) != 0);
653
654	if (nr_entries == RECS_PER_LEAF)
655		new = xfs_iext_split_leaf(cur, &nr_entries);
656
657	/*
658	 * Update the pointers in higher levels if the first entry changes
659	 * in an existing node.
660	 */
661	if (cur->leaf != new && cur->pos == 0 && nr_entries > 0) {
662		xfs_iext_update_node(ifp, xfs_iext_leaf_key(cur->leaf, 0),
663				offset, 1, cur->leaf);
664	}
665
666	for (i = nr_entries; i > cur->pos; i--)
667		cur->leaf->recs[i] = cur->leaf->recs[i - 1];
668	xfs_iext_set(cur_rec(cur), irec);
669	ifp->if_bytes += sizeof(struct xfs_iext_rec);
670
671	if (new)
672		xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2);
673}
674
675void
676xfs_iext_insert(
677	struct xfs_inode	*ip,
678	struct xfs_iext_cursor	*cur,
679	struct xfs_bmbt_irec	*irec,
680	int			state)
681{
682	struct xfs_ifork	*ifp = xfs_iext_state_to_fork(ip, state);
683
684	xfs_iext_insert_raw(ifp, cur, irec);
685	trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
686}
687
688static struct xfs_iext_node *
689xfs_iext_rebalance_node(
690	struct xfs_iext_node	*parent,
691	int			*pos,
692	struct xfs_iext_node	*node,
693	int			nr_entries)
694{
695	/*
696	 * If the neighbouring nodes are completely full, or have different
697	 * parents, we might never be able to merge our node, and will only
698	 * delete it once the number of entries hits zero.
699	 */
700	if (nr_entries == 0)
701		return node;
702
703	if (*pos > 0) {
704		struct xfs_iext_node *prev = parent->ptrs[*pos - 1];
705		int nr_prev = xfs_iext_node_nr_entries(prev, 0), i;
706
707		if (nr_prev + nr_entries <= KEYS_PER_NODE) {
708			for (i = 0; i < nr_entries; i++) {
709				prev->keys[nr_prev + i] = node->keys[i];
710				prev->ptrs[nr_prev + i] = node->ptrs[i];
711			}
712			return node;
713		}
714	}
715
716	if (*pos + 1 < xfs_iext_node_nr_entries(parent, *pos)) {
717		struct xfs_iext_node *next = parent->ptrs[*pos + 1];
718		int nr_next = xfs_iext_node_nr_entries(next, 0), i;
719
720		if (nr_entries + nr_next <= KEYS_PER_NODE) {
721			/*
722			 * Merge the next node into this node so that we don't
723			 * have to do an additional update of the keys in the
724			 * higher levels.
725			 */
726			for (i = 0; i < nr_next; i++) {
727				node->keys[nr_entries + i] = next->keys[i];
728				node->ptrs[nr_entries + i] = next->ptrs[i];
729			}
730
731			++*pos;
732			return next;
733		}
734	}
735
736	return NULL;
737}
738
739static void
740xfs_iext_remove_node(
741	struct xfs_ifork	*ifp,
742	xfs_fileoff_t		offset,
743	void			*victim)
744{
745	struct xfs_iext_node	*node, *parent;
746	int			level = 2, pos, nr_entries, i;
747
748	ASSERT(level <= ifp->if_height);
749	node = xfs_iext_find_level(ifp, offset, level);
750	pos = xfs_iext_node_pos(node, offset);
751again:
752	ASSERT(node->ptrs[pos]);
753	ASSERT(node->ptrs[pos] == victim);
754	kfree(victim);
755
756	nr_entries = xfs_iext_node_nr_entries(node, pos) - 1;
757	offset = node->keys[0];
758	for (i = pos; i < nr_entries; i++) {
759		node->keys[i] = node->keys[i + 1];
760		node->ptrs[i] = node->ptrs[i + 1];
761	}
762	node->keys[nr_entries] = XFS_IEXT_KEY_INVALID;
763	node->ptrs[nr_entries] = NULL;
764
765	if (pos == 0 && nr_entries > 0) {
766		xfs_iext_update_node(ifp, offset, node->keys[0], level, node);
767		offset = node->keys[0];
768	}
769
770	if (nr_entries >= KEYS_PER_NODE / 2)
771		return;
772
773	if (level < ifp->if_height) {
774		/*
775		 * If we aren't at the root yet try to find a neighbour node to
776		 * merge with (or delete the node if it is empty), and then
777		 * recurse up to the next level.
778		 */
779		level++;
780		parent = xfs_iext_find_level(ifp, offset, level);
781		pos = xfs_iext_node_pos(parent, offset);
782
783		ASSERT(pos != KEYS_PER_NODE);
784		ASSERT(parent->ptrs[pos] == node);
785
786		node = xfs_iext_rebalance_node(parent, &pos, node, nr_entries);
787		if (node) {
788			victim = node;
789			node = parent;
790			goto again;
791		}
792	} else if (nr_entries == 1) {
793		/*
794		 * If we are at the root and only one entry is left we can just
795		 * free this node and update the root pointer.
796		 */
797		ASSERT(node == ifp->if_data);
798		ifp->if_data = node->ptrs[0];
799		ifp->if_height--;
800		kfree(node);
801	}
802}
803
804static void
805xfs_iext_rebalance_leaf(
806	struct xfs_ifork	*ifp,
807	struct xfs_iext_cursor	*cur,
808	struct xfs_iext_leaf	*leaf,
809	xfs_fileoff_t		offset,
810	int			nr_entries)
811{
812	/*
813	 * If the neighbouring nodes are completely full we might never be able
814	 * to merge our node, and will only delete it once the number of
815	 * entries hits zero.
816	 */
817	if (nr_entries == 0)
818		goto remove_node;
819
820	if (leaf->prev) {
821		int nr_prev = xfs_iext_leaf_nr_entries(ifp, leaf->prev, 0), i;
822
823		if (nr_prev + nr_entries <= RECS_PER_LEAF) {
824			for (i = 0; i < nr_entries; i++)
825				leaf->prev->recs[nr_prev + i] = leaf->recs[i];
826
827			if (cur->leaf == leaf) {
828				cur->leaf = leaf->prev;
829				cur->pos += nr_prev;
830			}
831			goto remove_node;
832		}
833	}
834
835	if (leaf->next) {
836		int nr_next = xfs_iext_leaf_nr_entries(ifp, leaf->next, 0), i;
837
838		if (nr_entries + nr_next <= RECS_PER_LEAF) {
839			/*
840			 * Merge the next node into this node so that we don't
841			 * have to do an additional update of the keys in the
842			 * higher levels.
843			 */
844			for (i = 0; i < nr_next; i++) {
845				leaf->recs[nr_entries + i] =
846					leaf->next->recs[i];
847			}
848
849			if (cur->leaf == leaf->next) {
850				cur->leaf = leaf;
851				cur->pos += nr_entries;
852			}
853
854			offset = xfs_iext_leaf_key(leaf->next, 0);
855			leaf = leaf->next;
856			goto remove_node;
857		}
858	}
859
860	return;
861remove_node:
862	if (leaf->prev)
863		leaf->prev->next = leaf->next;
864	if (leaf->next)
865		leaf->next->prev = leaf->prev;
866	xfs_iext_remove_node(ifp, offset, leaf);
867}
868
869static void
870xfs_iext_free_last_leaf(
871	struct xfs_ifork	*ifp)
872{
873	ifp->if_height--;
874	kfree(ifp->if_data);
875	ifp->if_data = NULL;
876}
877
878void
879xfs_iext_remove(
880	struct xfs_inode	*ip,
881	struct xfs_iext_cursor	*cur,
882	int			state)
883{
884	struct xfs_ifork	*ifp = xfs_iext_state_to_fork(ip, state);
885	struct xfs_iext_leaf	*leaf = cur->leaf;
886	xfs_fileoff_t		offset = xfs_iext_leaf_key(leaf, 0);
887	int			i, nr_entries;
888
889	trace_xfs_iext_remove(ip, cur, state, _RET_IP_);
890
891	ASSERT(ifp->if_height > 0);
892	ASSERT(ifp->if_data != NULL);
893	ASSERT(xfs_iext_valid(ifp, cur));
894
895	xfs_iext_inc_seq(ifp);
896
897	nr_entries = xfs_iext_leaf_nr_entries(ifp, leaf, cur->pos) - 1;
898	for (i = cur->pos; i < nr_entries; i++)
899		leaf->recs[i] = leaf->recs[i + 1];
900	xfs_iext_rec_clear(&leaf->recs[nr_entries]);
901	ifp->if_bytes -= sizeof(struct xfs_iext_rec);
902
903	if (cur->pos == 0 && nr_entries > 0) {
904		xfs_iext_update_node(ifp, offset, xfs_iext_leaf_key(leaf, 0), 1,
905				leaf);
906		offset = xfs_iext_leaf_key(leaf, 0);
907	} else if (cur->pos == nr_entries) {
908		if (ifp->if_height > 1 && leaf->next)
909			cur->leaf = leaf->next;
910		else
911			cur->leaf = NULL;
912		cur->pos = 0;
913	}
914
915	if (nr_entries >= RECS_PER_LEAF / 2)
916		return;
917
918	if (ifp->if_height > 1)
919		xfs_iext_rebalance_leaf(ifp, cur, leaf, offset, nr_entries);
920	else if (nr_entries == 0)
921		xfs_iext_free_last_leaf(ifp);
922}
923
924/*
925 * Lookup the extent covering bno.
926 *
927 * If there is an extent covering bno return the extent index, and store the
928 * expanded extent structure in *gotp, and the extent cursor in *cur.
929 * If there is no extent covering bno, but there is an extent after it (e.g.
930 * it lies in a hole) return that extent in *gotp and its cursor in *cur
931 * instead.
932 * If bno is beyond the last extent return false, and return an invalid
933 * cursor value.
934 */
935bool
936xfs_iext_lookup_extent(
937	struct xfs_inode	*ip,
938	struct xfs_ifork	*ifp,
939	xfs_fileoff_t		offset,
940	struct xfs_iext_cursor	*cur,
941	struct xfs_bmbt_irec	*gotp)
942{
943	XFS_STATS_INC(ip->i_mount, xs_look_exlist);
944
945	cur->leaf = xfs_iext_find_level(ifp, offset, 1);
946	if (!cur->leaf) {
947		cur->pos = 0;
948		return false;
949	}
950
951	for (cur->pos = 0; cur->pos < xfs_iext_max_recs(ifp); cur->pos++) {
952		struct xfs_iext_rec *rec = cur_rec(cur);
953
954		if (xfs_iext_rec_is_empty(rec))
955			break;
956		if (xfs_iext_rec_cmp(rec, offset) >= 0)
957			goto found;
958	}
959
960	/* Try looking in the next node for an entry > offset */
961	if (ifp->if_height == 1 || !cur->leaf->next)
962		return false;
963	cur->leaf = cur->leaf->next;
964	cur->pos = 0;
965	if (!xfs_iext_valid(ifp, cur))
966		return false;
967found:
968	xfs_iext_get(gotp, cur_rec(cur));
969	return true;
970}
971
972/*
973 * Returns the last extent before end, and if this extent doesn't cover
974 * end, update end to the end of the extent.
975 */
976bool
977xfs_iext_lookup_extent_before(
978	struct xfs_inode	*ip,
979	struct xfs_ifork	*ifp,
980	xfs_fileoff_t		*end,
981	struct xfs_iext_cursor	*cur,
982	struct xfs_bmbt_irec	*gotp)
983{
984	/* could be optimized to not even look up the next on a match.. */
985	if (xfs_iext_lookup_extent(ip, ifp, *end - 1, cur, gotp) &&
986	    gotp->br_startoff <= *end - 1)
987		return true;
988	if (!xfs_iext_prev_extent(ifp, cur, gotp))
989		return false;
990	*end = gotp->br_startoff + gotp->br_blockcount;
991	return true;
992}
993
994void
995xfs_iext_update_extent(
996	struct xfs_inode	*ip,
997	int			state,
998	struct xfs_iext_cursor	*cur,
999	struct xfs_bmbt_irec	*new)
1000{
1001	struct xfs_ifork	*ifp = xfs_iext_state_to_fork(ip, state);
1002
1003	xfs_iext_inc_seq(ifp);
1004
1005	if (cur->pos == 0) {
1006		struct xfs_bmbt_irec	old;
1007
1008		xfs_iext_get(&old, cur_rec(cur));
1009		if (new->br_startoff != old.br_startoff) {
1010			xfs_iext_update_node(ifp, old.br_startoff,
1011					new->br_startoff, 1, cur->leaf);
1012		}
1013	}
1014
1015	trace_xfs_bmap_pre_update(ip, cur, state, _RET_IP_);
1016	xfs_iext_set(cur_rec(cur), new);
1017	trace_xfs_bmap_post_update(ip, cur, state, _RET_IP_);
1018}
1019
1020/*
1021 * Return true if the cursor points at an extent and return the extent structure
1022 * in gotp.  Else return false.
1023 */
1024bool
1025xfs_iext_get_extent(
1026	struct xfs_ifork	*ifp,
1027	struct xfs_iext_cursor	*cur,
1028	struct xfs_bmbt_irec	*gotp)
1029{
1030	if (!xfs_iext_valid(ifp, cur))
1031		return false;
1032	xfs_iext_get(gotp, cur_rec(cur));
1033	return true;
1034}
1035
1036/*
1037 * This is a recursive function, because of that we need to be extremely
1038 * careful with stack usage.
1039 */
1040static void
1041xfs_iext_destroy_node(
1042	struct xfs_iext_node	*node,
1043	int			level)
1044{
1045	int			i;
1046
1047	if (level > 1) {
1048		for (i = 0; i < KEYS_PER_NODE; i++) {
1049			if (node->keys[i] == XFS_IEXT_KEY_INVALID)
1050				break;
1051			xfs_iext_destroy_node(node->ptrs[i], level - 1);
1052		}
1053	}
1054
1055	kfree(node);
1056}
1057
1058void
1059xfs_iext_destroy(
1060	struct xfs_ifork	*ifp)
1061{
1062	xfs_iext_destroy_node(ifp->if_data, ifp->if_height);
1063
1064	ifp->if_bytes = 0;
1065	ifp->if_height = 0;
1066	ifp->if_data = NULL;
1067}
1068