1// SPDX-License-Identifier: GPL-2.0+
2/*
3 * NILFS B-tree.
4 *
5 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
6 *
7 * Written by Koji Sato.
8 */
9
10#include <linux/slab.h>
11#include <linux/string.h>
12#include <linux/errno.h>
13#include <linux/pagevec.h>
14#include "nilfs.h"
15#include "page.h"
16#include "btnode.h"
17#include "btree.h"
18#include "alloc.h"
19#include "dat.h"
20
21static void __nilfs_btree_init(struct nilfs_bmap *bmap);
22
23static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
24{
25	struct nilfs_btree_path *path;
26	int level = NILFS_BTREE_LEVEL_DATA;
27
28	path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
29	if (path == NULL)
30		goto out;
31
32	for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
33		path[level].bp_bh = NULL;
34		path[level].bp_sib_bh = NULL;
35		path[level].bp_index = 0;
36		path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
37		path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
38		path[level].bp_op = NULL;
39	}
40
41out:
42	return path;
43}
44
45static void nilfs_btree_free_path(struct nilfs_btree_path *path)
46{
47	int level = NILFS_BTREE_LEVEL_DATA;
48
49	for (; level < NILFS_BTREE_LEVEL_MAX; level++)
50		brelse(path[level].bp_bh);
51
52	kmem_cache_free(nilfs_btree_path_cache, path);
53}
54
55/*
56 * B-tree node operations
57 */
58static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
59				     __u64 ptr, struct buffer_head **bhp)
60{
61	struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
62	struct address_space *btnc = btnc_inode->i_mapping;
63	struct buffer_head *bh;
64
65	bh = nilfs_btnode_create_block(btnc, ptr);
66	if (!bh)
67		return -ENOMEM;
68
69	set_buffer_nilfs_volatile(bh);
70	*bhp = bh;
71	return 0;
72}
73
74static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
75{
76	return node->bn_flags;
77}
78
79static void
80nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
81{
82	node->bn_flags = flags;
83}
84
85static int nilfs_btree_node_root(const struct nilfs_btree_node *node)
86{
87	return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
88}
89
90static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
91{
92	return node->bn_level;
93}
94
95static void
96nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
97{
98	node->bn_level = level;
99}
100
101static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
102{
103	return le16_to_cpu(node->bn_nchildren);
104}
105
106static void
107nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
108{
109	node->bn_nchildren = cpu_to_le16(nchildren);
110}
111
112static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
113{
114	return i_blocksize(btree->b_inode);
115}
116
117static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
118{
119	return btree->b_nchildren_per_block;
120}
121
122static __le64 *
123nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
124{
125	return (__le64 *)((char *)(node + 1) +
126			  (nilfs_btree_node_root(node) ?
127			   0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
128}
129
130static __le64 *
131nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax)
132{
133	return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax);
134}
135
136static __u64
137nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
138{
139	return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index));
140}
141
142static void
143nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
144{
145	*(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key);
146}
147
148static __u64
149nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index,
150			 int ncmax)
151{
152	return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index));
153}
154
155static void
156nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr,
157			 int ncmax)
158{
159	*(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr);
160}
161
162static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags,
163				  int level, int nchildren, int ncmax,
164				  const __u64 *keys, const __u64 *ptrs)
165{
166	__le64 *dkeys;
167	__le64 *dptrs;
168	int i;
169
170	nilfs_btree_node_set_flags(node, flags);
171	nilfs_btree_node_set_level(node, level);
172	nilfs_btree_node_set_nchildren(node, nchildren);
173
174	dkeys = nilfs_btree_node_dkeys(node);
175	dptrs = nilfs_btree_node_dptrs(node, ncmax);
176	for (i = 0; i < nchildren; i++) {
177		dkeys[i] = cpu_to_le64(keys[i]);
178		dptrs[i] = cpu_to_le64(ptrs[i]);
179	}
180}
181
182/* Assume the buffer heads corresponding to left and right are locked. */
183static void nilfs_btree_node_move_left(struct nilfs_btree_node *left,
184				       struct nilfs_btree_node *right,
185				       int n, int lncmax, int rncmax)
186{
187	__le64 *ldkeys, *rdkeys;
188	__le64 *ldptrs, *rdptrs;
189	int lnchildren, rnchildren;
190
191	ldkeys = nilfs_btree_node_dkeys(left);
192	ldptrs = nilfs_btree_node_dptrs(left, lncmax);
193	lnchildren = nilfs_btree_node_get_nchildren(left);
194
195	rdkeys = nilfs_btree_node_dkeys(right);
196	rdptrs = nilfs_btree_node_dptrs(right, rncmax);
197	rnchildren = nilfs_btree_node_get_nchildren(right);
198
199	memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
200	memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
201	memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
202	memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
203
204	lnchildren += n;
205	rnchildren -= n;
206	nilfs_btree_node_set_nchildren(left, lnchildren);
207	nilfs_btree_node_set_nchildren(right, rnchildren);
208}
209
210/* Assume that the buffer heads corresponding to left and right are locked. */
211static void nilfs_btree_node_move_right(struct nilfs_btree_node *left,
212					struct nilfs_btree_node *right,
213					int n, int lncmax, int rncmax)
214{
215	__le64 *ldkeys, *rdkeys;
216	__le64 *ldptrs, *rdptrs;
217	int lnchildren, rnchildren;
218
219	ldkeys = nilfs_btree_node_dkeys(left);
220	ldptrs = nilfs_btree_node_dptrs(left, lncmax);
221	lnchildren = nilfs_btree_node_get_nchildren(left);
222
223	rdkeys = nilfs_btree_node_dkeys(right);
224	rdptrs = nilfs_btree_node_dptrs(right, rncmax);
225	rnchildren = nilfs_btree_node_get_nchildren(right);
226
227	memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
228	memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
229	memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
230	memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
231
232	lnchildren -= n;
233	rnchildren += n;
234	nilfs_btree_node_set_nchildren(left, lnchildren);
235	nilfs_btree_node_set_nchildren(right, rnchildren);
236}
237
238/* Assume that the buffer head corresponding to node is locked. */
239static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index,
240				    __u64 key, __u64 ptr, int ncmax)
241{
242	__le64 *dkeys;
243	__le64 *dptrs;
244	int nchildren;
245
246	dkeys = nilfs_btree_node_dkeys(node);
247	dptrs = nilfs_btree_node_dptrs(node, ncmax);
248	nchildren = nilfs_btree_node_get_nchildren(node);
249	if (index < nchildren) {
250		memmove(dkeys + index + 1, dkeys + index,
251			(nchildren - index) * sizeof(*dkeys));
252		memmove(dptrs + index + 1, dptrs + index,
253			(nchildren - index) * sizeof(*dptrs));
254	}
255	dkeys[index] = cpu_to_le64(key);
256	dptrs[index] = cpu_to_le64(ptr);
257	nchildren++;
258	nilfs_btree_node_set_nchildren(node, nchildren);
259}
260
261/* Assume that the buffer head corresponding to node is locked. */
262static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index,
263				    __u64 *keyp, __u64 *ptrp, int ncmax)
264{
265	__u64 key;
266	__u64 ptr;
267	__le64 *dkeys;
268	__le64 *dptrs;
269	int nchildren;
270
271	dkeys = nilfs_btree_node_dkeys(node);
272	dptrs = nilfs_btree_node_dptrs(node, ncmax);
273	key = le64_to_cpu(dkeys[index]);
274	ptr = le64_to_cpu(dptrs[index]);
275	nchildren = nilfs_btree_node_get_nchildren(node);
276	if (keyp != NULL)
277		*keyp = key;
278	if (ptrp != NULL)
279		*ptrp = ptr;
280
281	if (index < nchildren - 1) {
282		memmove(dkeys + index, dkeys + index + 1,
283			(nchildren - index - 1) * sizeof(*dkeys));
284		memmove(dptrs + index, dptrs + index + 1,
285			(nchildren - index - 1) * sizeof(*dptrs));
286	}
287	nchildren--;
288	nilfs_btree_node_set_nchildren(node, nchildren);
289}
290
291static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
292				   __u64 key, int *indexp)
293{
294	__u64 nkey;
295	int index, low, high, s;
296
297	/* binary search */
298	low = 0;
299	high = nilfs_btree_node_get_nchildren(node) - 1;
300	index = 0;
301	s = 0;
302	while (low <= high) {
303		index = (low + high) / 2;
304		nkey = nilfs_btree_node_get_key(node, index);
305		if (nkey == key) {
306			s = 0;
307			goto out;
308		} else if (nkey < key) {
309			low = index + 1;
310			s = -1;
311		} else {
312			high = index - 1;
313			s = 1;
314		}
315	}
316
317	/* adjust index */
318	if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
319		if (s > 0 && index > 0)
320			index--;
321	} else if (s < 0)
322		index++;
323
324 out:
325	*indexp = index;
326
327	return s == 0;
328}
329
330/**
331 * nilfs_btree_node_broken - verify consistency of btree node
332 * @node: btree node block to be examined
333 * @size: node size (in bytes)
334 * @inode: host inode of btree
335 * @blocknr: block number
336 *
337 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
338 */
339static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
340				   size_t size, struct inode *inode,
341				   sector_t blocknr)
342{
343	int level, flags, nchildren;
344	int ret = 0;
345
346	level = nilfs_btree_node_get_level(node);
347	flags = nilfs_btree_node_get_flags(node);
348	nchildren = nilfs_btree_node_get_nchildren(node);
349
350	if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
351		     level >= NILFS_BTREE_LEVEL_MAX ||
352		     (flags & NILFS_BTREE_NODE_ROOT) ||
353		     nchildren < 0 ||
354		     nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
355		nilfs_crit(inode->i_sb,
356			   "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d",
357			   inode->i_ino, (unsigned long long)blocknr, level,
358			   flags, nchildren);
359		ret = 1;
360	}
361	return ret;
362}
363
364/**
365 * nilfs_btree_root_broken - verify consistency of btree root node
366 * @node: btree root node to be examined
367 * @inode: host inode of btree
368 *
369 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
370 */
371static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
372				   struct inode *inode)
373{
374	int level, flags, nchildren;
375	int ret = 0;
376
377	level = nilfs_btree_node_get_level(node);
378	flags = nilfs_btree_node_get_flags(node);
379	nchildren = nilfs_btree_node_get_nchildren(node);
380
381	if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
382		     level >= NILFS_BTREE_LEVEL_MAX ||
383		     nchildren < 0 ||
384		     nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) {
385		nilfs_crit(inode->i_sb,
386			   "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d",
387			   inode->i_ino, level, flags, nchildren);
388		ret = 1;
389	}
390	return ret;
391}
392
393int nilfs_btree_broken_node_block(struct buffer_head *bh)
394{
395	struct inode *inode;
396	int ret;
397
398	if (buffer_nilfs_checked(bh))
399		return 0;
400
401	inode = bh->b_folio->mapping->host;
402	ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
403				      bh->b_size, inode, bh->b_blocknr);
404	if (likely(!ret))
405		set_buffer_nilfs_checked(bh);
406	return ret;
407}
408
409static struct nilfs_btree_node *
410nilfs_btree_get_root(const struct nilfs_bmap *btree)
411{
412	return (struct nilfs_btree_node *)btree->b_u.u_data;
413}
414
415static struct nilfs_btree_node *
416nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
417{
418	return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
419}
420
421static struct nilfs_btree_node *
422nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
423{
424	return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
425}
426
427static int nilfs_btree_height(const struct nilfs_bmap *btree)
428{
429	return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
430}
431
432static struct nilfs_btree_node *
433nilfs_btree_get_node(const struct nilfs_bmap *btree,
434		     const struct nilfs_btree_path *path,
435		     int level, int *ncmaxp)
436{
437	struct nilfs_btree_node *node;
438
439	if (level == nilfs_btree_height(btree) - 1) {
440		node = nilfs_btree_get_root(btree);
441		*ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX;
442	} else {
443		node = nilfs_btree_get_nonroot_node(path, level);
444		*ncmaxp = nilfs_btree_nchildren_per_block(btree);
445	}
446	return node;
447}
448
449static int nilfs_btree_bad_node(const struct nilfs_bmap *btree,
450				struct nilfs_btree_node *node, int level)
451{
452	if (unlikely(nilfs_btree_node_get_level(node) != level)) {
453		dump_stack();
454		nilfs_crit(btree->b_inode->i_sb,
455			   "btree level mismatch (ino=%lu): %d != %d",
456			   btree->b_inode->i_ino,
457			   nilfs_btree_node_get_level(node), level);
458		return 1;
459	}
460	return 0;
461}
462
463struct nilfs_btree_readahead_info {
464	struct nilfs_btree_node *node;	/* parent node */
465	int max_ra_blocks;		/* max nof blocks to read ahead */
466	int index;			/* current index on the parent node */
467	int ncmax;			/* nof children in the parent node */
468};
469
470static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
471				   struct buffer_head **bhp,
472				   const struct nilfs_btree_readahead_info *ra)
473{
474	struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
475	struct address_space *btnc = btnc_inode->i_mapping;
476	struct buffer_head *bh, *ra_bh;
477	sector_t submit_ptr = 0;
478	int ret;
479
480	ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, &bh,
481					&submit_ptr);
482	if (ret) {
483		if (likely(ret == -EEXIST))
484			goto out_check;
485		if (ret == -ENOENT) {
486			/*
487			 * Block address translation failed due to invalid
488			 * value of 'ptr'.  In this case, return internal code
489			 * -EINVAL (broken bmap) to notify bmap layer of fatal
490			 * metadata corruption.
491			 */
492			ret = -EINVAL;
493		}
494		return ret;
495	}
496
497	if (ra) {
498		int i, n;
499		__u64 ptr2;
500
501		/* read ahead sibling nodes */
502		for (n = ra->max_ra_blocks, i = ra->index + 1;
503		     n > 0 && i < ra->ncmax; n--, i++) {
504			ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax);
505
506			ret = nilfs_btnode_submit_block(btnc, ptr2, 0,
507						REQ_OP_READ | REQ_RAHEAD,
508						&ra_bh, &submit_ptr);
509			if (likely(!ret || ret == -EEXIST))
510				brelse(ra_bh);
511			else if (ret != -EBUSY)
512				break;
513			if (!buffer_locked(bh))
514				goto out_no_wait;
515		}
516	}
517
518	wait_on_buffer(bh);
519
520 out_no_wait:
521	if (!buffer_uptodate(bh)) {
522		nilfs_err(btree->b_inode->i_sb,
523			  "I/O error reading b-tree node block (ino=%lu, blocknr=%llu)",
524			  btree->b_inode->i_ino, (unsigned long long)ptr);
525		brelse(bh);
526		return -EIO;
527	}
528
529 out_check:
530	if (nilfs_btree_broken_node_block(bh)) {
531		clear_buffer_uptodate(bh);
532		brelse(bh);
533		return -EINVAL;
534	}
535
536	*bhp = bh;
537	return 0;
538}
539
540static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
541				   struct buffer_head **bhp)
542{
543	return __nilfs_btree_get_block(btree, ptr, bhp, NULL);
544}
545
546static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree,
547				 struct nilfs_btree_path *path,
548				 __u64 key, __u64 *ptrp, int minlevel,
549				 int readahead)
550{
551	struct nilfs_btree_node *node;
552	struct nilfs_btree_readahead_info p, *ra;
553	__u64 ptr;
554	int level, index, found, ncmax, ret;
555
556	node = nilfs_btree_get_root(btree);
557	level = nilfs_btree_node_get_level(node);
558	if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
559		return -ENOENT;
560
561	found = nilfs_btree_node_lookup(node, key, &index);
562	ptr = nilfs_btree_node_get_ptr(node, index,
563				       NILFS_BTREE_ROOT_NCHILDREN_MAX);
564	path[level].bp_bh = NULL;
565	path[level].bp_index = index;
566
567	ncmax = nilfs_btree_nchildren_per_block(btree);
568
569	while (--level >= minlevel) {
570		ra = NULL;
571		if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) {
572			p.node = nilfs_btree_get_node(btree, path, level + 1,
573						      &p.ncmax);
574			p.index = index;
575			p.max_ra_blocks = 7;
576			ra = &p;
577		}
578		ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh,
579					      ra);
580		if (ret < 0)
581			return ret;
582
583		node = nilfs_btree_get_nonroot_node(path, level);
584		if (nilfs_btree_bad_node(btree, node, level))
585			return -EINVAL;
586		if (!found)
587			found = nilfs_btree_node_lookup(node, key, &index);
588		else
589			index = 0;
590		if (index < ncmax) {
591			ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
592		} else {
593			WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
594			/* insert */
595			ptr = NILFS_BMAP_INVALID_PTR;
596		}
597		path[level].bp_index = index;
598	}
599	if (!found)
600		return -ENOENT;
601
602	if (ptrp != NULL)
603		*ptrp = ptr;
604
605	return 0;
606}
607
608static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
609				      struct nilfs_btree_path *path,
610				      __u64 *keyp, __u64 *ptrp)
611{
612	struct nilfs_btree_node *node;
613	__u64 ptr;
614	int index, level, ncmax, ret;
615
616	node = nilfs_btree_get_root(btree);
617	index = nilfs_btree_node_get_nchildren(node) - 1;
618	if (index < 0)
619		return -ENOENT;
620	level = nilfs_btree_node_get_level(node);
621	ptr = nilfs_btree_node_get_ptr(node, index,
622				       NILFS_BTREE_ROOT_NCHILDREN_MAX);
623	path[level].bp_bh = NULL;
624	path[level].bp_index = index;
625	ncmax = nilfs_btree_nchildren_per_block(btree);
626
627	for (level--; level > 0; level--) {
628		ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
629		if (ret < 0)
630			return ret;
631		node = nilfs_btree_get_nonroot_node(path, level);
632		if (nilfs_btree_bad_node(btree, node, level))
633			return -EINVAL;
634		index = nilfs_btree_node_get_nchildren(node) - 1;
635		ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
636		path[level].bp_index = index;
637	}
638
639	if (keyp != NULL)
640		*keyp = nilfs_btree_node_get_key(node, index);
641	if (ptrp != NULL)
642		*ptrp = ptr;
643
644	return 0;
645}
646
647/**
648 * nilfs_btree_get_next_key - get next valid key from btree path array
649 * @btree: bmap struct of btree
650 * @path: array of nilfs_btree_path struct
651 * @minlevel: start level
652 * @nextkey: place to store the next valid key
653 *
654 * Return Value: If a next key was found, 0 is returned. Otherwise,
655 * -ENOENT is returned.
656 */
657static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree,
658				    const struct nilfs_btree_path *path,
659				    int minlevel, __u64 *nextkey)
660{
661	struct nilfs_btree_node *node;
662	int maxlevel = nilfs_btree_height(btree) - 1;
663	int index, next_adj, level;
664
665	/* Next index is already set to bp_index for leaf nodes. */
666	next_adj = 0;
667	for (level = minlevel; level <= maxlevel; level++) {
668		if (level == maxlevel)
669			node = nilfs_btree_get_root(btree);
670		else
671			node = nilfs_btree_get_nonroot_node(path, level);
672
673		index = path[level].bp_index + next_adj;
674		if (index < nilfs_btree_node_get_nchildren(node)) {
675			/* Next key is in this node */
676			*nextkey = nilfs_btree_node_get_key(node, index);
677			return 0;
678		}
679		/* For non-leaf nodes, next index is stored at bp_index + 1. */
680		next_adj = 1;
681	}
682	return -ENOENT;
683}
684
685static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
686			      __u64 key, int level, __u64 *ptrp)
687{
688	struct nilfs_btree_path *path;
689	int ret;
690
691	path = nilfs_btree_alloc_path();
692	if (path == NULL)
693		return -ENOMEM;
694
695	ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0);
696
697	nilfs_btree_free_path(path);
698
699	return ret;
700}
701
702static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
703				     __u64 key, __u64 *ptrp,
704				     unsigned int maxblocks)
705{
706	struct nilfs_btree_path *path;
707	struct nilfs_btree_node *node;
708	struct inode *dat = NULL;
709	__u64 ptr, ptr2;
710	sector_t blocknr;
711	int level = NILFS_BTREE_LEVEL_NODE_MIN;
712	int ret, cnt, index, maxlevel, ncmax;
713	struct nilfs_btree_readahead_info p;
714
715	path = nilfs_btree_alloc_path();
716	if (path == NULL)
717		return -ENOMEM;
718
719	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1);
720	if (ret < 0)
721		goto out;
722
723	if (NILFS_BMAP_USE_VBN(btree)) {
724		dat = nilfs_bmap_get_dat(btree);
725		ret = nilfs_dat_translate(dat, ptr, &blocknr);
726		if (ret < 0)
727			goto dat_error;
728		ptr = blocknr;
729	}
730	cnt = 1;
731	if (cnt == maxblocks)
732		goto end;
733
734	maxlevel = nilfs_btree_height(btree) - 1;
735	node = nilfs_btree_get_node(btree, path, level, &ncmax);
736	index = path[level].bp_index + 1;
737	for (;;) {
738		while (index < nilfs_btree_node_get_nchildren(node)) {
739			if (nilfs_btree_node_get_key(node, index) !=
740			    key + cnt)
741				goto end;
742			ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax);
743			if (dat) {
744				ret = nilfs_dat_translate(dat, ptr2, &blocknr);
745				if (ret < 0)
746					goto dat_error;
747				ptr2 = blocknr;
748			}
749			if (ptr2 != ptr + cnt || ++cnt == maxblocks)
750				goto end;
751			index++;
752		}
753		if (level == maxlevel)
754			break;
755
756		/* look-up right sibling node */
757		p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax);
758		p.index = path[level + 1].bp_index + 1;
759		p.max_ra_blocks = 7;
760		if (p.index >= nilfs_btree_node_get_nchildren(p.node) ||
761		    nilfs_btree_node_get_key(p.node, p.index) != key + cnt)
762			break;
763		ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax);
764		path[level + 1].bp_index = p.index;
765
766		brelse(path[level].bp_bh);
767		path[level].bp_bh = NULL;
768
769		ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh,
770					      &p);
771		if (ret < 0)
772			goto out;
773		node = nilfs_btree_get_nonroot_node(path, level);
774		ncmax = nilfs_btree_nchildren_per_block(btree);
775		index = 0;
776		path[level].bp_index = index;
777	}
778 end:
779	*ptrp = ptr;
780	ret = cnt;
781 out:
782	nilfs_btree_free_path(path);
783	return ret;
784
785 dat_error:
786	if (ret == -ENOENT)
787		ret = -EINVAL;  /* Notify bmap layer of metadata corruption */
788	goto out;
789}
790
791static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
792				    struct nilfs_btree_path *path,
793				    int level, __u64 key)
794{
795	if (level < nilfs_btree_height(btree) - 1) {
796		do {
797			nilfs_btree_node_set_key(
798				nilfs_btree_get_nonroot_node(path, level),
799				path[level].bp_index, key);
800			if (!buffer_dirty(path[level].bp_bh))
801				mark_buffer_dirty(path[level].bp_bh);
802		} while ((path[level].bp_index == 0) &&
803			 (++level < nilfs_btree_height(btree) - 1));
804	}
805
806	/* root */
807	if (level == nilfs_btree_height(btree) - 1) {
808		nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
809					 path[level].bp_index, key);
810	}
811}
812
813static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
814				  struct nilfs_btree_path *path,
815				  int level, __u64 *keyp, __u64 *ptrp)
816{
817	struct nilfs_btree_node *node;
818	int ncblk;
819
820	if (level < nilfs_btree_height(btree) - 1) {
821		node = nilfs_btree_get_nonroot_node(path, level);
822		ncblk = nilfs_btree_nchildren_per_block(btree);
823		nilfs_btree_node_insert(node, path[level].bp_index,
824					*keyp, *ptrp, ncblk);
825		if (!buffer_dirty(path[level].bp_bh))
826			mark_buffer_dirty(path[level].bp_bh);
827
828		if (path[level].bp_index == 0)
829			nilfs_btree_promote_key(btree, path, level + 1,
830						nilfs_btree_node_get_key(node,
831									 0));
832	} else {
833		node = nilfs_btree_get_root(btree);
834		nilfs_btree_node_insert(node, path[level].bp_index,
835					*keyp, *ptrp,
836					NILFS_BTREE_ROOT_NCHILDREN_MAX);
837	}
838}
839
840static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
841				   struct nilfs_btree_path *path,
842				   int level, __u64 *keyp, __u64 *ptrp)
843{
844	struct nilfs_btree_node *node, *left;
845	int nchildren, lnchildren, n, move, ncblk;
846
847	node = nilfs_btree_get_nonroot_node(path, level);
848	left = nilfs_btree_get_sib_node(path, level);
849	nchildren = nilfs_btree_node_get_nchildren(node);
850	lnchildren = nilfs_btree_node_get_nchildren(left);
851	ncblk = nilfs_btree_nchildren_per_block(btree);
852	move = 0;
853
854	n = (nchildren + lnchildren + 1) / 2 - lnchildren;
855	if (n > path[level].bp_index) {
856		/* move insert point */
857		n--;
858		move = 1;
859	}
860
861	nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
862
863	if (!buffer_dirty(path[level].bp_bh))
864		mark_buffer_dirty(path[level].bp_bh);
865	if (!buffer_dirty(path[level].bp_sib_bh))
866		mark_buffer_dirty(path[level].bp_sib_bh);
867
868	nilfs_btree_promote_key(btree, path, level + 1,
869				nilfs_btree_node_get_key(node, 0));
870
871	if (move) {
872		brelse(path[level].bp_bh);
873		path[level].bp_bh = path[level].bp_sib_bh;
874		path[level].bp_sib_bh = NULL;
875		path[level].bp_index += lnchildren;
876		path[level + 1].bp_index--;
877	} else {
878		brelse(path[level].bp_sib_bh);
879		path[level].bp_sib_bh = NULL;
880		path[level].bp_index -= n;
881	}
882
883	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
884}
885
886static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
887				    struct nilfs_btree_path *path,
888				    int level, __u64 *keyp, __u64 *ptrp)
889{
890	struct nilfs_btree_node *node, *right;
891	int nchildren, rnchildren, n, move, ncblk;
892
893	node = nilfs_btree_get_nonroot_node(path, level);
894	right = nilfs_btree_get_sib_node(path, level);
895	nchildren = nilfs_btree_node_get_nchildren(node);
896	rnchildren = nilfs_btree_node_get_nchildren(right);
897	ncblk = nilfs_btree_nchildren_per_block(btree);
898	move = 0;
899
900	n = (nchildren + rnchildren + 1) / 2 - rnchildren;
901	if (n > nchildren - path[level].bp_index) {
902		/* move insert point */
903		n--;
904		move = 1;
905	}
906
907	nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
908
909	if (!buffer_dirty(path[level].bp_bh))
910		mark_buffer_dirty(path[level].bp_bh);
911	if (!buffer_dirty(path[level].bp_sib_bh))
912		mark_buffer_dirty(path[level].bp_sib_bh);
913
914	path[level + 1].bp_index++;
915	nilfs_btree_promote_key(btree, path, level + 1,
916				nilfs_btree_node_get_key(right, 0));
917	path[level + 1].bp_index--;
918
919	if (move) {
920		brelse(path[level].bp_bh);
921		path[level].bp_bh = path[level].bp_sib_bh;
922		path[level].bp_sib_bh = NULL;
923		path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
924		path[level + 1].bp_index++;
925	} else {
926		brelse(path[level].bp_sib_bh);
927		path[level].bp_sib_bh = NULL;
928	}
929
930	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
931}
932
933static void nilfs_btree_split(struct nilfs_bmap *btree,
934			      struct nilfs_btree_path *path,
935			      int level, __u64 *keyp, __u64 *ptrp)
936{
937	struct nilfs_btree_node *node, *right;
938	int nchildren, n, move, ncblk;
939
940	node = nilfs_btree_get_nonroot_node(path, level);
941	right = nilfs_btree_get_sib_node(path, level);
942	nchildren = nilfs_btree_node_get_nchildren(node);
943	ncblk = nilfs_btree_nchildren_per_block(btree);
944	move = 0;
945
946	n = (nchildren + 1) / 2;
947	if (n > nchildren - path[level].bp_index) {
948		n--;
949		move = 1;
950	}
951
952	nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
953
954	if (!buffer_dirty(path[level].bp_bh))
955		mark_buffer_dirty(path[level].bp_bh);
956	if (!buffer_dirty(path[level].bp_sib_bh))
957		mark_buffer_dirty(path[level].bp_sib_bh);
958
959	if (move) {
960		path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
961		nilfs_btree_node_insert(right, path[level].bp_index,
962					*keyp, *ptrp, ncblk);
963
964		*keyp = nilfs_btree_node_get_key(right, 0);
965		*ptrp = path[level].bp_newreq.bpr_ptr;
966
967		brelse(path[level].bp_bh);
968		path[level].bp_bh = path[level].bp_sib_bh;
969		path[level].bp_sib_bh = NULL;
970	} else {
971		nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
972
973		*keyp = nilfs_btree_node_get_key(right, 0);
974		*ptrp = path[level].bp_newreq.bpr_ptr;
975
976		brelse(path[level].bp_sib_bh);
977		path[level].bp_sib_bh = NULL;
978	}
979
980	path[level + 1].bp_index++;
981}
982
983static void nilfs_btree_grow(struct nilfs_bmap *btree,
984			     struct nilfs_btree_path *path,
985			     int level, __u64 *keyp, __u64 *ptrp)
986{
987	struct nilfs_btree_node *root, *child;
988	int n, ncblk;
989
990	root = nilfs_btree_get_root(btree);
991	child = nilfs_btree_get_sib_node(path, level);
992	ncblk = nilfs_btree_nchildren_per_block(btree);
993
994	n = nilfs_btree_node_get_nchildren(root);
995
996	nilfs_btree_node_move_right(root, child, n,
997				    NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
998	nilfs_btree_node_set_level(root, level + 1);
999
1000	if (!buffer_dirty(path[level].bp_sib_bh))
1001		mark_buffer_dirty(path[level].bp_sib_bh);
1002
1003	path[level].bp_bh = path[level].bp_sib_bh;
1004	path[level].bp_sib_bh = NULL;
1005
1006	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
1007
1008	*keyp = nilfs_btree_node_get_key(child, 0);
1009	*ptrp = path[level].bp_newreq.bpr_ptr;
1010}
1011
1012static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree,
1013				   const struct nilfs_btree_path *path)
1014{
1015	struct nilfs_btree_node *node;
1016	int level, ncmax;
1017
1018	if (path == NULL)
1019		return NILFS_BMAP_INVALID_PTR;
1020
1021	/* left sibling */
1022	level = NILFS_BTREE_LEVEL_NODE_MIN;
1023	if (path[level].bp_index > 0) {
1024		node = nilfs_btree_get_node(btree, path, level, &ncmax);
1025		return nilfs_btree_node_get_ptr(node,
1026						path[level].bp_index - 1,
1027						ncmax);
1028	}
1029
1030	/* parent */
1031	level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
1032	if (level <= nilfs_btree_height(btree) - 1) {
1033		node = nilfs_btree_get_node(btree, path, level, &ncmax);
1034		return nilfs_btree_node_get_ptr(node, path[level].bp_index,
1035						ncmax);
1036	}
1037
1038	return NILFS_BMAP_INVALID_PTR;
1039}
1040
1041static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree,
1042				       const struct nilfs_btree_path *path,
1043				       __u64 key)
1044{
1045	__u64 ptr;
1046
1047	ptr = nilfs_bmap_find_target_seq(btree, key);
1048	if (ptr != NILFS_BMAP_INVALID_PTR)
1049		/* sequential access */
1050		return ptr;
1051
1052	ptr = nilfs_btree_find_near(btree, path);
1053	if (ptr != NILFS_BMAP_INVALID_PTR)
1054		/* near */
1055		return ptr;
1056
1057	/* block group */
1058	return nilfs_bmap_find_target_in_group(btree);
1059}
1060
1061static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree,
1062				      struct nilfs_btree_path *path,
1063				      int *levelp, __u64 key, __u64 ptr,
1064				      struct nilfs_bmap_stats *stats)
1065{
1066	struct buffer_head *bh;
1067	struct nilfs_btree_node *node, *parent, *sib;
1068	__u64 sibptr;
1069	int pindex, level, ncmax, ncblk, ret;
1070	struct inode *dat = NULL;
1071
1072	stats->bs_nblocks = 0;
1073	level = NILFS_BTREE_LEVEL_DATA;
1074
1075	/* allocate a new ptr for data block */
1076	if (NILFS_BMAP_USE_VBN(btree)) {
1077		path[level].bp_newreq.bpr_ptr =
1078			nilfs_btree_find_target_v(btree, path, key);
1079		dat = nilfs_bmap_get_dat(btree);
1080	}
1081
1082	ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1083	if (ret < 0)
1084		goto err_out_data;
1085
1086	ncblk = nilfs_btree_nchildren_per_block(btree);
1087
1088	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1089	     level < nilfs_btree_height(btree) - 1;
1090	     level++) {
1091		node = nilfs_btree_get_nonroot_node(path, level);
1092		if (nilfs_btree_node_get_nchildren(node) < ncblk) {
1093			path[level].bp_op = nilfs_btree_do_insert;
1094			stats->bs_nblocks++;
1095			goto out;
1096		}
1097
1098		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1099		pindex = path[level + 1].bp_index;
1100
1101		/* left sibling */
1102		if (pindex > 0) {
1103			sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1104							  ncmax);
1105			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1106			if (ret < 0)
1107				goto err_out_child_node;
1108			sib = (struct nilfs_btree_node *)bh->b_data;
1109			if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1110				path[level].bp_sib_bh = bh;
1111				path[level].bp_op = nilfs_btree_carry_left;
1112				stats->bs_nblocks++;
1113				goto out;
1114			} else {
1115				brelse(bh);
1116			}
1117		}
1118
1119		/* right sibling */
1120		if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) {
1121			sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1122							  ncmax);
1123			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1124			if (ret < 0)
1125				goto err_out_child_node;
1126			sib = (struct nilfs_btree_node *)bh->b_data;
1127			if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1128				path[level].bp_sib_bh = bh;
1129				path[level].bp_op = nilfs_btree_carry_right;
1130				stats->bs_nblocks++;
1131				goto out;
1132			} else {
1133				brelse(bh);
1134			}
1135		}
1136
1137		/* split */
1138		path[level].bp_newreq.bpr_ptr =
1139			path[level - 1].bp_newreq.bpr_ptr + 1;
1140		ret = nilfs_bmap_prepare_alloc_ptr(btree,
1141						   &path[level].bp_newreq, dat);
1142		if (ret < 0)
1143			goto err_out_child_node;
1144		ret = nilfs_btree_get_new_block(btree,
1145						path[level].bp_newreq.bpr_ptr,
1146						&bh);
1147		if (ret < 0)
1148			goto err_out_curr_node;
1149
1150		stats->bs_nblocks++;
1151
1152		sib = (struct nilfs_btree_node *)bh->b_data;
1153		nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL);
1154		path[level].bp_sib_bh = bh;
1155		path[level].bp_op = nilfs_btree_split;
1156	}
1157
1158	/* root */
1159	node = nilfs_btree_get_root(btree);
1160	if (nilfs_btree_node_get_nchildren(node) <
1161	    NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1162		path[level].bp_op = nilfs_btree_do_insert;
1163		stats->bs_nblocks++;
1164		goto out;
1165	}
1166
1167	/* grow */
1168	path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1169	ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1170	if (ret < 0)
1171		goto err_out_child_node;
1172	ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1173					&bh);
1174	if (ret < 0)
1175		goto err_out_curr_node;
1176
1177	nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data,
1178			      0, level, 0, ncblk, NULL, NULL);
1179	path[level].bp_sib_bh = bh;
1180	path[level].bp_op = nilfs_btree_grow;
1181
1182	level++;
1183	path[level].bp_op = nilfs_btree_do_insert;
1184
1185	/* a newly-created node block and a data block are added */
1186	stats->bs_nblocks += 2;
1187
1188	/* success */
1189 out:
1190	*levelp = level;
1191	return ret;
1192
1193	/* error */
1194 err_out_curr_node:
1195	nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1196 err_out_child_node:
1197	for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1198		nilfs_btnode_delete(path[level].bp_sib_bh);
1199		nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1200
1201	}
1202
1203	nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1204 err_out_data:
1205	*levelp = level;
1206	stats->bs_nblocks = 0;
1207	return ret;
1208}
1209
1210static void nilfs_btree_commit_insert(struct nilfs_bmap *btree,
1211				      struct nilfs_btree_path *path,
1212				      int maxlevel, __u64 key, __u64 ptr)
1213{
1214	struct inode *dat = NULL;
1215	int level;
1216
1217	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1218	ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1219	if (NILFS_BMAP_USE_VBN(btree)) {
1220		nilfs_bmap_set_target_v(btree, key, ptr);
1221		dat = nilfs_bmap_get_dat(btree);
1222	}
1223
1224	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1225		nilfs_bmap_commit_alloc_ptr(btree,
1226					    &path[level - 1].bp_newreq, dat);
1227		path[level].bp_op(btree, path, level, &key, &ptr);
1228	}
1229
1230	if (!nilfs_bmap_dirty(btree))
1231		nilfs_bmap_set_dirty(btree);
1232}
1233
1234static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
1235{
1236	struct nilfs_btree_path *path;
1237	struct nilfs_bmap_stats stats;
1238	int level, ret;
1239
1240	path = nilfs_btree_alloc_path();
1241	if (path == NULL)
1242		return -ENOMEM;
1243
1244	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1245				    NILFS_BTREE_LEVEL_NODE_MIN, 0);
1246	if (ret != -ENOENT) {
1247		if (ret == 0)
1248			ret = -EEXIST;
1249		goto out;
1250	}
1251
1252	ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1253	if (ret < 0)
1254		goto out;
1255	nilfs_btree_commit_insert(btree, path, level, key, ptr);
1256	nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1257
1258 out:
1259	nilfs_btree_free_path(path);
1260	return ret;
1261}
1262
1263static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
1264				  struct nilfs_btree_path *path,
1265				  int level, __u64 *keyp, __u64 *ptrp)
1266{
1267	struct nilfs_btree_node *node;
1268	int ncblk;
1269
1270	if (level < nilfs_btree_height(btree) - 1) {
1271		node = nilfs_btree_get_nonroot_node(path, level);
1272		ncblk = nilfs_btree_nchildren_per_block(btree);
1273		nilfs_btree_node_delete(node, path[level].bp_index,
1274					keyp, ptrp, ncblk);
1275		if (!buffer_dirty(path[level].bp_bh))
1276			mark_buffer_dirty(path[level].bp_bh);
1277		if (path[level].bp_index == 0)
1278			nilfs_btree_promote_key(btree, path, level + 1,
1279				nilfs_btree_node_get_key(node, 0));
1280	} else {
1281		node = nilfs_btree_get_root(btree);
1282		nilfs_btree_node_delete(node, path[level].bp_index,
1283					keyp, ptrp,
1284					NILFS_BTREE_ROOT_NCHILDREN_MAX);
1285	}
1286}
1287
1288static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
1289				    struct nilfs_btree_path *path,
1290				    int level, __u64 *keyp, __u64 *ptrp)
1291{
1292	struct nilfs_btree_node *node, *left;
1293	int nchildren, lnchildren, n, ncblk;
1294
1295	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1296
1297	node = nilfs_btree_get_nonroot_node(path, level);
1298	left = nilfs_btree_get_sib_node(path, level);
1299	nchildren = nilfs_btree_node_get_nchildren(node);
1300	lnchildren = nilfs_btree_node_get_nchildren(left);
1301	ncblk = nilfs_btree_nchildren_per_block(btree);
1302
1303	n = (nchildren + lnchildren) / 2 - nchildren;
1304
1305	nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
1306
1307	if (!buffer_dirty(path[level].bp_bh))
1308		mark_buffer_dirty(path[level].bp_bh);
1309	if (!buffer_dirty(path[level].bp_sib_bh))
1310		mark_buffer_dirty(path[level].bp_sib_bh);
1311
1312	nilfs_btree_promote_key(btree, path, level + 1,
1313				nilfs_btree_node_get_key(node, 0));
1314
1315	brelse(path[level].bp_sib_bh);
1316	path[level].bp_sib_bh = NULL;
1317	path[level].bp_index += n;
1318}
1319
1320static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
1321				     struct nilfs_btree_path *path,
1322				     int level, __u64 *keyp, __u64 *ptrp)
1323{
1324	struct nilfs_btree_node *node, *right;
1325	int nchildren, rnchildren, n, ncblk;
1326
1327	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1328
1329	node = nilfs_btree_get_nonroot_node(path, level);
1330	right = nilfs_btree_get_sib_node(path, level);
1331	nchildren = nilfs_btree_node_get_nchildren(node);
1332	rnchildren = nilfs_btree_node_get_nchildren(right);
1333	ncblk = nilfs_btree_nchildren_per_block(btree);
1334
1335	n = (nchildren + rnchildren) / 2 - nchildren;
1336
1337	nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1338
1339	if (!buffer_dirty(path[level].bp_bh))
1340		mark_buffer_dirty(path[level].bp_bh);
1341	if (!buffer_dirty(path[level].bp_sib_bh))
1342		mark_buffer_dirty(path[level].bp_sib_bh);
1343
1344	path[level + 1].bp_index++;
1345	nilfs_btree_promote_key(btree, path, level + 1,
1346				nilfs_btree_node_get_key(right, 0));
1347	path[level + 1].bp_index--;
1348
1349	brelse(path[level].bp_sib_bh);
1350	path[level].bp_sib_bh = NULL;
1351}
1352
1353static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
1354				    struct nilfs_btree_path *path,
1355				    int level, __u64 *keyp, __u64 *ptrp)
1356{
1357	struct nilfs_btree_node *node, *left;
1358	int n, ncblk;
1359
1360	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1361
1362	node = nilfs_btree_get_nonroot_node(path, level);
1363	left = nilfs_btree_get_sib_node(path, level);
1364	ncblk = nilfs_btree_nchildren_per_block(btree);
1365
1366	n = nilfs_btree_node_get_nchildren(node);
1367
1368	nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
1369
1370	if (!buffer_dirty(path[level].bp_sib_bh))
1371		mark_buffer_dirty(path[level].bp_sib_bh);
1372
1373	nilfs_btnode_delete(path[level].bp_bh);
1374	path[level].bp_bh = path[level].bp_sib_bh;
1375	path[level].bp_sib_bh = NULL;
1376	path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1377}
1378
1379static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
1380				     struct nilfs_btree_path *path,
1381				     int level, __u64 *keyp, __u64 *ptrp)
1382{
1383	struct nilfs_btree_node *node, *right;
1384	int n, ncblk;
1385
1386	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1387
1388	node = nilfs_btree_get_nonroot_node(path, level);
1389	right = nilfs_btree_get_sib_node(path, level);
1390	ncblk = nilfs_btree_nchildren_per_block(btree);
1391
1392	n = nilfs_btree_node_get_nchildren(right);
1393
1394	nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1395
1396	if (!buffer_dirty(path[level].bp_bh))
1397		mark_buffer_dirty(path[level].bp_bh);
1398
1399	nilfs_btnode_delete(path[level].bp_sib_bh);
1400	path[level].bp_sib_bh = NULL;
1401	path[level + 1].bp_index++;
1402}
1403
1404static void nilfs_btree_shrink(struct nilfs_bmap *btree,
1405			       struct nilfs_btree_path *path,
1406			       int level, __u64 *keyp, __u64 *ptrp)
1407{
1408	struct nilfs_btree_node *root, *child;
1409	int n, ncblk;
1410
1411	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1412
1413	root = nilfs_btree_get_root(btree);
1414	child = nilfs_btree_get_nonroot_node(path, level);
1415	ncblk = nilfs_btree_nchildren_per_block(btree);
1416
1417	nilfs_btree_node_delete(root, 0, NULL, NULL,
1418				NILFS_BTREE_ROOT_NCHILDREN_MAX);
1419	nilfs_btree_node_set_level(root, level);
1420	n = nilfs_btree_node_get_nchildren(child);
1421	nilfs_btree_node_move_left(root, child, n,
1422				   NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
1423
1424	nilfs_btnode_delete(path[level].bp_bh);
1425	path[level].bp_bh = NULL;
1426}
1427
1428static void nilfs_btree_nop(struct nilfs_bmap *btree,
1429			    struct nilfs_btree_path *path,
1430			    int level, __u64 *keyp, __u64 *ptrp)
1431{
1432}
1433
1434static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1435				      struct nilfs_btree_path *path,
1436				      int *levelp,
1437				      struct nilfs_bmap_stats *stats,
1438				      struct inode *dat)
1439{
1440	struct buffer_head *bh;
1441	struct nilfs_btree_node *node, *parent, *sib;
1442	__u64 sibptr;
1443	int pindex, dindex, level, ncmin, ncmax, ncblk, ret;
1444
1445	ret = 0;
1446	stats->bs_nblocks = 0;
1447	ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
1448	ncblk = nilfs_btree_nchildren_per_block(btree);
1449
1450	for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index;
1451	     level < nilfs_btree_height(btree) - 1;
1452	     level++) {
1453		node = nilfs_btree_get_nonroot_node(path, level);
1454		path[level].bp_oldreq.bpr_ptr =
1455			nilfs_btree_node_get_ptr(node, dindex, ncblk);
1456		ret = nilfs_bmap_prepare_end_ptr(btree,
1457						 &path[level].bp_oldreq, dat);
1458		if (ret < 0)
1459			goto err_out_child_node;
1460
1461		if (nilfs_btree_node_get_nchildren(node) > ncmin) {
1462			path[level].bp_op = nilfs_btree_do_delete;
1463			stats->bs_nblocks++;
1464			goto out;
1465		}
1466
1467		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1468		pindex = path[level + 1].bp_index;
1469		dindex = pindex;
1470
1471		if (pindex > 0) {
1472			/* left sibling */
1473			sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1474							  ncmax);
1475			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1476			if (ret < 0)
1477				goto err_out_curr_node;
1478			sib = (struct nilfs_btree_node *)bh->b_data;
1479			if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1480				path[level].bp_sib_bh = bh;
1481				path[level].bp_op = nilfs_btree_borrow_left;
1482				stats->bs_nblocks++;
1483				goto out;
1484			} else {
1485				path[level].bp_sib_bh = bh;
1486				path[level].bp_op = nilfs_btree_concat_left;
1487				stats->bs_nblocks++;
1488				/* continue; */
1489			}
1490		} else if (pindex <
1491			   nilfs_btree_node_get_nchildren(parent) - 1) {
1492			/* right sibling */
1493			sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1494							  ncmax);
1495			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1496			if (ret < 0)
1497				goto err_out_curr_node;
1498			sib = (struct nilfs_btree_node *)bh->b_data;
1499			if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1500				path[level].bp_sib_bh = bh;
1501				path[level].bp_op = nilfs_btree_borrow_right;
1502				stats->bs_nblocks++;
1503				goto out;
1504			} else {
1505				path[level].bp_sib_bh = bh;
1506				path[level].bp_op = nilfs_btree_concat_right;
1507				stats->bs_nblocks++;
1508				/*
1509				 * When merging right sibling node
1510				 * into the current node, pointer to
1511				 * the right sibling node must be
1512				 * terminated instead.  The adjustment
1513				 * below is required for that.
1514				 */
1515				dindex = pindex + 1;
1516				/* continue; */
1517			}
1518		} else {
1519			/* no siblings */
1520			/* the only child of the root node */
1521			WARN_ON(level != nilfs_btree_height(btree) - 2);
1522			if (nilfs_btree_node_get_nchildren(node) - 1 <=
1523			    NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1524				path[level].bp_op = nilfs_btree_shrink;
1525				stats->bs_nblocks += 2;
1526				level++;
1527				path[level].bp_op = nilfs_btree_nop;
1528				goto shrink_root_child;
1529			} else {
1530				path[level].bp_op = nilfs_btree_do_delete;
1531				stats->bs_nblocks++;
1532				goto out;
1533			}
1534		}
1535	}
1536
1537	/* child of the root node is deleted */
1538	path[level].bp_op = nilfs_btree_do_delete;
1539	stats->bs_nblocks++;
1540
1541shrink_root_child:
1542	node = nilfs_btree_get_root(btree);
1543	path[level].bp_oldreq.bpr_ptr =
1544		nilfs_btree_node_get_ptr(node, dindex,
1545					 NILFS_BTREE_ROOT_NCHILDREN_MAX);
1546
1547	ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
1548	if (ret < 0)
1549		goto err_out_child_node;
1550
1551	/* success */
1552 out:
1553	*levelp = level;
1554	return ret;
1555
1556	/* error */
1557 err_out_curr_node:
1558	nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1559 err_out_child_node:
1560	for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1561		brelse(path[level].bp_sib_bh);
1562		nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1563	}
1564	*levelp = level;
1565	stats->bs_nblocks = 0;
1566	return ret;
1567}
1568
1569static void nilfs_btree_commit_delete(struct nilfs_bmap *btree,
1570				      struct nilfs_btree_path *path,
1571				      int maxlevel, struct inode *dat)
1572{
1573	int level;
1574
1575	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1576		nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat);
1577		path[level].bp_op(btree, path, level, NULL, NULL);
1578	}
1579
1580	if (!nilfs_bmap_dirty(btree))
1581		nilfs_bmap_set_dirty(btree);
1582}
1583
1584static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key)
1585
1586{
1587	struct nilfs_btree_path *path;
1588	struct nilfs_bmap_stats stats;
1589	struct inode *dat;
1590	int level, ret;
1591
1592	path = nilfs_btree_alloc_path();
1593	if (path == NULL)
1594		return -ENOMEM;
1595
1596	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1597				    NILFS_BTREE_LEVEL_NODE_MIN, 0);
1598	if (ret < 0)
1599		goto out;
1600
1601
1602	dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1603
1604	ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1605	if (ret < 0)
1606		goto out;
1607	nilfs_btree_commit_delete(btree, path, level, dat);
1608	nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks);
1609
1610out:
1611	nilfs_btree_free_path(path);
1612	return ret;
1613}
1614
1615static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start,
1616				__u64 *keyp)
1617{
1618	struct nilfs_btree_path *path;
1619	const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN;
1620	int ret;
1621
1622	path = nilfs_btree_alloc_path();
1623	if (!path)
1624		return -ENOMEM;
1625
1626	ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0);
1627	if (!ret)
1628		*keyp = start;
1629	else if (ret == -ENOENT)
1630		ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp);
1631
1632	nilfs_btree_free_path(path);
1633	return ret;
1634}
1635
1636static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp)
1637{
1638	struct nilfs_btree_path *path;
1639	int ret;
1640
1641	path = nilfs_btree_alloc_path();
1642	if (path == NULL)
1643		return -ENOMEM;
1644
1645	ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1646
1647	nilfs_btree_free_path(path);
1648
1649	return ret;
1650}
1651
1652static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
1653{
1654	struct buffer_head *bh;
1655	struct nilfs_btree_node *root, *node;
1656	__u64 maxkey, nextmaxkey;
1657	__u64 ptr;
1658	int nchildren, ret;
1659
1660	root = nilfs_btree_get_root(btree);
1661	switch (nilfs_btree_height(btree)) {
1662	case 2:
1663		bh = NULL;
1664		node = root;
1665		break;
1666	case 3:
1667		nchildren = nilfs_btree_node_get_nchildren(root);
1668		if (nchildren > 1)
1669			return 0;
1670		ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1671					       NILFS_BTREE_ROOT_NCHILDREN_MAX);
1672		ret = nilfs_btree_get_block(btree, ptr, &bh);
1673		if (ret < 0)
1674			return ret;
1675		node = (struct nilfs_btree_node *)bh->b_data;
1676		break;
1677	default:
1678		return 0;
1679	}
1680
1681	nchildren = nilfs_btree_node_get_nchildren(node);
1682	maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1683	nextmaxkey = (nchildren > 1) ?
1684		nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1685	brelse(bh);
1686
1687	return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1688}
1689
1690static int nilfs_btree_gather_data(struct nilfs_bmap *btree,
1691				   __u64 *keys, __u64 *ptrs, int nitems)
1692{
1693	struct buffer_head *bh;
1694	struct nilfs_btree_node *node, *root;
1695	__le64 *dkeys;
1696	__le64 *dptrs;
1697	__u64 ptr;
1698	int nchildren, ncmax, i, ret;
1699
1700	root = nilfs_btree_get_root(btree);
1701	switch (nilfs_btree_height(btree)) {
1702	case 2:
1703		bh = NULL;
1704		node = root;
1705		ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX;
1706		break;
1707	case 3:
1708		nchildren = nilfs_btree_node_get_nchildren(root);
1709		WARN_ON(nchildren > 1);
1710		ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1711					       NILFS_BTREE_ROOT_NCHILDREN_MAX);
1712		ret = nilfs_btree_get_block(btree, ptr, &bh);
1713		if (ret < 0)
1714			return ret;
1715		node = (struct nilfs_btree_node *)bh->b_data;
1716		ncmax = nilfs_btree_nchildren_per_block(btree);
1717		break;
1718	default:
1719		node = NULL;
1720		return -EINVAL;
1721	}
1722
1723	nchildren = nilfs_btree_node_get_nchildren(node);
1724	if (nchildren < nitems)
1725		nitems = nchildren;
1726	dkeys = nilfs_btree_node_dkeys(node);
1727	dptrs = nilfs_btree_node_dptrs(node, ncmax);
1728	for (i = 0; i < nitems; i++) {
1729		keys[i] = le64_to_cpu(dkeys[i]);
1730		ptrs[i] = le64_to_cpu(dptrs[i]);
1731	}
1732
1733	brelse(bh);
1734
1735	return nitems;
1736}
1737
1738static int
1739nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
1740				       union nilfs_bmap_ptr_req *dreq,
1741				       union nilfs_bmap_ptr_req *nreq,
1742				       struct buffer_head **bhp,
1743				       struct nilfs_bmap_stats *stats)
1744{
1745	struct buffer_head *bh;
1746	struct inode *dat = NULL;
1747	int ret;
1748
1749	stats->bs_nblocks = 0;
1750
1751	/* for data */
1752	/* cannot find near ptr */
1753	if (NILFS_BMAP_USE_VBN(btree)) {
1754		dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1755		dat = nilfs_bmap_get_dat(btree);
1756	}
1757
1758	ret = nilfs_attach_btree_node_cache(&NILFS_BMAP_I(btree)->vfs_inode);
1759	if (ret < 0)
1760		return ret;
1761
1762	ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
1763	if (ret < 0)
1764		return ret;
1765
1766	*bhp = NULL;
1767	stats->bs_nblocks++;
1768	if (nreq != NULL) {
1769		nreq->bpr_ptr = dreq->bpr_ptr + 1;
1770		ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat);
1771		if (ret < 0)
1772			goto err_out_dreq;
1773
1774		ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1775		if (ret < 0)
1776			goto err_out_nreq;
1777
1778		*bhp = bh;
1779		stats->bs_nblocks++;
1780	}
1781
1782	/* success */
1783	return 0;
1784
1785	/* error */
1786 err_out_nreq:
1787	nilfs_bmap_abort_alloc_ptr(btree, nreq, dat);
1788 err_out_dreq:
1789	nilfs_bmap_abort_alloc_ptr(btree, dreq, dat);
1790	stats->bs_nblocks = 0;
1791	return ret;
1792
1793}
1794
1795static void
1796nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
1797				      __u64 key, __u64 ptr,
1798				      const __u64 *keys, const __u64 *ptrs,
1799				      int n,
1800				      union nilfs_bmap_ptr_req *dreq,
1801				      union nilfs_bmap_ptr_req *nreq,
1802				      struct buffer_head *bh)
1803{
1804	struct nilfs_btree_node *node;
1805	struct inode *dat;
1806	__u64 tmpptr;
1807	int ncblk;
1808
1809	/* free resources */
1810	if (btree->b_ops->bop_clear != NULL)
1811		btree->b_ops->bop_clear(btree);
1812
1813	/* ptr must be a pointer to a buffer head. */
1814	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1815
1816	/* convert and insert */
1817	dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1818	__nilfs_btree_init(btree);
1819	if (nreq != NULL) {
1820		nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1821		nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
1822
1823		/* create child node at level 1 */
1824		node = (struct nilfs_btree_node *)bh->b_data;
1825		ncblk = nilfs_btree_nchildren_per_block(btree);
1826		nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
1827		nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
1828		if (!buffer_dirty(bh))
1829			mark_buffer_dirty(bh);
1830		if (!nilfs_bmap_dirty(btree))
1831			nilfs_bmap_set_dirty(btree);
1832
1833		brelse(bh);
1834
1835		/* create root node at level 2 */
1836		node = nilfs_btree_get_root(btree);
1837		tmpptr = nreq->bpr_ptr;
1838		nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1,
1839				      NILFS_BTREE_ROOT_NCHILDREN_MAX,
1840				      &keys[0], &tmpptr);
1841	} else {
1842		nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1843
1844		/* create root node at level 1 */
1845		node = nilfs_btree_get_root(btree);
1846		nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n,
1847				      NILFS_BTREE_ROOT_NCHILDREN_MAX,
1848				      keys, ptrs);
1849		nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr,
1850					NILFS_BTREE_ROOT_NCHILDREN_MAX);
1851		if (!nilfs_bmap_dirty(btree))
1852			nilfs_bmap_set_dirty(btree);
1853	}
1854
1855	if (NILFS_BMAP_USE_VBN(btree))
1856		nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr);
1857}
1858
1859/**
1860 * nilfs_btree_convert_and_insert -
1861 * @bmap:
1862 * @key:
1863 * @ptr:
1864 * @keys:
1865 * @ptrs:
1866 * @n:
1867 */
1868int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
1869				   __u64 key, __u64 ptr,
1870				   const __u64 *keys, const __u64 *ptrs, int n)
1871{
1872	struct buffer_head *bh = NULL;
1873	union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1874	struct nilfs_bmap_stats stats;
1875	int ret;
1876
1877	if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1878		di = &dreq;
1879		ni = NULL;
1880	} else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1881			   nilfs_btree_node_size(btree))) {
1882		di = &dreq;
1883		ni = &nreq;
1884	} else {
1885		di = NULL;
1886		ni = NULL;
1887		BUG();
1888	}
1889
1890	ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh,
1891						     &stats);
1892	if (ret < 0)
1893		return ret;
1894	nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
1895					      di, ni, bh);
1896	nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1897	return 0;
1898}
1899
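/*
 * Propagation for bmaps that use physical block numbers: dirtying a block
 * only requires marking the buffers of its ancestor nodes dirty, walking up
 * from @level until an already dirty ancestor or the direct child of the
 * root is reached (the root itself lives in the bmap, not in a buffer).
 */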
1900static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
1901				   struct nilfs_btree_path *path,
1902				   int level,
1903				   struct buffer_head *bh)
1904{
1905	while ((++level < nilfs_btree_height(btree) - 1) &&
1906	       !buffer_dirty(path[level].bp_bh))
1907		mark_buffer_dirty(path[level].bp_bh);
1908
1909	return 0;
1910}
1911
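/*
 * Prepare renewing the virtual block number of the block at @level: a DAT
 * update from the old pointer to a new one is prepared, and, for B-tree
 * node buffers, the matching key change in the node cache (which is indexed
 * by pointer value) is prepared as well.  Any failure is rolled back.
 */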
1912static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
1913					struct nilfs_btree_path *path,
1914					int level, struct inode *dat)
1915{
1916	struct nilfs_btree_node *parent;
1917	int ncmax, ret;
1918
1919	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1920	path[level].bp_oldreq.bpr_ptr =
1921		nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
1922					 ncmax);
1923	path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1924	ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1925				       &path[level].bp_newreq.bpr_req);
1926	if (ret < 0)
1927		return ret;
1928
1929	if (buffer_nilfs_node(path[level].bp_bh)) {
1930		path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1931		path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1932		path[level].bp_ctxt.bh = path[level].bp_bh;
1933		ret = nilfs_btnode_prepare_change_key(
1934			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1935			&path[level].bp_ctxt);
1936		if (ret < 0) {
1937			nilfs_dat_abort_update(dat,
1938					       &path[level].bp_oldreq.bpr_req,
1939					       &path[level].bp_newreq.bpr_req);
1940			return ret;
1941		}
1942	}
1943
1944	return 0;
1945}
1946
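/*
 * Commit counterpart of nilfs_btree_prepare_update_v(): finalize the DAT
 * update and the node cache re-keying, mark the buffer volatile, and store
 * the new pointer in the parent node.
 */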
1947static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,
1948					struct nilfs_btree_path *path,
1949					int level, struct inode *dat)
1950{
1951	struct nilfs_btree_node *parent;
1952	int ncmax;
1953
1954	nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1955				&path[level].bp_newreq.bpr_req,
1956				btree->b_ptr_type == NILFS_BMAP_PTR_VS);
1957
1958	if (buffer_nilfs_node(path[level].bp_bh)) {
1959		nilfs_btnode_commit_change_key(
1960			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1961			&path[level].bp_ctxt);
1962		path[level].bp_bh = path[level].bp_ctxt.bh;
1963	}
1964	set_buffer_nilfs_volatile(path[level].bp_bh);
1965
1966	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1967	nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index,
1968				 path[level].bp_newreq.bpr_ptr, ncmax);
1969}
1970
1971static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
1972				       struct nilfs_btree_path *path,
1973				       int level, struct inode *dat)
1974{
1975	nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1976			       &path[level].bp_newreq.bpr_req);
1977	if (buffer_nilfs_node(path[level].bp_bh))
1978		nilfs_btnode_abort_change_key(
1979			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1980			&path[level].bp_ctxt);
1981}
1982
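/*
 * Prepare pointer updates for the buffers on @path from @minlevel upward,
 * stopping below the root or at the first already dirty buffer, and report
 * the highest level reached through @maxlevelp.  A buffer at @minlevel that
 * is already volatile needs no new pointer and is skipped.  On failure all
 * updates prepared so far are aborted.
 */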
1983static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree,
1984					   struct nilfs_btree_path *path,
1985					   int minlevel, int *maxlevelp,
1986					   struct inode *dat)
1987{
1988	int level, ret;
1989
1990	level = minlevel;
1991	if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1992		ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1993		if (ret < 0)
1994			return ret;
1995	}
1996	while ((++level < nilfs_btree_height(btree) - 1) &&
1997	       !buffer_dirty(path[level].bp_bh)) {
1999		WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
2000		ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
2001		if (ret < 0)
2002			goto out;
2003	}
2004
2005	/* success */
2006	*maxlevelp = level - 1;
2007	return 0;
2008
2009	/* error */
2010 out:
2011	while (--level > minlevel)
2012		nilfs_btree_abort_update_v(btree, path, level, dat);
2013	if (!buffer_nilfs_volatile(path[level].bp_bh))
2014		nilfs_btree_abort_update_v(btree, path, level, dat);
2015	return ret;
2016}
2017
2018static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree,
2019					   struct nilfs_btree_path *path,
2020					   int minlevel, int maxlevel,
2021					   struct buffer_head *bh,
2022					   struct inode *dat)
2023{
2024	int level;
2025
2026	if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
2027		nilfs_btree_commit_update_v(btree, path, minlevel, dat);
2028
2029	for (level = minlevel + 1; level <= maxlevel; level++)
2030		nilfs_btree_commit_update_v(btree, path, level, dat);
2031}
2032
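/*
 * Propagation for bmaps that use virtual block numbers: renew the DAT
 * pointers of @bh and of its not yet dirty ancestor nodes.  If @bh is
 * already volatile it keeps its pointer, and only its DAT entry is marked
 * dirty.
 */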
2033static int nilfs_btree_propagate_v(struct nilfs_bmap *btree,
2034				   struct nilfs_btree_path *path,
2035				   int level, struct buffer_head *bh)
2036{
2037	int maxlevel = 0, ret;
2038	struct nilfs_btree_node *parent;
2039	struct inode *dat = nilfs_bmap_get_dat(btree);
2040	__u64 ptr;
2041	int ncmax;
2042
2043	get_bh(bh);
2044	path[level].bp_bh = bh;
2045	ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
2046					      dat);
2047	if (ret < 0)
2048		goto out;
2049
2050	if (buffer_nilfs_volatile(path[level].bp_bh)) {
2051		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2052		ptr = nilfs_btree_node_get_ptr(parent,
2053					       path[level + 1].bp_index,
2054					       ncmax);
2055		ret = nilfs_dat_mark_dirty(dat, ptr);
2056		if (ret < 0)
2057			goto out;
2058	}
2059
2060	nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
2061
2062 out:
2063	brelse(path[level].bp_bh);
2064	path[level].bp_bh = NULL;
2065	return ret;
2066}
2067
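/*
 * bop_propagate operation of the B-tree: look up the path to @bh by its key
 * and propagate its dirty state upward, using the virtual or physical
 * variant depending on the pointer type of @btree.
 */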
2068static int nilfs_btree_propagate(struct nilfs_bmap *btree,
2069				 struct buffer_head *bh)
2070{
2071	struct nilfs_btree_path *path;
2072	struct nilfs_btree_node *node;
2073	__u64 key;
2074	int level, ret;
2075
2076	WARN_ON(!buffer_dirty(bh));
2077
2078	path = nilfs_btree_alloc_path();
2079	if (path == NULL)
2080		return -ENOMEM;
2081
2082	if (buffer_nilfs_node(bh)) {
2083		node = (struct nilfs_btree_node *)bh->b_data;
2084		key = nilfs_btree_node_get_key(node, 0);
2085		level = nilfs_btree_node_get_level(node);
2086	} else {
2087		key = nilfs_bmap_data_get_key(btree, bh);
2088		level = NILFS_BTREE_LEVEL_DATA;
2089	}
2090
2091	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2092	if (ret < 0) {
2093		if (unlikely(ret == -ENOENT))
2094			nilfs_crit(btree->b_inode->i_sb,
2095				   "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d",
2096				   btree->b_inode->i_ino,
2097				   (unsigned long long)key, level);
2098		goto out;
2099	}
2100
2101	ret = NILFS_BMAP_USE_VBN(btree) ?
2102		nilfs_btree_propagate_v(btree, path, level, bh) :
2103		nilfs_btree_propagate_p(btree, path, level, bh);
2104
2105 out:
2106	nilfs_btree_free_path(path);
2107
2108	return ret;
2109}
2110
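/*
 * bop_propagate operation used while cleaning: marking the DAT entry for
 * the buffer's block number dirty is all that is needed here.
 */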
2111static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree,
2112				    struct buffer_head *bh)
2113{
2114	return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr);
2115}
2116
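/*
 * Queue @bh on the list in @lists that matches its node level, keeping each
 * per-level list sorted by the node's first key.  Buffers carrying an
 * out-of-range level are reported and ignored.
 */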
2117static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
2118					 struct list_head *lists,
2119					 struct buffer_head *bh)
2120{
2121	struct list_head *head;
2122	struct buffer_head *cbh;
2123	struct nilfs_btree_node *node, *cnode;
2124	__u64 key, ckey;
2125	int level;
2126
	node = (struct nilfs_btree_node *)bh->b_data;
	key = nilfs_btree_node_get_key(node, 0);
	level = nilfs_btree_node_get_level(node);
	if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
	    level >= NILFS_BTREE_LEVEL_MAX) {
		dump_stack();
		nilfs_warn(btree->b_inode->i_sb,
			   "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)",
			   level, (unsigned long long)key,
			   btree->b_inode->i_ino,
			   (unsigned long long)bh->b_blocknr);
		return;
	}

	/*
	 * Take the extra buffer reference only after the node level has been
	 * validated, so that the early return above does not leak it.
	 */
	get_bh(bh);
2141
2142	list_for_each(head, &lists[level]) {
2143		cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
2144		cnode = (struct nilfs_btree_node *)cbh->b_data;
2145		ckey = nilfs_btree_node_get_key(cnode, 0);
2146		if (key < ckey)
2147			break;
2148	}
2149	list_add_tail(&bh->b_assoc_buffers, head);
2150}
2151
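/*
 * bop_lookup_dirty_buffers operation: collect all dirty B-tree node buffers
 * from the node cache onto @listp, ordered by ascending level (and by key
 * within each level) so that child nodes come before their parents.
 */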
2152static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
2153					     struct list_head *listp)
2154{
2155	struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
2156	struct address_space *btcache = btnc_inode->i_mapping;
2157	struct list_head lists[NILFS_BTREE_LEVEL_MAX];
2158	struct folio_batch fbatch;
2159	struct buffer_head *bh, *head;
2160	pgoff_t index = 0;
2161	int level, i;
2162
2163	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2164	     level < NILFS_BTREE_LEVEL_MAX;
2165	     level++)
2166		INIT_LIST_HEAD(&lists[level]);
2167
2168	folio_batch_init(&fbatch);
2169
2170	while (filemap_get_folios_tag(btcache, &index, (pgoff_t)-1,
2171				PAGECACHE_TAG_DIRTY, &fbatch)) {
2172		for (i = 0; i < folio_batch_count(&fbatch); i++) {
2173			bh = head = folio_buffers(fbatch.folios[i]);
2174			do {
2175				if (buffer_dirty(bh))
2176					nilfs_btree_add_dirty_buffer(btree,
2177								     lists, bh);
2178			} while ((bh = bh->b_this_page) != head);
2179		}
2180		folio_batch_release(&fbatch);
2181		cond_resched();
2182	}
2183
2184	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2185	     level < NILFS_BTREE_LEVEL_MAX;
2186	     level++)
2187		list_splice_tail(&lists[level], listp);
2188}
2189
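/*
 * Assign the disk block number @blocknr to the block at @level of a bmap
 * with physical pointers: node buffers are re-keyed in the node cache, the
 * parent pointer is rewritten to @blocknr, and the on-disk binfo (bi_dat)
 * records the block's key and level.
 */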
2190static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
2191				struct nilfs_btree_path *path,
2192				int level,
2193				struct buffer_head **bh,
2194				sector_t blocknr,
2195				union nilfs_binfo *binfo)
2196{
2197	struct nilfs_btree_node *parent;
2198	__u64 key;
2199	__u64 ptr;
2200	int ncmax, ret;
2201
2202	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2203	ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2204				       ncmax);
2205	if (buffer_nilfs_node(*bh)) {
2206		path[level].bp_ctxt.oldkey = ptr;
2207		path[level].bp_ctxt.newkey = blocknr;
2208		path[level].bp_ctxt.bh = *bh;
2209		ret = nilfs_btnode_prepare_change_key(
2210			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
2211			&path[level].bp_ctxt);
2212		if (ret < 0)
2213			return ret;
2214		nilfs_btnode_commit_change_key(
2215			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
2216			&path[level].bp_ctxt);
2217		*bh = path[level].bp_ctxt.bh;
2218	}
2219
2220	nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr,
2221				 ncmax);
2222
2223	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2224	/* on-disk format */
2225	binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
2226	binfo->bi_dat.bi_level = level;
2227	memset(binfo->bi_dat.bi_pad, 0, sizeof(binfo->bi_dat.bi_pad));
2228
2229	return 0;
2230}
2231
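/*
 * Assign the disk block number @blocknr to the block at @level of a bmap
 * with virtual pointers: the block's virtual block number is bound to
 * @blocknr in the DAT, and the on-disk binfo (bi_v) records that virtual
 * block number together with the block's key.  The parent pointer keeps
 * referring to the virtual block number.
 */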
2232static int nilfs_btree_assign_v(struct nilfs_bmap *btree,
2233				struct nilfs_btree_path *path,
2234				int level,
2235				struct buffer_head **bh,
2236				sector_t blocknr,
2237				union nilfs_binfo *binfo)
2238{
2239	struct nilfs_btree_node *parent;
2240	struct inode *dat = nilfs_bmap_get_dat(btree);
2241	__u64 key;
2242	__u64 ptr;
2243	union nilfs_bmap_ptr_req req;
2244	int ncmax, ret;
2245
2246	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2247	ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2248				       ncmax);
2249	req.bpr_ptr = ptr;
2250	ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2251	if (ret < 0)
2252		return ret;
2253	nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2254
2255	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2256	/* on-disk format */
2257	binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
2258	binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2259
2260	return 0;
2261}
2262
2263static int nilfs_btree_assign(struct nilfs_bmap *btree,
2264			      struct buffer_head **bh,
2265			      sector_t blocknr,
2266			      union nilfs_binfo *binfo)
2267{
2268	struct nilfs_btree_path *path;
2269	struct nilfs_btree_node *node;
2270	__u64 key;
2271	int level, ret;
2272
2273	path = nilfs_btree_alloc_path();
2274	if (path == NULL)
2275		return -ENOMEM;
2276
2277	if (buffer_nilfs_node(*bh)) {
2278		node = (struct nilfs_btree_node *)(*bh)->b_data;
2279		key = nilfs_btree_node_get_key(node, 0);
2280		level = nilfs_btree_node_get_level(node);
2281	} else {
2282		key = nilfs_bmap_data_get_key(btree, *bh);
2283		level = NILFS_BTREE_LEVEL_DATA;
2284	}
2285
2286	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2287	if (ret < 0) {
2288		WARN_ON(ret == -ENOENT);
2289		goto out;
2290	}
2291
2292	ret = NILFS_BMAP_USE_VBN(btree) ?
2293		nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2294		nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2295
2296 out:
2297	nilfs_btree_free_path(path);
2298
2299	return ret;
2300}
2301
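/*
 * bop_assign operation used while cleaning: no new pointer is handed out;
 * the DAT entry identified by the buffer's block number is moved to
 * @blocknr, and bi_v records that same block number together with the
 * block's key.
 */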
2302static int nilfs_btree_assign_gc(struct nilfs_bmap *btree,
2303				 struct buffer_head **bh,
2304				 sector_t blocknr,
2305				 union nilfs_binfo *binfo)
2306{
2307	struct nilfs_btree_node *node;
2308	__u64 key;
2309	int ret;
2310
2311	ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr,
2312			     blocknr);
2313	if (ret < 0)
2314		return ret;
2315
2316	if (buffer_nilfs_node(*bh)) {
2317		node = (struct nilfs_btree_node *)(*bh)->b_data;
2318		key = nilfs_btree_node_get_key(node, 0);
	} else {
		key = nilfs_bmap_data_get_key(btree, *bh);
	}
2321
2322	/* on-disk format */
2323	binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2324	binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2325
2326	return 0;
2327}
2328
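/*
 * bop_mark operation: look up the block at (@key, @level), mark its buffer
 * dirty, and flag the bmap dirty so the block gets written out.
 */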
2329static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
2330{
2331	struct buffer_head *bh;
2332	struct nilfs_btree_path *path;
2333	__u64 ptr;
2334	int ret;
2335
2336	path = nilfs_btree_alloc_path();
2337	if (path == NULL)
2338		return -ENOMEM;
2339
2340	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0);
2341	if (ret < 0) {
2342		WARN_ON(ret == -ENOENT);
2343		goto out;
2344	}
2345	ret = nilfs_btree_get_block(btree, ptr, &bh);
2346	if (ret < 0) {
2347		WARN_ON(ret == -ENOENT);
2348		goto out;
2349	}
2350
2351	if (!buffer_dirty(bh))
2352		mark_buffer_dirty(bh);
2353	brelse(bh);
2354	if (!nilfs_bmap_dirty(btree))
2355		nilfs_bmap_set_dirty(btree);
2356
2357 out:
2358	nilfs_btree_free_path(path);
2359	return ret;
2360}
2361
2362static const struct nilfs_bmap_operations nilfs_btree_ops = {
2363	.bop_lookup		=	nilfs_btree_lookup,
2364	.bop_lookup_contig	=	nilfs_btree_lookup_contig,
2365	.bop_insert		=	nilfs_btree_insert,
2366	.bop_delete		=	nilfs_btree_delete,
2367	.bop_clear		=	NULL,
2368
2369	.bop_propagate		=	nilfs_btree_propagate,
2370
2371	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,
2372
2373	.bop_assign		=	nilfs_btree_assign,
2374	.bop_mark		=	nilfs_btree_mark,
2375
2376	.bop_seek_key		=	nilfs_btree_seek_key,
2377	.bop_last_key		=	nilfs_btree_last_key,
2378
2379	.bop_check_insert	=	NULL,
2380	.bop_check_delete	=	nilfs_btree_check_delete,
2381	.bop_gather_data	=	nilfs_btree_gather_data,
2382};
2383
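/*
 * Restricted operation set installed by nilfs_btree_init_gc() for bmaps of
 * GC inodes; only dirty-buffer lookup, propagation, and block assignment
 * are provided.
 */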
2384static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2385	.bop_lookup		=	NULL,
2386	.bop_lookup_contig	=	NULL,
2387	.bop_insert		=	NULL,
2388	.bop_delete		=	NULL,
2389	.bop_clear		=	NULL,
2390
2391	.bop_propagate		=	nilfs_btree_propagate_gc,
2392
2393	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,
2394
2395	.bop_assign		=	nilfs_btree_assign_gc,
2396	.bop_mark		=	NULL,
2397
2398	.bop_seek_key		=	NULL,
2399	.bop_last_key		=	NULL,
2400
2401	.bop_check_insert	=	NULL,
2402	.bop_check_delete	=	NULL,
2403	.bop_gather_data	=	NULL,
2404};
2405
2406static void __nilfs_btree_init(struct nilfs_bmap *bmap)
2407{
2408	bmap->b_ops = &nilfs_btree_ops;
2409	bmap->b_nchildren_per_block =
2410		NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2411}
2412
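/*
 * Initialize @bmap as a B-tree mapping: install the B-tree operations,
 * sanity-check the root node stored in the bmap, and attach the cache used
 * for B-tree node blocks.
 */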
2413int nilfs_btree_init(struct nilfs_bmap *bmap)
2414{
2415	int ret = 0;
2416
2417	__nilfs_btree_init(bmap);
2418
2419	if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode))
2420		ret = -EIO;
2421	else
2422		ret = nilfs_attach_btree_node_cache(
2423			&NILFS_BMAP_I(bmap)->vfs_inode);
2424
2425	return ret;
2426}
2427
2428void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2429{
2430	bmap->b_ops = &nilfs_btree_ops_gc;
2431	bmap->b_nchildren_per_block =
2432		NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2433}
2434