1/*
2 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_types.h"
21#include "xfs_bit.h"
22#include "xfs_log.h"
23#include "xfs_inum.h"
24#include "xfs_trans.h"
25#include "xfs_sb.h"
26#include "xfs_ag.h"
27#include "xfs_dir.h"
28#include "xfs_dir2.h"
29#include "xfs_dmapi.h"
30#include "xfs_mount.h"
31#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h"
34#include "xfs_dir_sf.h"
35#include "xfs_dir2_sf.h"
36#include "xfs_attr_sf.h"
37#include "xfs_dinode.h"
38#include "xfs_inode.h"
39#include "xfs_btree.h"
40#include "xfs_ialloc.h"
41#include "xfs_alloc.h"
42#include "xfs_error.h"
43
44STATIC void xfs_inobt_log_block(xfs_trans_t *, xfs_buf_t *, int);
45STATIC void xfs_inobt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int);
46STATIC void xfs_inobt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
47STATIC void xfs_inobt_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
48STATIC int xfs_inobt_lshift(xfs_btree_cur_t *, int, int *);
49STATIC int xfs_inobt_newroot(xfs_btree_cur_t *, int *);
50STATIC int xfs_inobt_rshift(xfs_btree_cur_t *, int, int *);
51STATIC int xfs_inobt_split(xfs_btree_cur_t *, int, xfs_agblock_t *,
52		xfs_inobt_key_t *, xfs_btree_cur_t **, int *);
53STATIC int xfs_inobt_updkey(xfs_btree_cur_t *, xfs_inobt_key_t *, int);
54
55/*
56 * Single level of the xfs_inobt_delete record deletion routine.
57 * Delete record pointed to by cur/level.
58 * Remove the record from its block then rebalance the tree.
59 * Return 0 for error, 1 for done, 2 to go on to the next level.
60 */
61STATIC int				/* error */
62xfs_inobt_delrec(
63	xfs_btree_cur_t		*cur,	/* btree cursor */
64	int			level,	/* level removing record from */
65	int			*stat)	/* fail/done/go-on */
66{
67	xfs_buf_t		*agbp;	/* buffer for a.g. inode header */
68	xfs_mount_t		*mp;	/* mount structure */
69	xfs_agi_t		*agi;	/* allocation group inode header */
70	xfs_inobt_block_t	*block;	/* btree block record/key lives in */
71	xfs_agblock_t		bno;	/* btree block number */
72	xfs_buf_t		*bp;	/* buffer for block */
73	int			error;	/* error return value */
74	int			i;	/* loop index */
75	xfs_inobt_key_t		key;	/* kp points here if block is level 0 */
76	xfs_inobt_key_t		*kp = NULL;	/* pointer to btree keys */
77	xfs_agblock_t		lbno;	/* left block's block number */
78	xfs_buf_t		*lbp;	/* left block's buffer pointer */
79	xfs_inobt_block_t	*left;	/* left btree block */
80	xfs_inobt_key_t		*lkp;	/* left block key pointer */
81	xfs_inobt_ptr_t		*lpp;	/* left block address pointer */
82	int			lrecs = 0;	/* number of records in left block */
83	xfs_inobt_rec_t		*lrp;	/* left block record pointer */
84	xfs_inobt_ptr_t		*pp = NULL;	/* pointer to btree addresses */
85	int			ptr;	/* index in btree block for this rec */
86	xfs_agblock_t		rbno;	/* right block's block number */
87	xfs_buf_t		*rbp;	/* right block's buffer pointer */
88	xfs_inobt_block_t	*right;	/* right btree block */
89	xfs_inobt_key_t		*rkp;	/* right block key pointer */
90	xfs_inobt_rec_t		*rp;	/* pointer to btree records */
91	xfs_inobt_ptr_t		*rpp;	/* right block address pointer */
92	int			rrecs = 0;	/* number of records in right block */
93	int			numrecs;
94	xfs_inobt_rec_t		*rrp;	/* right block record pointer */
95	xfs_btree_cur_t		*tcur;	/* temporary btree cursor */
96
97	mp = cur->bc_mp;
98
99	/*
100	 * Get the index of the entry being deleted, check for nothing there.
101	 */
102	ptr = cur->bc_ptrs[level];
103	if (ptr == 0) {
104		*stat = 0;
105		return 0;
106	}
107
108	/*
109	 * Get the buffer & block containing the record or key/ptr.
110	 */
111	bp = cur->bc_bufs[level];
112	block = XFS_BUF_TO_INOBT_BLOCK(bp);
113#ifdef DEBUG
114	if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
115		return error;
116#endif
117	/*
118	 * Fail if we're off the end of the block.
119	 */
120
121	numrecs = be16_to_cpu(block->bb_numrecs);
122	if (ptr > numrecs) {
123		*stat = 0;
124		return 0;
125	}
126	/*
127	 * It's a nonleaf.  Excise the key and ptr being deleted, by
128	 * sliding the entries past them down one.
129	 * Log the changed areas of the block.
130	 */
131	if (level > 0) {
132		kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
133		pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
134#ifdef DEBUG
135		for (i = ptr; i < numrecs; i++) {
136			if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i]), level)))
137				return error;
138		}
139#endif
140		if (ptr < numrecs) {
141			memmove(&kp[ptr - 1], &kp[ptr],
142				(numrecs - ptr) * sizeof(*kp));
143			memmove(&pp[ptr - 1], &pp[ptr],
144				(numrecs - ptr) * sizeof(*kp));
145			xfs_inobt_log_keys(cur, bp, ptr, numrecs - 1);
146			xfs_inobt_log_ptrs(cur, bp, ptr, numrecs - 1);
147		}
148	}
149	/*
150	 * It's a leaf.  Excise the record being deleted, by sliding the
151	 * entries past it down one.  Log the changed areas of the block.
152	 */
153	else {
154		rp = XFS_INOBT_REC_ADDR(block, 1, cur);
155		if (ptr < numrecs) {
156			memmove(&rp[ptr - 1], &rp[ptr],
157				(numrecs - ptr) * sizeof(*rp));
158			xfs_inobt_log_recs(cur, bp, ptr, numrecs - 1);
159		}
160		/*
161		 * If it's the first record in the block, we'll need a key
162		 * structure to pass up to the next level (updkey).
163		 */
164		if (ptr == 1) {
165			key.ir_startino = rp->ir_startino;
166			kp = &key;
167		}
168	}
169	/*
170	 * Decrement and log the number of entries in the block.
171	 */
172	numrecs--;
173	block->bb_numrecs = cpu_to_be16(numrecs);
174	xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
175	/*
176	 * Is this the root level?  If so, we're almost done.
177	 */
178	if (level == cur->bc_nlevels - 1) {
179		/*
180		 * If this is the root level,
181		 * and there's only one entry left,
182		 * and it's NOT the leaf level,
183		 * then we can get rid of this level.
184		 */
185		if (numrecs == 1 && level > 0) {
186			agbp = cur->bc_private.i.agbp;
187			agi = XFS_BUF_TO_AGI(agbp);
188			/*
189			 * pp is still set to the first pointer in the block.
190			 * Make it the new root of the btree.
191			 */
192			bno = be32_to_cpu(agi->agi_root);
193			agi->agi_root = *pp;
194			be32_add(&agi->agi_level, -1);
195			/*
196			 * Free the block.
197			 */
198			if ((error = xfs_free_extent(cur->bc_tp,
199				XFS_AGB_TO_FSB(mp, cur->bc_private.i.agno, bno), 1)))
200				return error;
201			xfs_trans_binval(cur->bc_tp, bp);
202			xfs_ialloc_log_agi(cur->bc_tp, agbp,
203				XFS_AGI_ROOT | XFS_AGI_LEVEL);
204			/*
205			 * Update the cursor so there's one fewer level.
206			 */
207			cur->bc_bufs[level] = NULL;
208			cur->bc_nlevels--;
209		} else if (level > 0 &&
210			   (error = xfs_inobt_decrement(cur, level, &i)))
211			return error;
212		*stat = 1;
213		return 0;
214	}
215	/*
216	 * If we deleted the leftmost entry in the block, update the
217	 * key values above us in the tree.
218	 */
219	if (ptr == 1 && (error = xfs_inobt_updkey(cur, kp, level + 1)))
220		return error;
221	/*
222	 * If the number of records remaining in the block is at least
223	 * the minimum, we're done.
224	 */
225	if (numrecs >= XFS_INOBT_BLOCK_MINRECS(level, cur)) {
226		if (level > 0 &&
227		    (error = xfs_inobt_decrement(cur, level, &i)))
228			return error;
229		*stat = 1;
230		return 0;
231	}
232	/*
233	 * Otherwise, we have to move some records around to keep the
234	 * tree balanced.  Look at the left and right sibling blocks to
235	 * see if we can re-balance by moving only one record.
236	 */
237	rbno = be32_to_cpu(block->bb_rightsib);
238	lbno = be32_to_cpu(block->bb_leftsib);
239	bno = NULLAGBLOCK;
240	ASSERT(rbno != NULLAGBLOCK || lbno != NULLAGBLOCK);
241	/*
242	 * Duplicate the cursor so our btree manipulations here won't
243	 * disrupt the next level up.
244	 */
245	if ((error = xfs_btree_dup_cursor(cur, &tcur)))
246		return error;
247	/*
248	 * If there's a right sibling, see if it's ok to shift an entry
249	 * out of it.
250	 */
251	if (rbno != NULLAGBLOCK) {
252		/*
253		 * Move the temp cursor to the last entry in the next block.
254		 * Actually any entry but the first would suffice.
255		 */
256		i = xfs_btree_lastrec(tcur, level);
257		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
258		if ((error = xfs_inobt_increment(tcur, level, &i)))
259			goto error0;
260		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
261		i = xfs_btree_lastrec(tcur, level);
262		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
263		/*
264		 * Grab a pointer to the block.
265		 */
266		rbp = tcur->bc_bufs[level];
267		right = XFS_BUF_TO_INOBT_BLOCK(rbp);
268#ifdef DEBUG
269		if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
270			goto error0;
271#endif
272		/*
273		 * Grab the current block number, for future use.
274		 */
275		bno = be32_to_cpu(right->bb_leftsib);
276		/*
277		 * If right block is full enough so that removing one entry
278		 * won't make it too empty, and left-shifting an entry out
279		 * of right to us works, we're done.
280		 */
281		if (be16_to_cpu(right->bb_numrecs) - 1 >=
282		     XFS_INOBT_BLOCK_MINRECS(level, cur)) {
283			if ((error = xfs_inobt_lshift(tcur, level, &i)))
284				goto error0;
285			if (i) {
286				ASSERT(be16_to_cpu(block->bb_numrecs) >=
287				       XFS_INOBT_BLOCK_MINRECS(level, cur));
288				xfs_btree_del_cursor(tcur,
289						     XFS_BTREE_NOERROR);
290				if (level > 0 &&
291				    (error = xfs_inobt_decrement(cur, level,
292						&i)))
293					return error;
294				*stat = 1;
295				return 0;
296			}
297		}
298		/*
299		 * Otherwise, grab the number of records in right for
300		 * future reference, and fix up the temp cursor to point
301		 * to our block again (last record).
302		 */
303		rrecs = be16_to_cpu(right->bb_numrecs);
304		if (lbno != NULLAGBLOCK) {
305			xfs_btree_firstrec(tcur, level);
306			if ((error = xfs_inobt_decrement(tcur, level, &i)))
307				goto error0;
308		}
309	}
310	/*
311	 * If there's a left sibling, see if it's ok to shift an entry
312	 * out of it.
313	 */
314	if (lbno != NULLAGBLOCK) {
315		/*
316		 * Move the temp cursor to the first entry in the
317		 * previous block.
318		 */
319		xfs_btree_firstrec(tcur, level);
320		if ((error = xfs_inobt_decrement(tcur, level, &i)))
321			goto error0;
322		xfs_btree_firstrec(tcur, level);
323		/*
324		 * Grab a pointer to the block.
325		 */
326		lbp = tcur->bc_bufs[level];
327		left = XFS_BUF_TO_INOBT_BLOCK(lbp);
328#ifdef DEBUG
329		if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
330			goto error0;
331#endif
332		/*
333		 * Grab the current block number, for future use.
334		 */
335		bno = be32_to_cpu(left->bb_rightsib);
336		/*
337		 * If left block is full enough so that removing one entry
338		 * won't make it too empty, and right-shifting an entry out
339		 * of left to us works, we're done.
340		 */
341		if (be16_to_cpu(left->bb_numrecs) - 1 >=
342		     XFS_INOBT_BLOCK_MINRECS(level, cur)) {
343			if ((error = xfs_inobt_rshift(tcur, level, &i)))
344				goto error0;
345			if (i) {
346				ASSERT(be16_to_cpu(block->bb_numrecs) >=
347				       XFS_INOBT_BLOCK_MINRECS(level, cur));
348				xfs_btree_del_cursor(tcur,
349						     XFS_BTREE_NOERROR);
350				if (level == 0)
351					cur->bc_ptrs[0]++;
352				*stat = 1;
353				return 0;
354			}
355		}
356		/*
357		 * Otherwise, grab the number of records in right for
358		 * future reference.
359		 */
360		lrecs = be16_to_cpu(left->bb_numrecs);
361	}
362	/*
363	 * Delete the temp cursor, we're done with it.
364	 */
365	xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
366	/*
367	 * If here, we need to do a join to keep the tree balanced.
368	 */
369	ASSERT(bno != NULLAGBLOCK);
370	/*
371	 * See if we can join with the left neighbor block.
372	 */
373	if (lbno != NULLAGBLOCK &&
374	    lrecs + numrecs <= XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
375		/*
376		 * Set "right" to be the starting block,
377		 * "left" to be the left neighbor.
378		 */
379		rbno = bno;
380		right = block;
381		rrecs = be16_to_cpu(right->bb_numrecs);
382		rbp = bp;
383		if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
384				cur->bc_private.i.agno, lbno, 0, &lbp,
385				XFS_INO_BTREE_REF)))
386			return error;
387		left = XFS_BUF_TO_INOBT_BLOCK(lbp);
388		lrecs = be16_to_cpu(left->bb_numrecs);
389		if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
390			return error;
391	}
392	/*
393	 * If that won't work, see if we can join with the right neighbor block.
394	 */
395	else if (rbno != NULLAGBLOCK &&
396		 rrecs + numrecs <= XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
397		/*
398		 * Set "left" to be the starting block,
399		 * "right" to be the right neighbor.
400		 */
401		lbno = bno;
402		left = block;
403		lrecs = be16_to_cpu(left->bb_numrecs);
404		lbp = bp;
405		if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
406				cur->bc_private.i.agno, rbno, 0, &rbp,
407				XFS_INO_BTREE_REF)))
408			return error;
409		right = XFS_BUF_TO_INOBT_BLOCK(rbp);
410		rrecs = be16_to_cpu(right->bb_numrecs);
411		if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
412			return error;
413	}
414	/*
415	 * Otherwise, we can't fix the imbalance.
416	 * Just return.  This is probably a logic error, but it's not fatal.
417	 */
418	else {
419		if (level > 0 && (error = xfs_inobt_decrement(cur, level, &i)))
420			return error;
421		*stat = 1;
422		return 0;
423	}
424	/*
425	 * We're now going to join "left" and "right" by moving all the stuff
426	 * in "right" to "left" and deleting "right".
427	 */
428	if (level > 0) {
429		/*
430		 * It's a non-leaf.  Move keys and pointers.
431		 */
432		lkp = XFS_INOBT_KEY_ADDR(left, lrecs + 1, cur);
433		lpp = XFS_INOBT_PTR_ADDR(left, lrecs + 1, cur);
434		rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
435		rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
436#ifdef DEBUG
437		for (i = 0; i < rrecs; i++) {
438			if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
439				return error;
440		}
441#endif
442		memcpy(lkp, rkp, rrecs * sizeof(*lkp));
443		memcpy(lpp, rpp, rrecs * sizeof(*lpp));
444		xfs_inobt_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
445		xfs_inobt_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
446	} else {
447		/*
448		 * It's a leaf.  Move records.
449		 */
450		lrp = XFS_INOBT_REC_ADDR(left, lrecs + 1, cur);
451		rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
452		memcpy(lrp, rrp, rrecs * sizeof(*lrp));
453		xfs_inobt_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
454	}
455	/*
456	 * If we joined with the left neighbor, set the buffer in the
457	 * cursor to the left block, and fix up the index.
458	 */
459	if (bp != lbp) {
460		xfs_btree_setbuf(cur, level, lbp);
461		cur->bc_ptrs[level] += lrecs;
462	}
463	/*
464	 * If we joined with the right neighbor and there's a level above
465	 * us, increment the cursor at that level.
466	 */
467	else if (level + 1 < cur->bc_nlevels &&
468		 (error = xfs_alloc_increment(cur, level + 1, &i)))
469		return error;
470	/*
471	 * Fix up the number of records in the surviving block.
472	 */
473	lrecs += rrecs;
474	left->bb_numrecs = cpu_to_be16(lrecs);
475	/*
476	 * Fix up the right block pointer in the surviving block, and log it.
477	 */
478	left->bb_rightsib = right->bb_rightsib;
479	xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
480	/*
481	 * If there is a right sibling now, make it point to the
482	 * remaining block.
483	 */
484	if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) {
485		xfs_inobt_block_t	*rrblock;
486		xfs_buf_t		*rrbp;
487
488		if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
489				cur->bc_private.i.agno, be32_to_cpu(left->bb_rightsib), 0,
490				&rrbp, XFS_INO_BTREE_REF)))
491			return error;
492		rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp);
493		if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
494			return error;
495		rrblock->bb_leftsib = cpu_to_be32(lbno);
496		xfs_inobt_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
497	}
498	/*
499	 * Free the deleting block.
500	 */
501	if ((error = xfs_free_extent(cur->bc_tp, XFS_AGB_TO_FSB(mp,
502				     cur->bc_private.i.agno, rbno), 1)))
503		return error;
504	xfs_trans_binval(cur->bc_tp, rbp);
505	/*
506	 * Readjust the ptr at this level if it's not a leaf, since it's
507	 * still pointing at the deletion point, which makes the cursor
508	 * inconsistent.  If this makes the ptr 0, the caller fixes it up.
509	 * We can't use decrement because it would change the next level up.
510	 */
511	if (level > 0)
512		cur->bc_ptrs[level]--;
513	/*
514	 * Return value means the next level up has something to do.
515	 */
516	*stat = 2;
517	return 0;
518
519error0:
520	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
521	return error;
522}
523
524/*
525 * Insert one record/level.  Return information to the caller
526 * allowing the next level up to proceed if necessary.
527 */
528STATIC int				/* error */
529xfs_inobt_insrec(
530	xfs_btree_cur_t		*cur,	/* btree cursor */
531	int			level,	/* level to insert record at */
532	xfs_agblock_t		*bnop,	/* i/o: block number inserted */
533	xfs_inobt_rec_t		*recp,	/* i/o: record data inserted */
534	xfs_btree_cur_t		**curp,	/* output: new cursor replacing cur */
535	int			*stat)	/* success/failure */
536{
537	xfs_inobt_block_t	*block;	/* btree block record/key lives in */
538	xfs_buf_t		*bp;	/* buffer for block */
539	int			error;	/* error return value */
540	int			i;	/* loop index */
541	xfs_inobt_key_t		key;	/* key value being inserted */
542	xfs_inobt_key_t		*kp=NULL;	/* pointer to btree keys */
543	xfs_agblock_t		nbno;	/* block number of allocated block */
544	xfs_btree_cur_t		*ncur;	/* new cursor to be used at next lvl */
545	xfs_inobt_key_t		nkey;	/* new key value, from split */
546	xfs_inobt_rec_t		nrec;	/* new record value, for caller */
547	int			numrecs;
548	int			optr;	/* old ptr value */
549	xfs_inobt_ptr_t		*pp;	/* pointer to btree addresses */
550	int			ptr;	/* index in btree block for this rec */
551	xfs_inobt_rec_t		*rp=NULL;	/* pointer to btree records */
552
553	/*
554	 * GCC doesn't understand the (arguably complex) control flow in
555	 * this function and complains about uninitialized structure fields
556	 * without this.
557	 */
558	memset(&nrec, 0, sizeof(nrec));
559
560	/*
561	 * If we made it to the root level, allocate a new root block
562	 * and we're done.
563	 */
564	if (level >= cur->bc_nlevels) {
565		error = xfs_inobt_newroot(cur, &i);
566		*bnop = NULLAGBLOCK;
567		*stat = i;
568		return error;
569	}
570	/*
571	 * Make a key out of the record data to be inserted, and save it.
572	 */
573	key.ir_startino = recp->ir_startino; /* INT_: direct copy */
574	optr = ptr = cur->bc_ptrs[level];
575	/*
576	 * If we're off the left edge, return failure.
577	 */
578	if (ptr == 0) {
579		*stat = 0;
580		return 0;
581	}
582	/*
583	 * Get pointers to the btree buffer and block.
584	 */
585	bp = cur->bc_bufs[level];
586	block = XFS_BUF_TO_INOBT_BLOCK(bp);
587	numrecs = be16_to_cpu(block->bb_numrecs);
588#ifdef DEBUG
589	if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
590		return error;
591	/*
592	 * Check that the new entry is being inserted in the right place.
593	 */
594	if (ptr <= numrecs) {
595		if (level == 0) {
596			rp = XFS_INOBT_REC_ADDR(block, ptr, cur);
597			xfs_btree_check_rec(cur->bc_btnum, recp, rp);
598		} else {
599			kp = XFS_INOBT_KEY_ADDR(block, ptr, cur);
600			xfs_btree_check_key(cur->bc_btnum, &key, kp);
601		}
602	}
603#endif
604	nbno = NULLAGBLOCK;
605	ncur = (xfs_btree_cur_t *)0;
606	/*
607	 * If the block is full, we can't insert the new entry until we
608	 * make the block un-full.
609	 */
610	if (numrecs == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
611		/*
612		 * First, try shifting an entry to the right neighbor.
613		 */
614		if ((error = xfs_inobt_rshift(cur, level, &i)))
615			return error;
616		if (i) {
617			/* nothing */
618		}
619		/*
620		 * Next, try shifting an entry to the left neighbor.
621		 */
622		else {
623			if ((error = xfs_inobt_lshift(cur, level, &i)))
624				return error;
625			if (i) {
626				optr = ptr = cur->bc_ptrs[level];
627			} else {
628				/*
629				 * Next, try splitting the current block
630				 * in half. If this works we have to
631				 * re-set our variables because
632				 * we could be in a different block now.
633				 */
634				if ((error = xfs_inobt_split(cur, level, &nbno,
635						&nkey, &ncur, &i)))
636					return error;
637				if (i) {
638					bp = cur->bc_bufs[level];
639					block = XFS_BUF_TO_INOBT_BLOCK(bp);
640#ifdef DEBUG
641					if ((error = xfs_btree_check_sblock(cur,
642							block, level, bp)))
643						return error;
644#endif
645					ptr = cur->bc_ptrs[level];
646					nrec.ir_startino = nkey.ir_startino; /* INT_: direct copy */
647				} else {
648					/*
649					 * Otherwise the insert fails.
650					 */
651					*stat = 0;
652					return 0;
653				}
654			}
655		}
656	}
657	/*
658	 * At this point we know there's room for our new entry in the block
659	 * we're pointing at.
660	 */
661	numrecs = be16_to_cpu(block->bb_numrecs);
662	if (level > 0) {
663		/*
664		 * It's a non-leaf entry.  Make a hole for the new data
665		 * in the key and ptr regions of the block.
666		 */
667		kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
668		pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
669#ifdef DEBUG
670		for (i = numrecs; i >= ptr; i--) {
671			if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i - 1]), level)))
672				return error;
673		}
674#endif
675		memmove(&kp[ptr], &kp[ptr - 1],
676			(numrecs - ptr + 1) * sizeof(*kp));
677		memmove(&pp[ptr], &pp[ptr - 1],
678			(numrecs - ptr + 1) * sizeof(*pp));
679		/*
680		 * Now stuff the new data in, bump numrecs and log the new data.
681		 */
682#ifdef DEBUG
683		if ((error = xfs_btree_check_sptr(cur, *bnop, level)))
684			return error;
685#endif
686		kp[ptr - 1] = key; /* INT_: struct copy */
687		pp[ptr - 1] = cpu_to_be32(*bnop);
688		numrecs++;
689		block->bb_numrecs = cpu_to_be16(numrecs);
690		xfs_inobt_log_keys(cur, bp, ptr, numrecs);
691		xfs_inobt_log_ptrs(cur, bp, ptr, numrecs);
692	} else {
693		/*
694		 * It's a leaf entry.  Make a hole for the new record.
695		 */
696		rp = XFS_INOBT_REC_ADDR(block, 1, cur);
697		memmove(&rp[ptr], &rp[ptr - 1],
698			(numrecs - ptr + 1) * sizeof(*rp));
699		/*
700		 * Now stuff the new record in, bump numrecs
701		 * and log the new data.
702		 */
703		rp[ptr - 1] = *recp; /* INT_: struct copy */
704		numrecs++;
705		block->bb_numrecs = cpu_to_be16(numrecs);
706		xfs_inobt_log_recs(cur, bp, ptr, numrecs);
707	}
708	/*
709	 * Log the new number of records in the btree header.
710	 */
711	xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
712#ifdef DEBUG
713	/*
714	 * Check that the key/record is in the right place, now.
715	 */
716	if (ptr < numrecs) {
717		if (level == 0)
718			xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1,
719				rp + ptr);
720		else
721			xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1,
722				kp + ptr);
723	}
724#endif
725	/*
726	 * If we inserted at the start of a block, update the parents' keys.
727	 */
728	if (optr == 1 && (error = xfs_inobt_updkey(cur, &key, level + 1)))
729		return error;
730	/*
731	 * Return the new block number, if any.
732	 * If there is one, give back a record value and a cursor too.
733	 */
734	*bnop = nbno;
735	if (nbno != NULLAGBLOCK) {
736		*recp = nrec; /* INT_: struct copy */
737		*curp = ncur;
738	}
739	*stat = 1;
740	return 0;
741}
742
743/*
744 * Log header fields from a btree block.
745 */
746STATIC void
747xfs_inobt_log_block(
748	xfs_trans_t		*tp,	/* transaction pointer */
749	xfs_buf_t		*bp,	/* buffer containing btree block */
750	int			fields)	/* mask of fields: XFS_BB_... */
751{
752	int			first;	/* first byte offset logged */
753	int			last;	/* last byte offset logged */
754	static const short	offsets[] = {	/* table of offsets */
755		offsetof(xfs_inobt_block_t, bb_magic),
756		offsetof(xfs_inobt_block_t, bb_level),
757		offsetof(xfs_inobt_block_t, bb_numrecs),
758		offsetof(xfs_inobt_block_t, bb_leftsib),
759		offsetof(xfs_inobt_block_t, bb_rightsib),
760		sizeof(xfs_inobt_block_t)
761	};
762
763	xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first, &last);
764	xfs_trans_log_buf(tp, bp, first, last);
765}
766
767/*
768 * Log keys from a btree block (nonleaf).
769 */
770STATIC void
771xfs_inobt_log_keys(
772	xfs_btree_cur_t		*cur,	/* btree cursor */
773	xfs_buf_t		*bp,	/* buffer containing btree block */
774	int			kfirst,	/* index of first key to log */
775	int			klast)	/* index of last key to log */
776{
777	xfs_inobt_block_t	*block;	/* btree block to log from */
778	int			first;	/* first byte offset logged */
779	xfs_inobt_key_t		*kp;	/* key pointer in btree block */
780	int			last;	/* last byte offset logged */
781
782	block = XFS_BUF_TO_INOBT_BLOCK(bp);
783	kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
784	first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block);
785	last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block);
786	xfs_trans_log_buf(cur->bc_tp, bp, first, last);
787}
788
789/*
790 * Log block pointer fields from a btree block (nonleaf).
791 */
792STATIC void
793xfs_inobt_log_ptrs(
794	xfs_btree_cur_t		*cur,	/* btree cursor */
795	xfs_buf_t		*bp,	/* buffer containing btree block */
796	int			pfirst,	/* index of first pointer to log */
797	int			plast)	/* index of last pointer to log */
798{
799	xfs_inobt_block_t	*block;	/* btree block to log from */
800	int			first;	/* first byte offset logged */
801	int			last;	/* last byte offset logged */
802	xfs_inobt_ptr_t		*pp;	/* block-pointer pointer in btree blk */
803
804	block = XFS_BUF_TO_INOBT_BLOCK(bp);
805	pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
806	first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block);
807	last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block);
808	xfs_trans_log_buf(cur->bc_tp, bp, first, last);
809}
810
811/*
812 * Log records from a btree block (leaf).
813 */
814STATIC void
815xfs_inobt_log_recs(
816	xfs_btree_cur_t		*cur,	/* btree cursor */
817	xfs_buf_t		*bp,	/* buffer containing btree block */
818	int			rfirst,	/* index of first record to log */
819	int			rlast)	/* index of last record to log */
820{
821	xfs_inobt_block_t	*block;	/* btree block to log from */
822	int			first;	/* first byte offset logged */
823	int			last;	/* last byte offset logged */
824	xfs_inobt_rec_t		*rp;	/* record pointer for btree block */
825
826	block = XFS_BUF_TO_INOBT_BLOCK(bp);
827	rp = XFS_INOBT_REC_ADDR(block, 1, cur);
828	first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block);
829	last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block);
830	xfs_trans_log_buf(cur->bc_tp, bp, first, last);
831}
832
833/*
834 * Lookup the record.  The cursor is made to point to it, based on dir.
835 * Return 0 if can't find any such record, 1 for success.
836 */
837STATIC int				/* error */
838xfs_inobt_lookup(
839	xfs_btree_cur_t		*cur,	/* btree cursor */
840	xfs_lookup_t		dir,	/* <=, ==, or >= */
841	int			*stat)	/* success/failure */
842{
843	xfs_agblock_t		agbno;	/* a.g. relative btree block number */
844	xfs_agnumber_t		agno;	/* allocation group number */
845	xfs_inobt_block_t	*block=NULL;	/* current btree block */
846	__int64_t		diff;	/* difference for the current key */
847	int			error;	/* error return value */
848	int			keyno=0;	/* current key number */
849	int			level;	/* level in the btree */
850	xfs_mount_t		*mp;	/* file system mount point */
851
852	/*
853	 * Get the allocation group header, and the root block number.
854	 */
855	mp = cur->bc_mp;
856	{
857		xfs_agi_t	*agi;	/* a.g. inode header */
858
859		agi = XFS_BUF_TO_AGI(cur->bc_private.i.agbp);
860		agno = be32_to_cpu(agi->agi_seqno);
861		agbno = be32_to_cpu(agi->agi_root);
862	}
863	/*
864	 * Iterate over each level in the btree, starting at the root.
865	 * For each level above the leaves, find the key we need, based
866	 * on the lookup record, then follow the corresponding block
867	 * pointer down to the next level.
868	 */
869	for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
870		xfs_buf_t	*bp;	/* buffer pointer for btree block */
871		xfs_daddr_t	d;	/* disk address of btree block */
872
873		/*
874		 * Get the disk address we're looking for.
875		 */
876		d = XFS_AGB_TO_DADDR(mp, agno, agbno);
877		/*
878		 * If the old buffer at this level is for a different block,
879		 * throw it away, otherwise just use it.
880		 */
881		bp = cur->bc_bufs[level];
882		if (bp && XFS_BUF_ADDR(bp) != d)
883			bp = (xfs_buf_t *)0;
884		if (!bp) {
885			/*
886			 * Need to get a new buffer.  Read it, then
887			 * set it in the cursor, releasing the old one.
888			 */
889			if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
890					agno, agbno, 0, &bp, XFS_INO_BTREE_REF)))
891				return error;
892			xfs_btree_setbuf(cur, level, bp);
893			/*
894			 * Point to the btree block, now that we have the buffer
895			 */
896			block = XFS_BUF_TO_INOBT_BLOCK(bp);
897			if ((error = xfs_btree_check_sblock(cur, block, level,
898					bp)))
899				return error;
900		} else
901			block = XFS_BUF_TO_INOBT_BLOCK(bp);
902		/*
903		 * If we already had a key match at a higher level, we know
904		 * we need to use the first entry in this block.
905		 */
906		if (diff == 0)
907			keyno = 1;
908		/*
909		 * Otherwise we need to search this block.  Do a binary search.
910		 */
911		else {
912			int		high;	/* high entry number */
913			xfs_inobt_key_t	*kkbase=NULL;/* base of keys in block */
914			xfs_inobt_rec_t	*krbase=NULL;/* base of records in block */
915			int		low;	/* low entry number */
916
917			/*
918			 * Get a pointer to keys or records.
919			 */
920			if (level > 0)
921				kkbase = XFS_INOBT_KEY_ADDR(block, 1, cur);
922			else
923				krbase = XFS_INOBT_REC_ADDR(block, 1, cur);
924			/*
925			 * Set low and high entry numbers, 1-based.
926			 */
927			low = 1;
928			if (!(high = be16_to_cpu(block->bb_numrecs))) {
929				/*
930				 * If the block is empty, the tree must
931				 * be an empty leaf.
932				 */
933				ASSERT(level == 0 && cur->bc_nlevels == 1);
934				cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
935				*stat = 0;
936				return 0;
937			}
938			/*
939			 * Binary search the block.
940			 */
941			while (low <= high) {
942				xfs_agino_t	startino;	/* key value */
943
944				/*
945				 * keyno is average of low and high.
946				 */
947				keyno = (low + high) >> 1;
948				/*
949				 * Get startino.
950				 */
951				if (level > 0) {
952					xfs_inobt_key_t	*kkp;
953
954					kkp = kkbase + keyno - 1;
955					startino = INT_GET(kkp->ir_startino, ARCH_CONVERT);
956				} else {
957					xfs_inobt_rec_t	*krp;
958
959					krp = krbase + keyno - 1;
960					startino = INT_GET(krp->ir_startino, ARCH_CONVERT);
961				}
962				/*
963				 * Compute difference to get next direction.
964				 */
965				diff = (__int64_t)
966					startino - cur->bc_rec.i.ir_startino;
967				/*
968				 * Less than, move right.
969				 */
970				if (diff < 0)
971					low = keyno + 1;
972				/*
973				 * Greater than, move left.
974				 */
975				else if (diff > 0)
976					high = keyno - 1;
977				/*
978				 * Equal, we're done.
979				 */
980				else
981					break;
982			}
983		}
984		/*
985		 * If there are more levels, set up for the next level
986		 * by getting the block number and filling in the cursor.
987		 */
988		if (level > 0) {
989			/*
990			 * If we moved left, need the previous key number,
991			 * unless there isn't one.
992			 */
993			if (diff > 0 && --keyno < 1)
994				keyno = 1;
995			agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, keyno, cur));
996#ifdef DEBUG
997			if ((error = xfs_btree_check_sptr(cur, agbno, level)))
998				return error;
999#endif
1000			cur->bc_ptrs[level] = keyno;
1001		}
1002	}
1003	/*
1004	 * Done with the search.
1005	 * See if we need to adjust the results.
1006	 */
1007	if (dir != XFS_LOOKUP_LE && diff < 0) {
1008		keyno++;
1009		/*
1010		 * If ge search and we went off the end of the block, but it's
1011		 * not the last block, we're in the wrong block.
1012		 */
1013		if (dir == XFS_LOOKUP_GE &&
1014		    keyno > be16_to_cpu(block->bb_numrecs) &&
1015		    be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) {
1016			int	i;
1017
1018			cur->bc_ptrs[0] = keyno;
1019			if ((error = xfs_inobt_increment(cur, 0, &i)))
1020				return error;
1021			ASSERT(i == 1);
1022			*stat = 1;
1023			return 0;
1024		}
1025	}
1026	else if (dir == XFS_LOOKUP_LE && diff > 0)
1027		keyno--;
1028	cur->bc_ptrs[0] = keyno;
1029	/*
1030	 * Return if we succeeded or not.
1031	 */
1032	if (keyno == 0 || keyno > be16_to_cpu(block->bb_numrecs))
1033		*stat = 0;
1034	else
1035		*stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));
1036	return 0;
1037}
1038
1039/*
1040 * Move 1 record left from cur/level if possible.
1041 * Update cur to reflect the new path.
1042 */
1043STATIC int				/* error */
1044xfs_inobt_lshift(
1045	xfs_btree_cur_t		*cur,	/* btree cursor */
1046	int			level,	/* level to shift record on */
1047	int			*stat)	/* success/failure */
1048{
1049	int			error;	/* error return value */
1050#ifdef DEBUG
1051	int			i;	/* loop index */
1052#endif
1053	xfs_inobt_key_t		key;	/* key value for leaf level upward */
1054	xfs_buf_t		*lbp;	/* buffer for left neighbor block */
1055	xfs_inobt_block_t	*left;	/* left neighbor btree block */
1056	xfs_inobt_key_t		*lkp=NULL;	/* key pointer for left block */
1057	xfs_inobt_ptr_t		*lpp;	/* address pointer for left block */
1058	xfs_inobt_rec_t		*lrp=NULL;	/* record pointer for left block */
1059	int			nrec;	/* new number of left block entries */
1060	xfs_buf_t		*rbp;	/* buffer for right (current) block */
1061	xfs_inobt_block_t	*right;	/* right (current) btree block */
1062	xfs_inobt_key_t		*rkp=NULL;	/* key pointer for right block */
1063	xfs_inobt_ptr_t		*rpp=NULL;	/* address pointer for right block */
1064	xfs_inobt_rec_t		*rrp=NULL;	/* record pointer for right block */
1065
1066	/*
1067	 * Set up variables for this block as "right".
1068	 */
1069	rbp = cur->bc_bufs[level];
1070	right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1071#ifdef DEBUG
1072	if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
1073		return error;
1074#endif
1075	/*
1076	 * If we've got no left sibling then we can't shift an entry left.
1077	 */
1078	if (be32_to_cpu(right->bb_leftsib) == NULLAGBLOCK) {
1079		*stat = 0;
1080		return 0;
1081	}
1082	/*
1083	 * If the cursor entry is the one that would be moved, don't
1084	 * do it... it's too complicated.
1085	 */
1086	if (cur->bc_ptrs[level] <= 1) {
1087		*stat = 0;
1088		return 0;
1089	}
1090	/*
1091	 * Set up the left neighbor as "left".
1092	 */
1093	if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1094			cur->bc_private.i.agno, be32_to_cpu(right->bb_leftsib),
1095			0, &lbp, XFS_INO_BTREE_REF)))
1096		return error;
1097	left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1098	if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1099		return error;
1100	/*
1101	 * If it's full, it can't take another entry.
1102	 */
1103	if (be16_to_cpu(left->bb_numrecs) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
1104		*stat = 0;
1105		return 0;
1106	}
1107	nrec = be16_to_cpu(left->bb_numrecs) + 1;
1108	/*
1109	 * If non-leaf, copy a key and a ptr to the left block.
1110	 */
1111	if (level > 0) {
1112		lkp = XFS_INOBT_KEY_ADDR(left, nrec, cur);
1113		rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
1114		*lkp = *rkp;
1115		xfs_inobt_log_keys(cur, lbp, nrec, nrec);
1116		lpp = XFS_INOBT_PTR_ADDR(left, nrec, cur);
1117		rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
1118#ifdef DEBUG
1119		if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*rpp), level)))
1120			return error;
1121#endif
1122		*lpp = *rpp; /* INT_: no-change copy */
1123		xfs_inobt_log_ptrs(cur, lbp, nrec, nrec);
1124	}
1125	/*
1126	 * If leaf, copy a record to the left block.
1127	 */
1128	else {
1129		lrp = XFS_INOBT_REC_ADDR(left, nrec, cur);
1130		rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
1131		*lrp = *rrp;
1132		xfs_inobt_log_recs(cur, lbp, nrec, nrec);
1133	}
1134	/*
1135	 * Bump and log left's numrecs, decrement and log right's numrecs.
1136	 */
1137	be16_add(&left->bb_numrecs, 1);
1138	xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
1139#ifdef DEBUG
1140	if (level > 0)
1141		xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp);
1142	else
1143		xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp);
1144#endif
1145	be16_add(&right->bb_numrecs, -1);
1146	xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
1147	/*
1148	 * Slide the contents of right down one entry.
1149	 */
1150	if (level > 0) {
1151#ifdef DEBUG
1152		for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
1153			if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i + 1]),
1154					level)))
1155				return error;
1156		}
1157#endif
1158		memmove(rkp, rkp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1159		memmove(rpp, rpp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1160		xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1161		xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1162	} else {
1163		memmove(rrp, rrp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1164		xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1165		key.ir_startino = rrp->ir_startino; /* INT_: direct copy */
1166		rkp = &key;
1167	}
1168	/*
1169	 * Update the parent key values of right.
1170	 */
1171	if ((error = xfs_inobt_updkey(cur, rkp, level + 1)))
1172		return error;
1173	/*
1174	 * Slide the cursor value left one.
1175	 */
1176	cur->bc_ptrs[level]--;
1177	*stat = 1;
1178	return 0;
1179}
1180
1181/*
1182 * Allocate a new root block, fill it in.
1183 */
1184STATIC int				/* error */
1185xfs_inobt_newroot(
1186	xfs_btree_cur_t		*cur,	/* btree cursor */
1187	int			*stat)	/* success/failure */
1188{
1189	xfs_agi_t		*agi;	/* a.g. inode header */
1190	xfs_alloc_arg_t		args;	/* allocation argument structure */
1191	xfs_inobt_block_t	*block;	/* one half of the old root block */
1192	xfs_buf_t		*bp;	/* buffer containing block */
1193	int			error;	/* error return value */
1194	xfs_inobt_key_t		*kp;	/* btree key pointer */
1195	xfs_agblock_t		lbno;	/* left block number */
1196	xfs_buf_t		*lbp;	/* left buffer pointer */
1197	xfs_inobt_block_t	*left;	/* left btree block */
1198	xfs_buf_t		*nbp;	/* new (root) buffer */
1199	xfs_inobt_block_t	*new;	/* new (root) btree block */
1200	int			nptr;	/* new value for key index, 1 or 2 */
1201	xfs_inobt_ptr_t		*pp;	/* btree address pointer */
1202	xfs_agblock_t		rbno;	/* right block number */
1203	xfs_buf_t		*rbp;	/* right buffer pointer */
1204	xfs_inobt_block_t	*right;	/* right btree block */
1205	xfs_inobt_rec_t		*rp;	/* btree record pointer */
1206
1207	ASSERT(cur->bc_nlevels < XFS_IN_MAXLEVELS(cur->bc_mp));
1208
1209	/*
1210	 * Get a block & a buffer.
1211	 */
1212	agi = XFS_BUF_TO_AGI(cur->bc_private.i.agbp);
1213	args.tp = cur->bc_tp;
1214	args.mp = cur->bc_mp;
1215	args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.i.agno,
1216		be32_to_cpu(agi->agi_root));
1217	args.mod = args.minleft = args.alignment = args.total = args.wasdel =
1218		args.isfl = args.userdata = args.minalignslop = 0;
1219	args.minlen = args.maxlen = args.prod = 1;
1220	args.type = XFS_ALLOCTYPE_NEAR_BNO;
1221	if ((error = xfs_alloc_vextent(&args)))
1222		return error;
1223	/*
1224	 * None available, we fail.
1225	 */
1226	if (args.fsbno == NULLFSBLOCK) {
1227		*stat = 0;
1228		return 0;
1229	}
1230	ASSERT(args.len == 1);
1231	nbp = xfs_btree_get_bufs(args.mp, args.tp, args.agno, args.agbno, 0);
1232	new = XFS_BUF_TO_INOBT_BLOCK(nbp);
1233	/*
1234	 * Set the root data in the a.g. inode structure.
1235	 */
1236	agi->agi_root = cpu_to_be32(args.agbno);
1237	be32_add(&agi->agi_level, 1);
1238	xfs_ialloc_log_agi(args.tp, cur->bc_private.i.agbp,
1239		XFS_AGI_ROOT | XFS_AGI_LEVEL);
1240	/*
1241	 * At the previous root level there are now two blocks: the old
1242	 * root, and the new block generated when it was split.
1243	 * We don't know which one the cursor is pointing at, so we
1244	 * set up variables "left" and "right" for each case.
1245	 */
1246	bp = cur->bc_bufs[cur->bc_nlevels - 1];
1247	block = XFS_BUF_TO_INOBT_BLOCK(bp);
1248#ifdef DEBUG
1249	if ((error = xfs_btree_check_sblock(cur, block, cur->bc_nlevels - 1, bp)))
1250		return error;
1251#endif
1252	if (be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) {
1253		/*
1254		 * Our block is left, pick up the right block.
1255		 */
1256		lbp = bp;
1257		lbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(lbp));
1258		left = block;
1259		rbno = be32_to_cpu(left->bb_rightsib);
1260		if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
1261				rbno, 0, &rbp, XFS_INO_BTREE_REF)))
1262			return error;
1263		bp = rbp;
1264		right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1265		if ((error = xfs_btree_check_sblock(cur, right,
1266				cur->bc_nlevels - 1, rbp)))
1267			return error;
1268		nptr = 1;
1269	} else {
1270		/*
1271		 * Our block is right, pick up the left block.
1272		 */
1273		rbp = bp;
1274		rbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(rbp));
1275		right = block;
1276		lbno = be32_to_cpu(right->bb_leftsib);
1277		if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
1278				lbno, 0, &lbp, XFS_INO_BTREE_REF)))
1279			return error;
1280		bp = lbp;
1281		left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1282		if ((error = xfs_btree_check_sblock(cur, left,
1283				cur->bc_nlevels - 1, lbp)))
1284			return error;
1285		nptr = 2;
1286	}
1287	/*
1288	 * Fill in the new block's btree header and log it.
1289	 */
1290	new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
1291	new->bb_level = cpu_to_be16(cur->bc_nlevels);
1292	new->bb_numrecs = cpu_to_be16(2);
1293	new->bb_leftsib = cpu_to_be32(NULLAGBLOCK);
1294	new->bb_rightsib = cpu_to_be32(NULLAGBLOCK);
1295	xfs_inobt_log_block(args.tp, nbp, XFS_BB_ALL_BITS);
1296	ASSERT(lbno != NULLAGBLOCK && rbno != NULLAGBLOCK);
1297	/*
1298	 * Fill in the key data in the new root.
1299	 */
1300	kp = XFS_INOBT_KEY_ADDR(new, 1, cur);
1301	if (be16_to_cpu(left->bb_level) > 0) {
1302		kp[0] = *XFS_INOBT_KEY_ADDR(left, 1, cur); /* INT_: struct copy */
1303		kp[1] = *XFS_INOBT_KEY_ADDR(right, 1, cur); /* INT_: struct copy */
1304	} else {
1305		rp = XFS_INOBT_REC_ADDR(left, 1, cur);
1306		INT_COPY(kp[0].ir_startino, rp->ir_startino, ARCH_CONVERT);
1307		rp = XFS_INOBT_REC_ADDR(right, 1, cur);
1308		INT_COPY(kp[1].ir_startino, rp->ir_startino, ARCH_CONVERT);
1309	}
1310	xfs_inobt_log_keys(cur, nbp, 1, 2);
1311	/*
1312	 * Fill in the pointer data in the new root.
1313	 */
1314	pp = XFS_INOBT_PTR_ADDR(new, 1, cur);
1315	pp[0] = cpu_to_be32(lbno);
1316	pp[1] = cpu_to_be32(rbno);
1317	xfs_inobt_log_ptrs(cur, nbp, 1, 2);
1318	/*
1319	 * Fix up the cursor.
1320	 */
1321	xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
1322	cur->bc_ptrs[cur->bc_nlevels] = nptr;
1323	cur->bc_nlevels++;
1324	*stat = 1;
1325	return 0;
1326}
1327
1328/*
1329 * Move 1 record right from cur/level if possible.
1330 * Update cur to reflect the new path.
1331 */
1332STATIC int				/* error */
1333xfs_inobt_rshift(
1334	xfs_btree_cur_t		*cur,	/* btree cursor */
1335	int			level,	/* level to shift record on */
1336	int			*stat)	/* success/failure */
1337{
1338	int			error;	/* error return value */
1339	int			i;	/* loop index */
1340	xfs_inobt_key_t		key;	/* key value for leaf level upward */
1341	xfs_buf_t		*lbp;	/* buffer for left (current) block */
1342	xfs_inobt_block_t	*left;	/* left (current) btree block */
1343	xfs_inobt_key_t		*lkp;	/* key pointer for left block */
1344	xfs_inobt_ptr_t		*lpp;	/* address pointer for left block */
1345	xfs_inobt_rec_t		*lrp;	/* record pointer for left block */
1346	xfs_buf_t		*rbp;	/* buffer for right neighbor block */
1347	xfs_inobt_block_t	*right;	/* right neighbor btree block */
1348	xfs_inobt_key_t		*rkp;	/* key pointer for right block */
1349	xfs_inobt_ptr_t		*rpp;	/* address pointer for right block */
1350	xfs_inobt_rec_t		*rrp=NULL;	/* record pointer for right block */
1351	xfs_btree_cur_t		*tcur;	/* temporary cursor */
1352
1353	/*
1354	 * Set up variables for this block as "left".
1355	 */
1356	lbp = cur->bc_bufs[level];
1357	left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1358#ifdef DEBUG
1359	if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1360		return error;
1361#endif
1362	/*
1363	 * If we've got no right sibling then we can't shift an entry right.
1364	 */
1365	if (be32_to_cpu(left->bb_rightsib) == NULLAGBLOCK) {
1366		*stat = 0;
1367		return 0;
1368	}
1369	/*
1370	 * If the cursor entry is the one that would be moved, don't
1371	 * do it... it's too complicated.
1372	 */
1373	if (cur->bc_ptrs[level] >= be16_to_cpu(left->bb_numrecs)) {
1374		*stat = 0;
1375		return 0;
1376	}
1377	/*
1378	 * Set up the right neighbor as "right".
1379	 */
1380	if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1381			cur->bc_private.i.agno, be32_to_cpu(left->bb_rightsib),
1382			0, &rbp, XFS_INO_BTREE_REF)))
1383		return error;
1384	right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1385	if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
1386		return error;
1387	/*
1388	 * If it's full, it can't take another entry.
1389	 */
1390	if (be16_to_cpu(right->bb_numrecs) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
1391		*stat = 0;
1392		return 0;
1393	}
1394	/*
1395	 * Make a hole at the start of the right neighbor block, then
1396	 * copy the last left block entry to the hole.
1397	 */
1398	if (level > 0) {
1399		lkp = XFS_INOBT_KEY_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1400		lpp = XFS_INOBT_PTR_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1401		rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
1402		rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
1403#ifdef DEBUG
1404		for (i = be16_to_cpu(right->bb_numrecs) - 1; i >= 0; i--) {
1405			if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
1406				return error;
1407		}
1408#endif
1409		memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1410		memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1411#ifdef DEBUG
1412		if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*lpp), level)))
1413			return error;
1414#endif
1415		*rkp = *lkp; /* INT_: no change copy */
1416		*rpp = *lpp; /* INT_: no change copy */
1417		xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1418		xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1419	} else {
1420		lrp = XFS_INOBT_REC_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1421		rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
1422		memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1423		*rrp = *lrp;
1424		xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1425		key.ir_startino = rrp->ir_startino; /* INT_: direct copy */
1426		rkp = &key;
1427	}
1428	/*
1429	 * Decrement and log left's numrecs, bump and log right's numrecs.
1430	 */
1431	be16_add(&left->bb_numrecs, -1);
1432	xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
1433	be16_add(&right->bb_numrecs, 1);
1434#ifdef DEBUG
1435	if (level > 0)
1436		xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1);
1437	else
1438		xfs_btree_check_rec(cur->bc_btnum, rrp, rrp + 1);
1439#endif
1440	xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
1441	/*
1442	 * Using a temporary cursor, update the parent key values of the
1443	 * block on the right.
1444	 */
1445	if ((error = xfs_btree_dup_cursor(cur, &tcur)))
1446		return error;
1447	xfs_btree_lastrec(tcur, level);
1448	if ((error = xfs_inobt_increment(tcur, level, &i)) ||
1449	    (error = xfs_inobt_updkey(tcur, rkp, level + 1))) {
1450		xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
1451		return error;
1452	}
1453	xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
1454	*stat = 1;
1455	return 0;
1456}
1457
1458/*
1459 * Split cur/level block in half.
1460 * Return new block number and its first record (to be inserted into parent).
1461 */
1462STATIC int				/* error */
1463xfs_inobt_split(
1464	xfs_btree_cur_t		*cur,	/* btree cursor */
1465	int			level,	/* level to split */
1466	xfs_agblock_t		*bnop,	/* output: block number allocated */
1467	xfs_inobt_key_t		*keyp,	/* output: first key of new block */
1468	xfs_btree_cur_t		**curp,	/* output: new cursor */
1469	int			*stat)	/* success/failure */
1470{
1471	xfs_alloc_arg_t		args;	/* allocation argument structure */
1472	int			error;	/* error return value */
1473	int			i;	/* loop index/record number */
1474	xfs_agblock_t		lbno;	/* left (current) block number */
1475	xfs_buf_t		*lbp;	/* buffer for left block */
1476	xfs_inobt_block_t	*left;	/* left (current) btree block */
1477	xfs_inobt_key_t		*lkp;	/* left btree key pointer */
1478	xfs_inobt_ptr_t		*lpp;	/* left btree address pointer */
1479	xfs_inobt_rec_t		*lrp;	/* left btree record pointer */
1480	xfs_buf_t		*rbp;	/* buffer for right block */
1481	xfs_inobt_block_t	*right;	/* right (new) btree block */
1482	xfs_inobt_key_t		*rkp;	/* right btree key pointer */
1483	xfs_inobt_ptr_t		*rpp;	/* right btree address pointer */
1484	xfs_inobt_rec_t		*rrp;	/* right btree record pointer */
1485
1486	/*
1487	 * Set up left block (current one).
1488	 */
1489	lbp = cur->bc_bufs[level];
1490	args.tp = cur->bc_tp;
1491	args.mp = cur->bc_mp;
1492	lbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(lbp));
1493	/*
1494	 * Allocate the new block.
1495	 * If we can't do it, we're toast.  Give up.
1496	 */
1497	args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.i.agno, lbno);
1498	args.mod = args.minleft = args.alignment = args.total = args.wasdel =
1499		args.isfl = args.userdata = args.minalignslop = 0;
1500	args.minlen = args.maxlen = args.prod = 1;
1501	args.type = XFS_ALLOCTYPE_NEAR_BNO;
1502	if ((error = xfs_alloc_vextent(&args)))
1503		return error;
1504	if (args.fsbno == NULLFSBLOCK) {
1505		*stat = 0;
1506		return 0;
1507	}
1508	ASSERT(args.len == 1);
1509	rbp = xfs_btree_get_bufs(args.mp, args.tp, args.agno, args.agbno, 0);
1510	/*
1511	 * Set up the new block as "right".
1512	 */
1513	right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1514	/*
1515	 * "Left" is the current (according to the cursor) block.
1516	 */
1517	left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1518#ifdef DEBUG
1519	if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1520		return error;
1521#endif
1522	/*
1523	 * Fill in the btree header for the new block.
1524	 */
1525	right->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
1526	right->bb_level = left->bb_level;
1527	right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2);
1528	/*
1529	 * Make sure that if there's an odd number of entries now, that
1530	 * each new block will have the same number of entries.
1531	 */
1532	if ((be16_to_cpu(left->bb_numrecs) & 1) &&
1533	    cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1)
1534		be16_add(&right->bb_numrecs, 1);
1535	i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1;
1536	/*
1537	 * For non-leaf blocks, copy keys and addresses over to the new block.
1538	 */
1539	if (level > 0) {
1540		lkp = XFS_INOBT_KEY_ADDR(left, i, cur);
1541		lpp = XFS_INOBT_PTR_ADDR(left, i, cur);
1542		rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
1543		rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
1544#ifdef DEBUG
1545		for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
1546			if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level)))
1547				return error;
1548		}
1549#endif
1550		memcpy(rkp, lkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1551		memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1552		xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1553		xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1554		*keyp = *rkp;
1555	}
1556	/*
1557	 * For leaf blocks, copy records over to the new block.
1558	 */
1559	else {
1560		lrp = XFS_INOBT_REC_ADDR(left, i, cur);
1561		rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
1562		memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1563		xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1564		keyp->ir_startino = rrp->ir_startino; /* INT_: direct copy */
1565	}
1566	/*
1567	 * Find the left block number by looking in the buffer.
1568	 * Adjust numrecs, sibling pointers.
1569	 */
1570	be16_add(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs)));
1571	right->bb_rightsib = left->bb_rightsib;
1572	left->bb_rightsib = cpu_to_be32(args.agbno);
1573	right->bb_leftsib = cpu_to_be32(lbno);
1574	xfs_inobt_log_block(args.tp, rbp, XFS_BB_ALL_BITS);
1575	xfs_inobt_log_block(args.tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
1576	/*
1577	 * If there's a block to the new block's right, make that block
1578	 * point back to right instead of to left.
1579	 */
1580	if (be32_to_cpu(right->bb_rightsib) != NULLAGBLOCK) {
1581		xfs_inobt_block_t	*rrblock;	/* rr btree block */
1582		xfs_buf_t		*rrbp;		/* buffer for rrblock */
1583
1584		if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
1585				be32_to_cpu(right->bb_rightsib), 0, &rrbp,
1586				XFS_INO_BTREE_REF)))
1587			return error;
1588		rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp);
1589		if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
1590			return error;
1591		rrblock->bb_leftsib = cpu_to_be32(args.agbno);
1592		xfs_inobt_log_block(args.tp, rrbp, XFS_BB_LEFTSIB);
1593	}
1594	/*
1595	 * If the cursor is really in the right block, move it there.
1596	 * If it's just pointing past the last entry in left, then we'll
1597	 * insert there, so don't change anything in that case.
1598	 */
1599	if (cur->bc_ptrs[level] > be16_to_cpu(left->bb_numrecs) + 1) {
1600		xfs_btree_setbuf(cur, level, rbp);
1601		cur->bc_ptrs[level] -= be16_to_cpu(left->bb_numrecs);
1602	}
1603	/*
1604	 * If there are more levels, we'll need another cursor which refers
1605	 * the right block, no matter where this cursor was.
1606	 */
1607	if (level + 1 < cur->bc_nlevels) {
1608		if ((error = xfs_btree_dup_cursor(cur, curp)))
1609			return error;
1610		(*curp)->bc_ptrs[level + 1]++;
1611	}
1612	*bnop = args.agbno;
1613	*stat = 1;
1614	return 0;
1615}
1616
1617/*
1618 * Update keys at all levels from here to the root along the cursor's path.
1619 */
1620STATIC int				/* error */
1621xfs_inobt_updkey(
1622	xfs_btree_cur_t		*cur,	/* btree cursor */
1623	xfs_inobt_key_t		*keyp,	/* new key value to update to */
1624	int			level)	/* starting level for update */
1625{
1626	int			ptr;	/* index of key in block */
1627
1628	/*
1629	 * Go up the tree from this level toward the root.
1630	 * At each level, update the key value to the value input.
1631	 * Stop when we reach a level where the cursor isn't pointing
1632	 * at the first entry in the block.
1633	 */
1634	for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1635		xfs_buf_t		*bp;	/* buffer for block */
1636		xfs_inobt_block_t	*block;	/* btree block */
1637#ifdef DEBUG
1638		int			error;	/* error return value */
1639#endif
1640		xfs_inobt_key_t		*kp;	/* ptr to btree block keys */
1641
1642		bp = cur->bc_bufs[level];
1643		block = XFS_BUF_TO_INOBT_BLOCK(bp);
1644#ifdef DEBUG
1645		if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
1646			return error;
1647#endif
1648		ptr = cur->bc_ptrs[level];
1649		kp = XFS_INOBT_KEY_ADDR(block, ptr, cur);
1650		*kp = *keyp;
1651		xfs_inobt_log_keys(cur, bp, ptr, ptr);
1652	}
1653	return 0;
1654}
1655
1656/*
1657 * Externally visible routines.
1658 */
1659
1660/*
1661 * Decrement cursor by one record at the level.
1662 * For nonzero levels the leaf-ward information is untouched.
1663 */
1664int					/* error */
1665xfs_inobt_decrement(
1666	xfs_btree_cur_t		*cur,	/* btree cursor */
1667	int			level,	/* level in btree, 0 is leaf */
1668	int			*stat)	/* success/failure */
1669{
1670	xfs_inobt_block_t	*block;	/* btree block */
1671	int			error;
1672	int			lev;	/* btree level */
1673
1674	ASSERT(level < cur->bc_nlevels);
1675	/*
1676	 * Read-ahead to the left at this level.
1677	 */
1678	xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1679	/*
1680	 * Decrement the ptr at this level.  If we're still in the block
1681	 * then we're done.
1682	 */
1683	if (--cur->bc_ptrs[level] > 0) {
1684		*stat = 1;
1685		return 0;
1686	}
1687	/*
1688	 * Get a pointer to the btree block.
1689	 */
1690	block = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[level]);
1691#ifdef DEBUG
1692	if ((error = xfs_btree_check_sblock(cur, block, level,
1693			cur->bc_bufs[level])))
1694		return error;
1695#endif
1696	/*
1697	 * If we just went off the left edge of the tree, return failure.
1698	 */
1699	if (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK) {
1700		*stat = 0;
1701		return 0;
1702	}
1703	/*
1704	 * March up the tree decrementing pointers.
1705	 * Stop when we don't go off the left edge of a block.
1706	 */
1707	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1708		if (--cur->bc_ptrs[lev] > 0)
1709			break;
1710		/*
1711		 * Read-ahead the left block, we're going to read it
1712		 * in the next loop.
1713		 */
1714		xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1715	}
1716	/*
1717	 * If we went off the root then we are seriously confused.
1718	 */
1719	ASSERT(lev < cur->bc_nlevels);
1720	/*
1721	 * Now walk back down the tree, fixing up the cursor's buffer
1722	 * pointers and key numbers.
1723	 */
1724	for (block = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]); lev > level; ) {
1725		xfs_agblock_t	agbno;	/* block number of btree block */
1726		xfs_buf_t	*bp;	/* buffer containing btree block */
1727
1728		agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
1729		if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1730				cur->bc_private.i.agno, agbno, 0, &bp,
1731				XFS_INO_BTREE_REF)))
1732			return error;
1733		lev--;
1734		xfs_btree_setbuf(cur, lev, bp);
1735		block = XFS_BUF_TO_INOBT_BLOCK(bp);
1736		if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1737			return error;
1738		cur->bc_ptrs[lev] = be16_to_cpu(block->bb_numrecs);
1739	}
1740	*stat = 1;
1741	return 0;
1742}
1743
1744/*
1745 * Delete the record pointed to by cur.
1746 * The cursor refers to the place where the record was (could be inserted)
1747 * when the operation returns.
1748 */
1749int					/* error */
1750xfs_inobt_delete(
1751	xfs_btree_cur_t	*cur,		/* btree cursor */
1752	int		*stat)		/* success/failure */
1753{
1754	int		error;
1755	int		i;		/* result code */
1756	int		level;		/* btree level */
1757
1758	/*
1759	 * Go up the tree, starting at leaf level.
1760	 * If 2 is returned then a join was done; go to the next level.
1761	 * Otherwise we are done.
1762	 */
1763	for (level = 0, i = 2; i == 2; level++) {
1764		if ((error = xfs_inobt_delrec(cur, level, &i)))
1765			return error;
1766	}
1767	if (i == 0) {
1768		for (level = 1; level < cur->bc_nlevels; level++) {
1769			if (cur->bc_ptrs[level] == 0) {
1770				if ((error = xfs_inobt_decrement(cur, level, &i)))
1771					return error;
1772				break;
1773			}
1774		}
1775	}
1776	*stat = i;
1777	return 0;
1778}
1779
1780
1781/*
1782 * Get the data from the pointed-to record.
1783 */
1784int					/* error */
1785xfs_inobt_get_rec(
1786	xfs_btree_cur_t		*cur,	/* btree cursor */
1787	xfs_agino_t		*ino,	/* output: starting inode of chunk */
1788	__int32_t		*fcnt,	/* output: number of free inodes */
1789	xfs_inofree_t		*free,	/* output: free inode mask */
1790	int			*stat)	/* output: success/failure */
1791{
1792	xfs_inobt_block_t	*block;	/* btree block */
1793	xfs_buf_t		*bp;	/* buffer containing btree block */
1794#ifdef DEBUG
1795	int			error;	/* error return value */
1796#endif
1797	int			ptr;	/* record number */
1798	xfs_inobt_rec_t		*rec;	/* record data */
1799
1800	bp = cur->bc_bufs[0];
1801	ptr = cur->bc_ptrs[0];
1802	block = XFS_BUF_TO_INOBT_BLOCK(bp);
1803#ifdef DEBUG
1804	if ((error = xfs_btree_check_sblock(cur, block, 0, bp)))
1805		return error;
1806#endif
1807	/*
1808	 * Off the right end or left end, return failure.
1809	 */
1810	if (ptr > be16_to_cpu(block->bb_numrecs) || ptr <= 0) {
1811		*stat = 0;
1812		return 0;
1813	}
1814	/*
1815	 * Point to the record and extract its data.
1816	 */
1817	rec = XFS_INOBT_REC_ADDR(block, ptr, cur);
1818	*ino = INT_GET(rec->ir_startino, ARCH_CONVERT);
1819	*fcnt = INT_GET(rec->ir_freecount, ARCH_CONVERT);
1820	*free = INT_GET(rec->ir_free, ARCH_CONVERT);
1821	*stat = 1;
1822	return 0;
1823}
1824
1825/*
1826 * Increment cursor by one record at the level.
1827 * For nonzero levels the leaf-ward information is untouched.
1828 */
1829int					/* error */
1830xfs_inobt_increment(
1831	xfs_btree_cur_t		*cur,	/* btree cursor */
1832	int			level,	/* level in btree, 0 is leaf */
1833	int			*stat)	/* success/failure */
1834{
1835	xfs_inobt_block_t	*block;	/* btree block */
1836	xfs_buf_t		*bp;	/* buffer containing btree block */
1837	int			error;	/* error return value */
1838	int			lev;	/* btree level */
1839
1840	ASSERT(level < cur->bc_nlevels);
1841	/*
1842	 * Read-ahead to the right at this level.
1843	 */
1844	xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1845	/*
1846	 * Get a pointer to the btree block.
1847	 */
1848	bp = cur->bc_bufs[level];
1849	block = XFS_BUF_TO_INOBT_BLOCK(bp);
1850#ifdef DEBUG
1851	if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
1852		return error;
1853#endif
1854	/*
1855	 * Increment the ptr at this level.  If we're still in the block
1856	 * then we're done.
1857	 */
1858	if (++cur->bc_ptrs[level] <= be16_to_cpu(block->bb_numrecs)) {
1859		*stat = 1;
1860		return 0;
1861	}
1862	/*
1863	 * If we just went off the right edge of the tree, return failure.
1864	 */
1865	if (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK) {
1866		*stat = 0;
1867		return 0;
1868	}
1869	/*
1870	 * March up the tree incrementing pointers.
1871	 * Stop when we don't go off the right edge of a block.
1872	 */
1873	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1874		bp = cur->bc_bufs[lev];
1875		block = XFS_BUF_TO_INOBT_BLOCK(bp);
1876#ifdef DEBUG
1877		if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1878			return error;
1879#endif
1880		if (++cur->bc_ptrs[lev] <= be16_to_cpu(block->bb_numrecs))
1881			break;
1882		/*
1883		 * Read-ahead the right block, we're going to read it
1884		 * in the next loop.
1885		 */
1886		xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1887	}
1888	/*
1889	 * If we went off the root then we are seriously confused.
1890	 */
1891	ASSERT(lev < cur->bc_nlevels);
1892	/*
1893	 * Now walk back down the tree, fixing up the cursor's buffer
1894	 * pointers and key numbers.
1895	 */
1896	for (bp = cur->bc_bufs[lev], block = XFS_BUF_TO_INOBT_BLOCK(bp);
1897	     lev > level; ) {
1898		xfs_agblock_t	agbno;	/* block number of btree block */
1899
1900		agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
1901		if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1902				cur->bc_private.i.agno, agbno, 0, &bp,
1903				XFS_INO_BTREE_REF)))
1904			return error;
1905		lev--;
1906		xfs_btree_setbuf(cur, lev, bp);
1907		block = XFS_BUF_TO_INOBT_BLOCK(bp);
1908		if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1909			return error;
1910		cur->bc_ptrs[lev] = 1;
1911	}
1912	*stat = 1;
1913	return 0;
1914}
1915
1916/*
1917 * Insert the current record at the point referenced by cur.
1918 * The cursor may be inconsistent on return if splits have been done.
1919 */
1920int					/* error */
1921xfs_inobt_insert(
1922	xfs_btree_cur_t	*cur,		/* btree cursor */
1923	int		*stat)		/* success/failure */
1924{
1925	int		error;		/* error return value */
1926	int		i;		/* result value, 0 for failure */
1927	int		level;		/* current level number in btree */
1928	xfs_agblock_t	nbno;		/* new block number (split result) */
1929	xfs_btree_cur_t	*ncur;		/* new cursor (split result) */
1930	xfs_inobt_rec_t	nrec;		/* record being inserted this level */
1931	xfs_btree_cur_t	*pcur;		/* previous level's cursor */
1932
1933	level = 0;
1934	nbno = NULLAGBLOCK;
1935	INT_SET(nrec.ir_startino, ARCH_CONVERT, cur->bc_rec.i.ir_startino);
1936	INT_SET(nrec.ir_freecount, ARCH_CONVERT, cur->bc_rec.i.ir_freecount);
1937	INT_SET(nrec.ir_free, ARCH_CONVERT, cur->bc_rec.i.ir_free);
1938	ncur = (xfs_btree_cur_t *)0;
1939	pcur = cur;
1940	/*
1941	 * Loop going up the tree, starting at the leaf level.
1942	 * Stop when we don't get a split block, that must mean that
1943	 * the insert is finished with this level.
1944	 */
1945	do {
1946		/*
1947		 * Insert nrec/nbno into this level of the tree.
1948		 * Note if we fail, nbno will be null.
1949		 */
1950		if ((error = xfs_inobt_insrec(pcur, level++, &nbno, &nrec, &ncur,
1951				&i))) {
1952			if (pcur != cur)
1953				xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
1954			return error;
1955		}
1956		/*
1957		 * See if the cursor we just used is trash.
1958		 * Can't trash the caller's cursor, but otherwise we should
1959		 * if ncur is a new cursor or we're about to be done.
1960		 */
1961		if (pcur != cur && (ncur || nbno == NULLAGBLOCK)) {
1962			cur->bc_nlevels = pcur->bc_nlevels;
1963			xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
1964		}
1965		/*
1966		 * If we got a new cursor, switch to it.
1967		 */
1968		if (ncur) {
1969			pcur = ncur;
1970			ncur = (xfs_btree_cur_t *)0;
1971		}
1972	} while (nbno != NULLAGBLOCK);
1973	*stat = i;
1974	return 0;
1975}
1976
1977/*
1978 * Lookup the record equal to ino in the btree given by cur.
1979 */
1980int					/* error */
1981xfs_inobt_lookup_eq(
1982	xfs_btree_cur_t	*cur,		/* btree cursor */
1983	xfs_agino_t	ino,		/* starting inode of chunk */
1984	__int32_t	fcnt,		/* free inode count */
1985	xfs_inofree_t	free,		/* free inode mask */
1986	int		*stat)		/* success/failure */
1987{
1988	cur->bc_rec.i.ir_startino = ino;
1989	cur->bc_rec.i.ir_freecount = fcnt;
1990	cur->bc_rec.i.ir_free = free;
1991	return xfs_inobt_lookup(cur, XFS_LOOKUP_EQ, stat);
1992}
1993
1994/*
1995 * Lookup the first record greater than or equal to ino
1996 * in the btree given by cur.
1997 */
1998int					/* error */
1999xfs_inobt_lookup_ge(
2000	xfs_btree_cur_t	*cur,		/* btree cursor */
2001	xfs_agino_t	ino,		/* starting inode of chunk */
2002	__int32_t	fcnt,		/* free inode count */
2003	xfs_inofree_t	free,		/* free inode mask */
2004	int		*stat)		/* success/failure */
2005{
2006	cur->bc_rec.i.ir_startino = ino;
2007	cur->bc_rec.i.ir_freecount = fcnt;
2008	cur->bc_rec.i.ir_free = free;
2009	return xfs_inobt_lookup(cur, XFS_LOOKUP_GE, stat);
2010}
2011
2012/*
2013 * Lookup the first record less than or equal to ino
2014 * in the btree given by cur.
2015 */
2016int					/* error */
2017xfs_inobt_lookup_le(
2018	xfs_btree_cur_t	*cur,		/* btree cursor */
2019	xfs_agino_t	ino,		/* starting inode of chunk */
2020	__int32_t	fcnt,		/* free inode count */
2021	xfs_inofree_t	free,		/* free inode mask */
2022	int		*stat)		/* success/failure */
2023{
2024	cur->bc_rec.i.ir_startino = ino;
2025	cur->bc_rec.i.ir_freecount = fcnt;
2026	cur->bc_rec.i.ir_free = free;
2027	return xfs_inobt_lookup(cur, XFS_LOOKUP_LE, stat);
2028}
2029
2030/*
2031 * Update the record referred to by cur, to the value given
2032 * by [ino, fcnt, free].
2033 * This either works (return 0) or gets an EFSCORRUPTED error.
2034 */
2035int					/* error */
2036xfs_inobt_update(
2037	xfs_btree_cur_t		*cur,	/* btree cursor */
2038	xfs_agino_t		ino,	/* starting inode of chunk */
2039	__int32_t		fcnt,	/* free inode count */
2040	xfs_inofree_t		free)	/* free inode mask */
2041{
2042	xfs_inobt_block_t	*block;	/* btree block to update */
2043	xfs_buf_t		*bp;	/* buffer containing btree block */
2044	int			error;	/* error return value */
2045	int			ptr;	/* current record number (updating) */
2046	xfs_inobt_rec_t		*rp;	/* pointer to updated record */
2047
2048	/*
2049	 * Pick up the current block.
2050	 */
2051	bp = cur->bc_bufs[0];
2052	block = XFS_BUF_TO_INOBT_BLOCK(bp);
2053#ifdef DEBUG
2054	if ((error = xfs_btree_check_sblock(cur, block, 0, bp)))
2055		return error;
2056#endif
2057	/*
2058	 * Get the address of the rec to be updated.
2059	 */
2060	ptr = cur->bc_ptrs[0];
2061	rp = XFS_INOBT_REC_ADDR(block, ptr, cur);
2062	/*
2063	 * Fill in the new contents and log them.
2064	 */
2065	INT_SET(rp->ir_startino, ARCH_CONVERT, ino);
2066	INT_SET(rp->ir_freecount, ARCH_CONVERT, fcnt);
2067	INT_SET(rp->ir_free, ARCH_CONVERT, free);
2068	xfs_inobt_log_recs(cur, bp, ptr, ptr);
2069	/*
2070	 * Updating first record in leaf. Pass new key value up to our parent.
2071	 */
2072	if (ptr == 1) {
2073		xfs_inobt_key_t	key;	/* key containing [ino] */
2074
2075		INT_SET(key.ir_startino, ARCH_CONVERT, ino);
2076		if ((error = xfs_inobt_updkey(cur, &key, 1)))
2077			return error;
2078	}
2079	return 0;
2080}
2081