1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 *   Copyright (C) International Business Machines Corp., 2000-2005
4 */
5/*
6 *	jfs_xtree.c: extent allocation descriptor B+-tree manager
7 */
8
9#include <linux/fs.h>
10#include <linux/module.h>
11#include <linux/quotaops.h>
12#include <linux/seq_file.h>
13#include "jfs_incore.h"
14#include "jfs_filsys.h"
15#include "jfs_metapage.h"
16#include "jfs_dmap.h"
17#include "jfs_dinode.h"
18#include "jfs_superblock.h"
19#include "jfs_debug.h"
20
21/*
22 * xtree local flag
23 */
24#define XT_INSERT	0x00000001
25
26/*
27 *	xtree key/entry comparison: extent offset
28 *
29 * return:
30 *	-1: k < start of extent
31 *	 0: start_of_extent <= k <= end_of_extent
32 *	 1: k > end_of_extent
33 */
34#define XT_CMP(CMP, K, X, OFFSET64)\
35{\
36	OFFSET64 = offsetXAD(X);\
37	(CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
38		((K) < OFFSET64) ? -1 : 0;\
39}
40
41/* write a xad entry */
42#define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\
43{\
44	(XAD)->flag = (FLAG);\
45	XADoffset((XAD), (OFF));\
46	XADlength((XAD), (LEN));\
47	XADaddress((XAD), (ADDR));\
48}
49
50#define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot)
51
52/* get page buffer for specified block address */
53/* ToDo: Replace this ugly macro with a function */
54#define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)				\
55do {									\
56	BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot);	\
57	if (!(RC)) {							\
58		if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) || \
59		    (le16_to_cpu((P)->header.nextindex) >		\
60		     le16_to_cpu((P)->header.maxentry)) ||		\
61		    (le16_to_cpu((P)->header.maxentry) >		\
62		     (((BN) == 0) ? XTROOTMAXSLOT : PSIZE >> L2XTSLOTSIZE))) { \
63			jfs_error((IP)->i_sb,				\
64				  "XT_GETPAGE: xtree page corrupt\n");	\
65			BT_PUTPAGE(MP);					\
66			MP = NULL;					\
67			RC = -EIO;					\
68		}							\
69	}								\
70} while (0)
71
72/* for consistency */
73#define XT_PUTPAGE(MP) BT_PUTPAGE(MP)
74
75#define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
76	BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot)
77/* xtree entry parameter descriptor */
78struct xtsplit {
79	struct metapage *mp;
80	s16 index;
81	u8 flag;
82	s64 off;
83	s64 addr;
84	int len;
85	struct pxdlist *pxdlist;
86};
87
88
89/*
90 *	statistics
91 */
92#ifdef CONFIG_JFS_STATISTICS
93static struct {
94	uint search;
95	uint fastSearch;
96	uint split;
97} xtStat;
98#endif
99
100
101/*
102 * forward references
103 */
104static int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp,
105		    struct btstack * btstack, int flag);
106
107static int xtSplitUp(tid_t tid,
108		     struct inode *ip,
109		     struct xtsplit * split, struct btstack * btstack);
110
111static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split,
112		       struct metapage ** rmpp, s64 * rbnp);
113
114static int xtSplitRoot(tid_t tid, struct inode *ip,
115		       struct xtsplit * split, struct metapage ** rmpp);
116
117/*
118 *	xtLookup()
119 *
120 * function: map a single page into a physical extent;
121 */
122int xtLookup(struct inode *ip, s64 lstart,
123	     s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check)
124{
125	int rc = 0;
126	struct btstack btstack;
127	int cmp;
128	s64 bn;
129	struct metapage *mp;
130	xtpage_t *p;
131	int index;
132	xad_t *xad;
133	s64 next, size, xoff, xend;
134	int xlen;
135	s64 xaddr;
136
137	*paddr = 0;
138	*plen = llen;
139
140	if (!no_check) {
141		/* is lookup offset beyond eof ? */
142		size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
143		    JFS_SBI(ip->i_sb)->l2bsize;
144		if (lstart >= size)
145			return 0;
146	}
147
148	/*
149	 * search for the xad entry covering the logical extent
150	 */
151//search:
152	if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) {
153		jfs_err("xtLookup: xtSearch returned %d", rc);
154		return rc;
155	}
156
157	/*
158	 *	compute the physical extent covering logical extent
159	 *
160	 * N.B. search may have failed (e.g., hole in sparse file),
161	 * and returned the index of the next entry.
162	 */
163	/* retrieve search result */
164	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
165
166	/* is xad found covering start of logical extent ?
167	 * lstart is a page start address,
168	 * i.e., lstart cannot start in a hole;
169	 */
170	if (cmp) {
171		if (next)
172			*plen = min(next - lstart, llen);
173		goto out;
174	}
175
176	/*
177	 * lxd covered by xad
178	 */
179	xad = &p->xad[index];
180	xoff = offsetXAD(xad);
181	xlen = lengthXAD(xad);
182	xend = xoff + xlen;
183	xaddr = addressXAD(xad);
184
185	/* initialize new pxd */
186	*pflag = xad->flag;
187	*paddr = xaddr + (lstart - xoff);
188	/* a page must be fully covered by an xad */
189	*plen = min(xend - lstart, llen);
190
191      out:
192	XT_PUTPAGE(mp);
193
194	return rc;
195}
196
197/*
198 *	xtSearch()
199 *
200 * function:	search for the xad entry covering specified offset.
201 *
202 * parameters:
203 *	ip	- file object;
204 *	xoff	- extent offset;
205 *	nextp	- address of next extent (if any) for search miss
206 *	cmpp	- comparison result:
207 *	btstack - traverse stack;
208 *	flag	- search process flag (XT_INSERT);
209 *
210 * returns:
211 *	btstack contains (bn, index) of search path traversed to the entry.
212 *	*cmpp is set to result of comparison with the entry returned.
213 *	the page containing the entry is pinned at exit.
214 */
215static int xtSearch(struct inode *ip, s64 xoff,	s64 *nextp,
216		    int *cmpp, struct btstack * btstack, int flag)
217{
218	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
219	int rc = 0;
220	int cmp = 1;		/* init for empty page */
221	s64 bn;			/* block number */
222	struct metapage *mp;	/* page buffer */
223	xtpage_t *p;		/* page */
224	xad_t *xad;
225	int base, index, lim, btindex;
226	struct btframe *btsp;
227	int nsplit = 0;		/* number of pages to split */
228	s64 t64;
229	s64 next = 0;
230
231	INCREMENT(xtStat.search);
232
233	BT_CLR(btstack);
234
235	btstack->nsplit = 0;
236
237	/*
238	 *	search down tree from root:
239	 *
240	 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
241	 * internal page, child page Pi contains entry with k, Ki <= K < Kj.
242	 *
243	 * if entry with search key K is not found
244	 * internal page search find the entry with largest key Ki
245	 * less than K which point to the child page to search;
246	 * leaf page search find the entry with smallest key Kj
247	 * greater than K so that the returned index is the position of
248	 * the entry to be shifted right for insertion of new entry.
249	 * for empty tree, search key is greater than any key of the tree.
250	 *
251	 * by convention, root bn = 0.
252	 */
253	for (bn = 0;;) {
254		/* get/pin the page to search */
255		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
256		if (rc)
257			return rc;
258
259		/* try sequential access heuristics with the previous
260		 * access entry in target leaf page:
261		 * once search narrowed down into the target leaf,
262		 * key must either match an entry in the leaf or
263		 * key entry does not exist in the tree;
264		 */
265//fastSearch:
266		if ((jfs_ip->btorder & BT_SEQUENTIAL) &&
267		    (p->header.flag & BT_LEAF) &&
268		    (index = jfs_ip->btindex) <
269		    le16_to_cpu(p->header.nextindex)) {
270			xad = &p->xad[index];
271			t64 = offsetXAD(xad);
272			if (xoff < t64 + lengthXAD(xad)) {
273				if (xoff >= t64) {
274					*cmpp = 0;
275					goto out;
276				}
277
278				/* stop sequential access heuristics */
279				goto binarySearch;
280			} else {	/* (t64 + lengthXAD(xad)) <= xoff */
281
282				/* try next sequential entry */
283				index++;
284				if (index <
285				    le16_to_cpu(p->header.nextindex)) {
286					xad++;
287					t64 = offsetXAD(xad);
288					if (xoff < t64 + lengthXAD(xad)) {
289						if (xoff >= t64) {
290							*cmpp = 0;
291							goto out;
292						}
293
294						/* miss: key falls between
295						 * previous and this entry
296						 */
297						*cmpp = 1;
298						next = t64;
299						goto out;
300					}
301
302					/* (xoff >= t64 + lengthXAD(xad));
303					 * matching entry may be further out:
304					 * stop heuristic search
305					 */
306					/* stop sequential access heuristics */
307					goto binarySearch;
308				}
309
310				/* (index == p->header.nextindex);
311				 * miss: key entry does not exist in
312				 * the target leaf/tree
313				 */
314				*cmpp = 1;
315				goto out;
316			}
317
318			/*
319			 * if hit, return index of the entry found, and
320			 * if miss, where new entry with search key is
321			 * to be inserted;
322			 */
323		      out:
324			/* compute number of pages to split */
325			if (flag & XT_INSERT) {
326				if (p->header.nextindex ==	/* little-endian */
327				    p->header.maxentry)
328					nsplit++;
329				else
330					nsplit = 0;
331				btstack->nsplit = nsplit;
332			}
333
334			/* save search result */
335			btsp = btstack->top;
336			btsp->bn = bn;
337			btsp->index = index;
338			btsp->mp = mp;
339
340			/* update sequential access heuristics */
341			jfs_ip->btindex = index;
342
343			if (nextp)
344				*nextp = next;
345
346			INCREMENT(xtStat.fastSearch);
347			return 0;
348		}
349
350		/* well, ... full search now */
351	      binarySearch:
352		lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
353
354		/*
355		 * binary search with search key K on the current page
356		 */
357		for (base = XTENTRYSTART; lim; lim >>= 1) {
358			index = base + (lim >> 1);
359
360			XT_CMP(cmp, xoff, &p->xad[index], t64);
361			if (cmp == 0) {
362				/*
363				 *	search hit
364				 */
365				/* search hit - leaf page:
366				 * return the entry found
367				 */
368				if (p->header.flag & BT_LEAF) {
369					*cmpp = cmp;
370
371					/* compute number of pages to split */
372					if (flag & XT_INSERT) {
373						if (p->header.nextindex ==
374						    p->header.maxentry)
375							nsplit++;
376						else
377							nsplit = 0;
378						btstack->nsplit = nsplit;
379					}
380
381					/* save search result */
382					btsp = btstack->top;
383					btsp->bn = bn;
384					btsp->index = index;
385					btsp->mp = mp;
386
387					/* init sequential access heuristics */
388					btindex = jfs_ip->btindex;
389					if (index == btindex ||
390					    index == btindex + 1)
391						jfs_ip->btorder = BT_SEQUENTIAL;
392					else
393						jfs_ip->btorder = BT_RANDOM;
394					jfs_ip->btindex = index;
395
396					return 0;
397				}
398				/* search hit - internal page:
399				 * descend/search its child page
400				 */
401				if (index < le16_to_cpu(p->header.nextindex)-1)
402					next = offsetXAD(&p->xad[index + 1]);
403				goto next;
404			}
405
406			if (cmp > 0) {
407				base = index + 1;
408				--lim;
409			}
410		}
411
412		/*
413		 *	search miss
414		 *
415		 * base is the smallest index with key (Kj) greater than
416		 * search key (K) and may be zero or maxentry index.
417		 */
418		if (base < le16_to_cpu(p->header.nextindex))
419			next = offsetXAD(&p->xad[base]);
420		/*
421		 * search miss - leaf page:
422		 *
423		 * return location of entry (base) where new entry with
424		 * search key K is to be inserted.
425		 */
426		if (p->header.flag & BT_LEAF) {
427			*cmpp = cmp;
428
429			/* compute number of pages to split */
430			if (flag & XT_INSERT) {
431				if (p->header.nextindex ==
432				    p->header.maxentry)
433					nsplit++;
434				else
435					nsplit = 0;
436				btstack->nsplit = nsplit;
437			}
438
439			/* save search result */
440			btsp = btstack->top;
441			btsp->bn = bn;
442			btsp->index = base;
443			btsp->mp = mp;
444
445			/* init sequential access heuristics */
446			btindex = jfs_ip->btindex;
447			if (base == btindex || base == btindex + 1)
448				jfs_ip->btorder = BT_SEQUENTIAL;
449			else
450				jfs_ip->btorder = BT_RANDOM;
451			jfs_ip->btindex = base;
452
453			if (nextp)
454				*nextp = next;
455
456			return 0;
457		}
458
459		/*
460		 * search miss - non-leaf page:
461		 *
462		 * if base is non-zero, decrement base by one to get the parent
463		 * entry of the child page to search.
464		 */
465		index = base ? base - 1 : base;
466
467		/*
468		 * go down to child page
469		 */
470	      next:
471		/* update number of pages to split */
472		if (p->header.nextindex == p->header.maxentry)
473			nsplit++;
474		else
475			nsplit = 0;
476
477		/* push (bn, index) of the parent page/entry */
478		if (BT_STACK_FULL(btstack)) {
479			jfs_error(ip->i_sb, "stack overrun!\n");
480			XT_PUTPAGE(mp);
481			return -EIO;
482		}
483		BT_PUSH(btstack, bn, index);
484
485		/* get the child page block number */
486		bn = addressXAD(&p->xad[index]);
487
488		/* unpin the parent page */
489		XT_PUTPAGE(mp);
490	}
491}
492
493/*
494 *	xtInsert()
495 *
496 * function:
497 *
498 * parameter:
499 *	tid	- transaction id;
500 *	ip	- file object;
501 *	xflag	- extent flag (XAD_NOTRECORDED):
502 *	xoff	- extent offset;
503 *	xlen	- extent length;
504 *	xaddrp	- extent address pointer (in/out):
505 *		if (*xaddrp)
506 *			caller allocated data extent at *xaddrp;
507 *		else
508 *			allocate data extent and return its xaddr;
509 *	flag	-
510 *
511 * return:
512 */
513int xtInsert(tid_t tid,		/* transaction id */
514	     struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp,
515	     int flag)
516{
517	int rc = 0;
518	s64 xaddr, hint;
519	struct metapage *mp;	/* meta-page buffer */
520	xtpage_t *p;		/* base B+-tree index page */
521	s64 bn;
522	int index, nextindex;
523	struct btstack btstack;	/* traverse stack */
524	struct xtsplit split;	/* split information */
525	xad_t *xad;
526	int cmp;
527	s64 next;
528	struct tlock *tlck;
529	struct xtlock *xtlck;
530
531	jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
532
533	/*
534	 *	search for the entry location at which to insert:
535	 *
536	 * xtFastSearch() and xtSearch() both returns (leaf page
537	 * pinned, index at which to insert).
538	 * n.b. xtSearch() may return index of maxentry of
539	 * the full page.
540	 */
541	if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
542		return rc;
543
544	/* retrieve search result */
545	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
546
547	/* This test must follow XT_GETSEARCH since mp must be valid if
548	 * we branch to out: */
549	if ((cmp == 0) || (next && (xlen > next - xoff))) {
550		rc = -EEXIST;
551		goto out;
552	}
553
554	/*
555	 * allocate data extent requested
556	 *
557	 * allocation hint: last xad
558	 */
559	if ((xaddr = *xaddrp) == 0) {
560		if (index > XTENTRYSTART) {
561			xad = &p->xad[index - 1];
562			hint = addressXAD(xad) + lengthXAD(xad) - 1;
563		} else
564			hint = 0;
565		if ((rc = dquot_alloc_block(ip, xlen)))
566			goto out;
567		if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) {
568			dquot_free_block(ip, xlen);
569			goto out;
570		}
571	}
572
573	/*
574	 *	insert entry for new extent
575	 */
576	xflag |= XAD_NEW;
577
578	/*
579	 *	if the leaf page is full, split the page and
580	 *	propagate up the router entry for the new page from split
581	 *
582	 * The xtSplitUp() will insert the entry and unpin the leaf page.
583	 */
584	nextindex = le16_to_cpu(p->header.nextindex);
585	if (nextindex == le16_to_cpu(p->header.maxentry)) {
586		split.mp = mp;
587		split.index = index;
588		split.flag = xflag;
589		split.off = xoff;
590		split.len = xlen;
591		split.addr = xaddr;
592		split.pxdlist = NULL;
593		if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
594			/* undo data extent allocation */
595			if (*xaddrp == 0) {
596				dbFree(ip, xaddr, (s64) xlen);
597				dquot_free_block(ip, xlen);
598			}
599			return rc;
600		}
601
602		*xaddrp = xaddr;
603		return 0;
604	}
605
606	/*
607	 *	insert the new entry into the leaf page
608	 */
609	/*
610	 * acquire a transaction lock on the leaf page;
611	 *
612	 * action: xad insertion/extension;
613	 */
614	BT_MARK_DIRTY(mp, ip);
615
616	/* if insert into middle, shift right remaining entries. */
617	if (index < nextindex)
618		memmove(&p->xad[index + 1], &p->xad[index],
619			(nextindex - index) * sizeof(xad_t));
620
621	/* insert the new entry: mark the entry NEW */
622	xad = &p->xad[index];
623	XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
624
625	/* advance next available entry index */
626	le16_add_cpu(&p->header.nextindex, 1);
627
628	/* Don't log it if there are no links to the file */
629	if (!test_cflag(COMMIT_Nolink, ip)) {
630		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
631		xtlck = (struct xtlock *) & tlck->lock;
632		xtlck->lwm.offset =
633		    (xtlck->lwm.offset) ? min(index,
634					      (int)xtlck->lwm.offset) : index;
635		xtlck->lwm.length =
636		    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
637	}
638
639	*xaddrp = xaddr;
640
641      out:
642	/* unpin the leaf page */
643	XT_PUTPAGE(mp);
644
645	return rc;
646}
647
648
649/*
650 *	xtSplitUp()
651 *
652 * function:
653 *	split full pages as propagating insertion up the tree
654 *
655 * parameter:
656 *	tid	- transaction id;
657 *	ip	- file object;
658 *	split	- entry parameter descriptor;
659 *	btstack - traverse stack from xtSearch()
660 *
661 * return:
662 */
663static int
664xtSplitUp(tid_t tid,
665	  struct inode *ip, struct xtsplit * split, struct btstack * btstack)
666{
667	int rc = 0;
668	struct metapage *smp;
669	xtpage_t *sp;		/* split page */
670	struct metapage *rmp;
671	s64 rbn;		/* new right page block number */
672	struct metapage *rcmp;
673	xtpage_t *rcp;		/* right child page */
674	s64 rcbn;		/* right child page block number */
675	int skip;		/* index of entry of insertion */
676	int nextindex;		/* next available entry index of p */
677	struct btframe *parent;	/* parent page entry on traverse stack */
678	xad_t *xad;
679	s64 xaddr;
680	int xlen;
681	int nsplit;		/* number of pages split */
682	struct pxdlist pxdlist;
683	pxd_t *pxd;
684	struct tlock *tlck;
685	struct xtlock *xtlck;
686
687	smp = split->mp;
688	sp = XT_PAGE(ip, smp);
689
690	/* is inode xtree root extension/inline EA area free ? */
691	if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) &&
692	    (le16_to_cpu(sp->header.maxentry) < XTROOTMAXSLOT) &&
693	    (JFS_IP(ip)->mode2 & INLINEEA)) {
694		sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT);
695		JFS_IP(ip)->mode2 &= ~INLINEEA;
696
697		BT_MARK_DIRTY(smp, ip);
698		/*
699		 * acquire a transaction lock on the leaf page;
700		 *
701		 * action: xad insertion/extension;
702		 */
703
704		/* if insert into middle, shift right remaining entries. */
705		skip = split->index;
706		nextindex = le16_to_cpu(sp->header.nextindex);
707		if (skip < nextindex)
708			memmove(&sp->xad[skip + 1], &sp->xad[skip],
709				(nextindex - skip) * sizeof(xad_t));
710
711		/* insert the new entry: mark the entry NEW */
712		xad = &sp->xad[skip];
713		XT_PUTENTRY(xad, split->flag, split->off, split->len,
714			    split->addr);
715
716		/* advance next available entry index */
717		le16_add_cpu(&sp->header.nextindex, 1);
718
719		/* Don't log it if there are no links to the file */
720		if (!test_cflag(COMMIT_Nolink, ip)) {
721			tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
722			xtlck = (struct xtlock *) & tlck->lock;
723			xtlck->lwm.offset = (xtlck->lwm.offset) ?
724			    min(skip, (int)xtlck->lwm.offset) : skip;
725			xtlck->lwm.length =
726			    le16_to_cpu(sp->header.nextindex) -
727			    xtlck->lwm.offset;
728		}
729
730		return 0;
731	}
732
733	/*
734	 * allocate new index blocks to cover index page split(s)
735	 *
736	 * allocation hint: ?
737	 */
738	if (split->pxdlist == NULL) {
739		nsplit = btstack->nsplit;
740		split->pxdlist = &pxdlist;
741		pxdlist.maxnpxd = pxdlist.npxd = 0;
742		pxd = &pxdlist.pxd[0];
743		xlen = JFS_SBI(ip->i_sb)->nbperpage;
744		for (; nsplit > 0; nsplit--, pxd++) {
745			if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr))
746			    == 0) {
747				PXDaddress(pxd, xaddr);
748				PXDlength(pxd, xlen);
749
750				pxdlist.maxnpxd++;
751
752				continue;
753			}
754
755			/* undo allocation */
756
757			XT_PUTPAGE(smp);
758			return rc;
759		}
760	}
761
762	/*
763	 * Split leaf page <sp> into <sp> and a new right page <rp>.
764	 *
765	 * The split routines insert the new entry into the leaf page,
766	 * and acquire txLock as appropriate.
767	 * return <rp> pinned and its block number <rpbn>.
768	 */
769	rc = (sp->header.flag & BT_ROOT) ?
770	    xtSplitRoot(tid, ip, split, &rmp) :
771	    xtSplitPage(tid, ip, split, &rmp, &rbn);
772
773	XT_PUTPAGE(smp);
774
775	if (rc)
776		return -EIO;
777	/*
778	 * propagate up the router entry for the leaf page just split
779	 *
780	 * insert a router entry for the new page into the parent page,
781	 * propagate the insert/split up the tree by walking back the stack
782	 * of (bn of parent page, index of child page entry in parent page)
783	 * that were traversed during the search for the page that split.
784	 *
785	 * the propagation of insert/split up the tree stops if the root
786	 * splits or the page inserted into doesn't have to split to hold
787	 * the new entry.
788	 *
789	 * the parent entry for the split page remains the same, and
790	 * a new entry is inserted at its right with the first key and
791	 * block number of the new right page.
792	 *
793	 * There are a maximum of 3 pages pinned at any time:
794	 * right child, left parent and right parent (when the parent splits)
795	 * to keep the child page pinned while working on the parent.
796	 * make sure that all pins are released at exit.
797	 */
798	while ((parent = BT_POP(btstack)) != NULL) {
799		/* parent page specified by stack frame <parent> */
800
801		/* keep current child pages <rcp> pinned */
802		rcmp = rmp;
803		rcbn = rbn;
804		rcp = XT_PAGE(ip, rcmp);
805
806		/*
807		 * insert router entry in parent for new right child page <rp>
808		 */
809		/* get/pin the parent page <sp> */
810		XT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
811		if (rc) {
812			XT_PUTPAGE(rcmp);
813			return rc;
814		}
815
816		/*
817		 * The new key entry goes ONE AFTER the index of parent entry,
818		 * because the split was to the right.
819		 */
820		skip = parent->index + 1;
821
822		/*
823		 * split or shift right remaining entries of the parent page
824		 */
825		nextindex = le16_to_cpu(sp->header.nextindex);
826		/*
827		 * parent page is full - split the parent page
828		 */
829		if (nextindex == le16_to_cpu(sp->header.maxentry)) {
830			/* init for parent page split */
831			split->mp = smp;
832			split->index = skip;	/* index at insert */
833			split->flag = XAD_NEW;
834			split->off = offsetXAD(&rcp->xad[XTENTRYSTART]);
835			split->len = JFS_SBI(ip->i_sb)->nbperpage;
836			split->addr = rcbn;
837
838			/* unpin previous right child page */
839			XT_PUTPAGE(rcmp);
840
841			/* The split routines insert the new entry,
842			 * and acquire txLock as appropriate.
843			 * return <rp> pinned and its block number <rpbn>.
844			 */
845			rc = (sp->header.flag & BT_ROOT) ?
846			    xtSplitRoot(tid, ip, split, &rmp) :
847			    xtSplitPage(tid, ip, split, &rmp, &rbn);
848			if (rc) {
849				XT_PUTPAGE(smp);
850				return rc;
851			}
852
853			XT_PUTPAGE(smp);
854			/* keep new child page <rp> pinned */
855		}
856		/*
857		 * parent page is not full - insert in parent page
858		 */
859		else {
860			/*
861			 * insert router entry in parent for the right child
862			 * page from the first entry of the right child page:
863			 */
864			/*
865			 * acquire a transaction lock on the parent page;
866			 *
867			 * action: router xad insertion;
868			 */
869			BT_MARK_DIRTY(smp, ip);
870
871			/*
872			 * if insert into middle, shift right remaining entries
873			 */
874			if (skip < nextindex)
875				memmove(&sp->xad[skip + 1], &sp->xad[skip],
876					(nextindex -
877					 skip) << L2XTSLOTSIZE);
878
879			/* insert the router entry */
880			xad = &sp->xad[skip];
881			XT_PUTENTRY(xad, XAD_NEW,
882				    offsetXAD(&rcp->xad[XTENTRYSTART]),
883				    JFS_SBI(ip->i_sb)->nbperpage, rcbn);
884
885			/* advance next available entry index. */
886			le16_add_cpu(&sp->header.nextindex, 1);
887
888			/* Don't log it if there are no links to the file */
889			if (!test_cflag(COMMIT_Nolink, ip)) {
890				tlck = txLock(tid, ip, smp,
891					      tlckXTREE | tlckGROW);
892				xtlck = (struct xtlock *) & tlck->lock;
893				xtlck->lwm.offset = (xtlck->lwm.offset) ?
894				    min(skip, (int)xtlck->lwm.offset) : skip;
895				xtlck->lwm.length =
896				    le16_to_cpu(sp->header.nextindex) -
897				    xtlck->lwm.offset;
898			}
899
900			/* unpin parent page */
901			XT_PUTPAGE(smp);
902
903			/* exit propagate up */
904			break;
905		}
906	}
907
908	/* unpin current right page */
909	XT_PUTPAGE(rmp);
910
911	return 0;
912}
913
914
915/*
916 *	xtSplitPage()
917 *
918 * function:
919 *	split a full non-root page into
920 *	original/split/left page and new right page
921 *	i.e., the original/split page remains as left page.
922 *
923 * parameter:
924 *	int		tid,
925 *	struct inode	*ip,
926 *	struct xtsplit	*split,
927 *	struct metapage	**rmpp,
928 *	u64		*rbnp,
929 *
930 * return:
931 *	Pointer to page in which to insert or NULL on error.
932 */
933static int
934xtSplitPage(tid_t tid, struct inode *ip,
935	    struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp)
936{
937	int rc = 0;
938	struct metapage *smp;
939	xtpage_t *sp;
940	struct metapage *rmp;
941	xtpage_t *rp;		/* new right page allocated */
942	s64 rbn;		/* new right page block number */
943	struct metapage *mp;
944	xtpage_t *p;
945	s64 nextbn;
946	int skip, maxentry, middle, righthalf, n;
947	xad_t *xad;
948	struct pxdlist *pxdlist;
949	pxd_t *pxd;
950	struct tlock *tlck;
951	struct xtlock *sxtlck = NULL, *rxtlck = NULL;
952	int quota_allocation = 0;
953
954	smp = split->mp;
955	sp = XT_PAGE(ip, smp);
956
957	INCREMENT(xtStat.split);
958
959	pxdlist = split->pxdlist;
960	pxd = &pxdlist->pxd[pxdlist->npxd];
961	pxdlist->npxd++;
962	rbn = addressPXD(pxd);
963
964	/* Allocate blocks to quota. */
965	rc = dquot_alloc_block(ip, lengthPXD(pxd));
966	if (rc)
967		goto clean_up;
968
969	quota_allocation += lengthPXD(pxd);
970
971	/*
972	 * allocate the new right page for the split
973	 */
974	rmp = get_metapage(ip, rbn, PSIZE, 1);
975	if (rmp == NULL) {
976		rc = -EIO;
977		goto clean_up;
978	}
979
980	jfs_info("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);
981
982	BT_MARK_DIRTY(rmp, ip);
983	/*
984	 * action: new page;
985	 */
986
987	rp = (xtpage_t *) rmp->data;
988	rp->header.self = *pxd;
989	rp->header.flag = sp->header.flag & BT_TYPE;
990	rp->header.maxentry = sp->header.maxentry;	/* little-endian */
991	rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
992
993	BT_MARK_DIRTY(smp, ip);
994	/* Don't log it if there are no links to the file */
995	if (!test_cflag(COMMIT_Nolink, ip)) {
996		/*
997		 * acquire a transaction lock on the new right page;
998		 */
999		tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
1000		rxtlck = (struct xtlock *) & tlck->lock;
1001		rxtlck->lwm.offset = XTENTRYSTART;
1002		/*
1003		 * acquire a transaction lock on the split page
1004		 */
1005		tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
1006		sxtlck = (struct xtlock *) & tlck->lock;
1007	}
1008
1009	/*
1010	 * initialize/update sibling pointers of <sp> and <rp>
1011	 */
1012	nextbn = le64_to_cpu(sp->header.next);
1013	rp->header.next = cpu_to_le64(nextbn);
1014	rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
1015	sp->header.next = cpu_to_le64(rbn);
1016
1017	skip = split->index;
1018
1019	/*
1020	 *	sequential append at tail (after last entry of last page)
1021	 *
1022	 * if splitting the last page on a level because of appending
1023	 * a entry to it (skip is maxentry), it's likely that the access is
1024	 * sequential. adding an empty page on the side of the level is less
1025	 * work and can push the fill factor much higher than normal.
1026	 * if we're wrong it's no big deal -  we will do the split the right
1027	 * way next time.
1028	 * (it may look like it's equally easy to do a similar hack for
1029	 * reverse sorted data, that is, split the tree left, but it's not.
1030	 * Be my guest.)
1031	 */
1032	if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) {
1033		/*
1034		 * acquire a transaction lock on the new/right page;
1035		 *
1036		 * action: xad insertion;
1037		 */
1038		/* insert entry at the first entry of the new right page */
1039		xad = &rp->xad[XTENTRYSTART];
1040		XT_PUTENTRY(xad, split->flag, split->off, split->len,
1041			    split->addr);
1042
1043		rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);
1044
1045		if (!test_cflag(COMMIT_Nolink, ip)) {
1046			/* rxtlck->lwm.offset = XTENTRYSTART; */
1047			rxtlck->lwm.length = 1;
1048		}
1049
1050		*rmpp = rmp;
1051		*rbnp = rbn;
1052
1053		jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
1054		return 0;
1055	}
1056
1057	/*
1058	 *	non-sequential insert (at possibly middle page)
1059	 */
1060
1061	/*
1062	 * update previous pointer of old next/right page of <sp>
1063	 */
1064	if (nextbn != 0) {
1065		XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
1066		if (rc) {
1067			XT_PUTPAGE(rmp);
1068			goto clean_up;
1069		}
1070
1071		BT_MARK_DIRTY(mp, ip);
1072		/*
1073		 * acquire a transaction lock on the next page;
1074		 *
1075		 * action:sibling pointer update;
1076		 */
1077		if (!test_cflag(COMMIT_Nolink, ip))
1078			tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
1079
1080		p->header.prev = cpu_to_le64(rbn);
1081
1082		/* sibling page may have been updated previously, or
1083		 * it may be updated later;
1084		 */
1085
1086		XT_PUTPAGE(mp);
1087	}
1088
1089	/*
1090	 * split the data between the split and new/right pages
1091	 */
1092	maxentry = le16_to_cpu(sp->header.maxentry);
1093	middle = maxentry >> 1;
1094	righthalf = maxentry - middle;
1095
1096	/*
1097	 * skip index in old split/left page - insert into left page:
1098	 */
1099	if (skip <= middle) {
1100		/* move right half of split page to the new right page */
1101		memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
1102			righthalf << L2XTSLOTSIZE);
1103
1104		/* shift right tail of left half to make room for new entry */
1105		if (skip < middle)
1106			memmove(&sp->xad[skip + 1], &sp->xad[skip],
1107				(middle - skip) << L2XTSLOTSIZE);
1108
1109		/* insert new entry */
1110		xad = &sp->xad[skip];
1111		XT_PUTENTRY(xad, split->flag, split->off, split->len,
1112			    split->addr);
1113
1114		/* update page header */
1115		sp->header.nextindex = cpu_to_le16(middle + 1);
1116		if (!test_cflag(COMMIT_Nolink, ip)) {
1117			sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
1118			    min(skip, (int)sxtlck->lwm.offset) : skip;
1119		}
1120
1121		rp->header.nextindex =
1122		    cpu_to_le16(XTENTRYSTART + righthalf);
1123	}
1124	/*
1125	 * skip index in new right page - insert into right page:
1126	 */
1127	else {
1128		/* move left head of right half to right page */
1129		n = skip - middle;
1130		memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
1131			n << L2XTSLOTSIZE);
1132
1133		/* insert new entry */
1134		n += XTENTRYSTART;
1135		xad = &rp->xad[n];
1136		XT_PUTENTRY(xad, split->flag, split->off, split->len,
1137			    split->addr);
1138
1139		/* move right tail of right half to right page */
1140		if (skip < maxentry)
1141			memmove(&rp->xad[n + 1], &sp->xad[skip],
1142				(maxentry - skip) << L2XTSLOTSIZE);
1143
1144		/* update page header */
1145		sp->header.nextindex = cpu_to_le16(middle);
1146		if (!test_cflag(COMMIT_Nolink, ip)) {
1147			sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
1148			    min(middle, (int)sxtlck->lwm.offset) : middle;
1149		}
1150
1151		rp->header.nextindex = cpu_to_le16(XTENTRYSTART +
1152						   righthalf + 1);
1153	}
1154
1155	if (!test_cflag(COMMIT_Nolink, ip)) {
1156		sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) -
1157		    sxtlck->lwm.offset;
1158
1159		/* rxtlck->lwm.offset = XTENTRYSTART; */
1160		rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
1161		    XTENTRYSTART;
1162	}
1163
1164	*rmpp = rmp;
1165	*rbnp = rbn;
1166
1167	jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
1168	return rc;
1169
1170      clean_up:
1171
1172	/* Rollback quota allocation. */
1173	if (quota_allocation)
1174		dquot_free_block(ip, quota_allocation);
1175
1176	return (rc);
1177}
1178
1179
1180/*
1181 *	xtSplitRoot()
1182 *
1183 * function:
1184 *	split the full root page into original/root/split page and new
1185 *	right page
1186 *	i.e., root remains fixed in tree anchor (inode) and the root is
1187 *	copied to a single new right child page since root page <<
1188 *	non-root page, and the split root page contains a single entry
1189 *	for the new right child page.
1190 *
1191 * parameter:
1192 *	int		tid,
1193 *	struct inode	*ip,
1194 *	struct xtsplit	*split,
1195 *	struct metapage	**rmpp)
1196 *
1197 * return:
1198 *	Pointer to page in which to insert or NULL on error.
1199 */
1200static int
1201xtSplitRoot(tid_t tid,
1202	    struct inode *ip, struct xtsplit * split, struct metapage ** rmpp)
1203{
1204	xtpage_t *sp;
1205	struct metapage *rmp;
1206	xtpage_t *rp;
1207	s64 rbn;
1208	int skip, nextindex;
1209	xad_t *xad;
1210	pxd_t *pxd;
1211	struct pxdlist *pxdlist;
1212	struct tlock *tlck;
1213	struct xtlock *xtlck;
1214	int rc;
1215
1216	sp = (xtpage_t *) &JFS_IP(ip)->i_xtroot;
1217
1218	INCREMENT(xtStat.split);
1219
1220	/*
1221	 *	allocate a single (right) child page
1222	 */
1223	pxdlist = split->pxdlist;
1224	pxd = &pxdlist->pxd[pxdlist->npxd];
1225	pxdlist->npxd++;
1226	rbn = addressPXD(pxd);
1227	rmp = get_metapage(ip, rbn, PSIZE, 1);
1228	if (rmp == NULL)
1229		return -EIO;
1230
1231	/* Allocate blocks to quota. */
1232	rc = dquot_alloc_block(ip, lengthPXD(pxd));
1233	if (rc) {
1234		release_metapage(rmp);
1235		return rc;
1236	}
1237
1238	jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp);
1239
1240	/*
1241	 * acquire a transaction lock on the new right page;
1242	 *
1243	 * action: new page;
1244	 */
1245	BT_MARK_DIRTY(rmp, ip);
1246
1247	rp = (xtpage_t *) rmp->data;
1248	rp->header.flag =
1249	    (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL;
1250	rp->header.self = *pxd;
1251	rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
1252	rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE);
1253
1254	/* initialize sibling pointers */
1255	rp->header.next = 0;
1256	rp->header.prev = 0;
1257
1258	/*
1259	 * copy the in-line root page into new right page extent
1260	 */
1261	nextindex = le16_to_cpu(sp->header.maxentry);
1262	memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART],
1263		(nextindex - XTENTRYSTART) << L2XTSLOTSIZE);
1264
1265	/*
1266	 * insert the new entry into the new right/child page
1267	 * (skip index in the new right page will not change)
1268	 */
1269	skip = split->index;
1270	/* if insert into middle, shift right remaining entries */
1271	if (skip != nextindex)
1272		memmove(&rp->xad[skip + 1], &rp->xad[skip],
1273			(nextindex - skip) * sizeof(xad_t));
1274
1275	xad = &rp->xad[skip];
1276	XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr);
1277
1278	/* update page header */
1279	rp->header.nextindex = cpu_to_le16(nextindex + 1);
1280
1281	if (!test_cflag(COMMIT_Nolink, ip)) {
1282		tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
1283		xtlck = (struct xtlock *) & tlck->lock;
1284		xtlck->lwm.offset = XTENTRYSTART;
1285		xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
1286		    XTENTRYSTART;
1287	}
1288
1289	/*
1290	 *	reset the root
1291	 *
1292	 * init root with the single entry for the new right page
1293	 * set the 1st entry offset to 0, which force the left-most key
1294	 * at any level of the tree to be less than any search key.
1295	 */
1296	/*
1297	 * acquire a transaction lock on the root page (in-memory inode);
1298	 *
1299	 * action: root split;
1300	 */
1301	BT_MARK_DIRTY(split->mp, ip);
1302
1303	xad = &sp->xad[XTENTRYSTART];
1304	XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn);
1305
1306	/* update page header of root */
1307	sp->header.flag &= ~BT_LEAF;
1308	sp->header.flag |= BT_INTERNAL;
1309
1310	sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);
1311
1312	if (!test_cflag(COMMIT_Nolink, ip)) {
1313		tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW);
1314		xtlck = (struct xtlock *) & tlck->lock;
1315		xtlck->lwm.offset = XTENTRYSTART;
1316		xtlck->lwm.length = 1;
1317	}
1318
1319	*rmpp = rmp;
1320
1321	jfs_info("xtSplitRoot: sp:0x%p rp:0x%p", sp, rp);
1322	return 0;
1323}
1324
1325
1326/*
1327 *	xtExtend()
1328 *
1329 * function: extend in-place;
1330 *
1331 * note: existing extent may or may not have been committed.
1332 * caller is responsible for pager buffer cache update, and
1333 * working block allocation map update;
1334 * update pmap: alloc whole extended extent;
1335 */
1336int xtExtend(tid_t tid,		/* transaction id */
1337	     struct inode *ip, s64 xoff,	/* delta extent offset */
1338	     s32 xlen,		/* delta extent length */
1339	     int flag)
1340{
1341	int rc = 0;
1342	int cmp;
1343	struct metapage *mp;	/* meta-page buffer */
1344	xtpage_t *p;		/* base B+-tree index page */
1345	s64 bn;
1346	int index, nextindex, len;
1347	struct btstack btstack;	/* traverse stack */
1348	struct xtsplit split;	/* split information */
1349	xad_t *xad;
1350	s64 xaddr;
1351	struct tlock *tlck;
1352	struct xtlock *xtlck = NULL;
1353
1354	jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
1355
1356	/* there must exist extent to be extended */
1357	if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT)))
1358		return rc;
1359
1360	/* retrieve search result */
1361	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
1362
1363	if (cmp != 0) {
1364		XT_PUTPAGE(mp);
1365		jfs_error(ip->i_sb, "xtSearch did not find extent\n");
1366		return -EIO;
1367	}
1368
1369	/* extension must be contiguous */
1370	xad = &p->xad[index];
1371	if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) {
1372		XT_PUTPAGE(mp);
1373		jfs_error(ip->i_sb, "extension is not contiguous\n");
1374		return -EIO;
1375	}
1376
1377	/*
1378	 * acquire a transaction lock on the leaf page;
1379	 *
1380	 * action: xad insertion/extension;
1381	 */
1382	BT_MARK_DIRTY(mp, ip);
1383	if (!test_cflag(COMMIT_Nolink, ip)) {
1384		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
1385		xtlck = (struct xtlock *) & tlck->lock;
1386	}
1387
1388	/* extend will overflow extent ? */
1389	xlen = lengthXAD(xad) + xlen;
1390	if ((len = xlen - MAXXLEN) <= 0)
1391		goto extendOld;
1392
1393	/*
1394	 *	extent overflow: insert entry for new extent
1395	 */
1396//insertNew:
1397	xoff = offsetXAD(xad) + MAXXLEN;
1398	xaddr = addressXAD(xad) + MAXXLEN;
1399	nextindex = le16_to_cpu(p->header.nextindex);
1400
1401	/*
1402	 *	if the leaf page is full, insert the new entry and
1403	 *	propagate up the router entry for the new page from split
1404	 *
1405	 * The xtSplitUp() will insert the entry and unpin the leaf page.
1406	 */
1407	if (nextindex == le16_to_cpu(p->header.maxentry)) {
1408		/* xtSpliUp() unpins leaf pages */
1409		split.mp = mp;
1410		split.index = index + 1;
1411		split.flag = XAD_NEW;
1412		split.off = xoff;	/* split offset */
1413		split.len = len;
1414		split.addr = xaddr;
1415		split.pxdlist = NULL;
1416		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
1417			return rc;
1418
1419		/* get back old page */
1420		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1421		if (rc)
1422			return rc;
1423		/*
1424		 * if leaf root has been split, original root has been
1425		 * copied to new child page, i.e., original entry now
1426		 * resides on the new child page;
1427		 */
1428		if (p->header.flag & BT_INTERNAL) {
1429			ASSERT(p->header.nextindex ==
1430			       cpu_to_le16(XTENTRYSTART + 1));
1431			xad = &p->xad[XTENTRYSTART];
1432			bn = addressXAD(xad);
1433			XT_PUTPAGE(mp);
1434
1435			/* get new child page */
1436			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1437			if (rc)
1438				return rc;
1439
1440			BT_MARK_DIRTY(mp, ip);
1441			if (!test_cflag(COMMIT_Nolink, ip)) {
1442				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
1443				xtlck = (struct xtlock *) & tlck->lock;
1444			}
1445		}
1446	}
1447	/*
1448	 *	insert the new entry into the leaf page
1449	 */
1450	else {
1451		/* insert the new entry: mark the entry NEW */
1452		xad = &p->xad[index + 1];
1453		XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr);
1454
1455		/* advance next available entry index */
1456		le16_add_cpu(&p->header.nextindex, 1);
1457	}
1458
1459	/* get back old entry */
1460	xad = &p->xad[index];
1461	xlen = MAXXLEN;
1462
1463	/*
1464	 * extend old extent
1465	 */
1466      extendOld:
1467	XADlength(xad, xlen);
1468	if (!(xad->flag & XAD_NEW))
1469		xad->flag |= XAD_EXTENDED;
1470
1471	if (!test_cflag(COMMIT_Nolink, ip)) {
1472		xtlck->lwm.offset =
1473		    (xtlck->lwm.offset) ? min(index,
1474					      (int)xtlck->lwm.offset) : index;
1475		xtlck->lwm.length =
1476		    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
1477	}
1478
1479	/* unpin the leaf page */
1480	XT_PUTPAGE(mp);
1481
1482	return rc;
1483}
1484
1485/*
1486 *	xtUpdate()
1487 *
1488 * function: update XAD;
1489 *
1490 *	update extent for allocated_but_not_recorded or
1491 *	compressed extent;
1492 *
1493 * parameter:
1494 *	nxad	- new XAD;
1495 *		logical extent of the specified XAD must be completely
1496 *		contained by an existing XAD;
1497 */
1498int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
1499{				/* new XAD */
1500	int rc = 0;
1501	int cmp;
1502	struct metapage *mp;	/* meta-page buffer */
1503	xtpage_t *p;		/* base B+-tree index page */
1504	s64 bn;
1505	int index0, index, newindex, nextindex;
1506	struct btstack btstack;	/* traverse stack */
1507	struct xtsplit split;	/* split information */
1508	xad_t *xad, *lxad, *rxad;
1509	int xflag;
1510	s64 nxoff, xoff;
1511	int nxlen, xlen, lxlen, rxlen;
1512	s64 nxaddr, xaddr;
1513	struct tlock *tlck;
1514	struct xtlock *xtlck = NULL;
1515	int newpage = 0;
1516
1517	/* there must exist extent to be tailgated */
1518	nxoff = offsetXAD(nxad);
1519	nxlen = lengthXAD(nxad);
1520	nxaddr = addressXAD(nxad);
1521
1522	if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
1523		return rc;
1524
1525	/* retrieve search result */
1526	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);
1527
1528	if (cmp != 0) {
1529		XT_PUTPAGE(mp);
1530		jfs_error(ip->i_sb, "Could not find extent\n");
1531		return -EIO;
1532	}
1533
1534	BT_MARK_DIRTY(mp, ip);
1535	/*
1536	 * acquire tlock of the leaf page containing original entry
1537	 */
1538	if (!test_cflag(COMMIT_Nolink, ip)) {
1539		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
1540		xtlck = (struct xtlock *) & tlck->lock;
1541	}
1542
1543	xad = &p->xad[index0];
1544	xflag = xad->flag;
1545	xoff = offsetXAD(xad);
1546	xlen = lengthXAD(xad);
1547	xaddr = addressXAD(xad);
1548
1549	/* nXAD must be completely contained within XAD */
1550	if ((xoff > nxoff) ||
1551	    (nxoff + nxlen > xoff + xlen)) {
1552		XT_PUTPAGE(mp);
1553		jfs_error(ip->i_sb,
1554			  "nXAD in not completely contained within XAD\n");
1555		return -EIO;
1556	}
1557
1558	index = index0;
1559	newindex = index + 1;
1560	nextindex = le16_to_cpu(p->header.nextindex);
1561
1562	if (xoff < nxoff)
1563		goto coalesceRight;
1564
1565	/*
1566	 * coalesce with left XAD
1567	 */
1568	/* is XAD first entry of page ? */
1569	if (index == XTENTRYSTART)
1570		goto replace;
1571
1572	/* is nXAD logically and physically contiguous with lXAD ? */
1573	lxad = &p->xad[index - 1];
1574	lxlen = lengthXAD(lxad);
1575	if (!(lxad->flag & XAD_NOTRECORDED) &&
1576	    (nxoff == offsetXAD(lxad) + lxlen) &&
1577	    (nxaddr == addressXAD(lxad) + lxlen) &&
1578	    (lxlen + nxlen < MAXXLEN)) {
1579		/* extend right lXAD */
1580		index0 = index - 1;
1581		XADlength(lxad, lxlen + nxlen);
1582
1583		/* If we just merged two extents together, need to make sure the
1584		 * right extent gets logged.  If the left one is marked XAD_NEW,
1585		 * then we know it will be logged.  Otherwise, mark as
1586		 * XAD_EXTENDED
1587		 */
1588		if (!(lxad->flag & XAD_NEW))
1589			lxad->flag |= XAD_EXTENDED;
1590
1591		if (xlen > nxlen) {
1592			/* truncate XAD */
1593			XADoffset(xad, xoff + nxlen);
1594			XADlength(xad, xlen - nxlen);
1595			XADaddress(xad, xaddr + nxlen);
1596			goto out;
1597		} else {	/* (xlen == nxlen) */
1598
1599			/* remove XAD */
1600			if (index < nextindex - 1)
1601				memmove(&p->xad[index], &p->xad[index + 1],
1602					(nextindex - index -
1603					 1) << L2XTSLOTSIZE);
1604
1605			p->header.nextindex =
1606			    cpu_to_le16(le16_to_cpu(p->header.nextindex) -
1607					1);
1608
1609			index = index0;
1610			newindex = index + 1;
1611			nextindex = le16_to_cpu(p->header.nextindex);
1612			xoff = nxoff = offsetXAD(lxad);
1613			xlen = nxlen = lxlen + nxlen;
1614			xaddr = nxaddr = addressXAD(lxad);
1615			goto coalesceRight;
1616		}
1617	}
1618
1619	/*
1620	 * replace XAD with nXAD
1621	 */
1622      replace:			/* (nxoff == xoff) */
1623	if (nxlen == xlen) {
1624		/* replace XAD with nXAD:recorded */
1625		*xad = *nxad;
1626		xad->flag = xflag & ~XAD_NOTRECORDED;
1627
1628		goto coalesceRight;
1629	} else			/* (nxlen < xlen) */
1630		goto updateLeft;
1631
1632	/*
1633	 * coalesce with right XAD
1634	 */
1635      coalesceRight:		/* (xoff <= nxoff) */
1636	/* is XAD last entry of page ? */
1637	if (newindex == nextindex) {
1638		if (xoff == nxoff)
1639			goto out;
1640		goto updateRight;
1641	}
1642
1643	/* is nXAD logically and physically contiguous with rXAD ? */
1644	rxad = &p->xad[index + 1];
1645	rxlen = lengthXAD(rxad);
1646	if (!(rxad->flag & XAD_NOTRECORDED) &&
1647	    (nxoff + nxlen == offsetXAD(rxad)) &&
1648	    (nxaddr + nxlen == addressXAD(rxad)) &&
1649	    (rxlen + nxlen < MAXXLEN)) {
1650		/* extend left rXAD */
1651		XADoffset(rxad, nxoff);
1652		XADlength(rxad, rxlen + nxlen);
1653		XADaddress(rxad, nxaddr);
1654
1655		/* If we just merged two extents together, need to make sure
1656		 * the left extent gets logged.  If the right one is marked
1657		 * XAD_NEW, then we know it will be logged.  Otherwise, mark as
1658		 * XAD_EXTENDED
1659		 */
1660		if (!(rxad->flag & XAD_NEW))
1661			rxad->flag |= XAD_EXTENDED;
1662
1663		if (xlen > nxlen)
1664			/* truncate XAD */
1665			XADlength(xad, xlen - nxlen);
1666		else {		/* (xlen == nxlen) */
1667
1668			/* remove XAD */
1669			memmove(&p->xad[index], &p->xad[index + 1],
1670				(nextindex - index - 1) << L2XTSLOTSIZE);
1671
1672			p->header.nextindex =
1673			    cpu_to_le16(le16_to_cpu(p->header.nextindex) -
1674					1);
1675		}
1676
1677		goto out;
1678	} else if (xoff == nxoff)
1679		goto out;
1680
1681	if (xoff >= nxoff) {
1682		XT_PUTPAGE(mp);
1683		jfs_error(ip->i_sb, "xoff >= nxoff\n");
1684		return -EIO;
1685	}
1686
1687	/*
1688	 * split XAD into (lXAD, nXAD):
1689	 *
1690	 *          |---nXAD--->
1691	 * --|----------XAD----------|--
1692	 *   |-lXAD-|
1693	 */
1694      updateRight:		/* (xoff < nxoff) */
1695	/* truncate old XAD as lXAD:not_recorded */
1696	xad = &p->xad[index];
1697	XADlength(xad, nxoff - xoff);
1698
1699	/* insert nXAD:recorded */
1700	if (nextindex == le16_to_cpu(p->header.maxentry)) {
1701
1702		/* xtSpliUp() unpins leaf pages */
1703		split.mp = mp;
1704		split.index = newindex;
1705		split.flag = xflag & ~XAD_NOTRECORDED;
1706		split.off = nxoff;
1707		split.len = nxlen;
1708		split.addr = nxaddr;
1709		split.pxdlist = NULL;
1710		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
1711			return rc;
1712
1713		/* get back old page */
1714		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1715		if (rc)
1716			return rc;
1717		/*
1718		 * if leaf root has been split, original root has been
1719		 * copied to new child page, i.e., original entry now
1720		 * resides on the new child page;
1721		 */
1722		if (p->header.flag & BT_INTERNAL) {
1723			ASSERT(p->header.nextindex ==
1724			       cpu_to_le16(XTENTRYSTART + 1));
1725			xad = &p->xad[XTENTRYSTART];
1726			bn = addressXAD(xad);
1727			XT_PUTPAGE(mp);
1728
1729			/* get new child page */
1730			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1731			if (rc)
1732				return rc;
1733
1734			BT_MARK_DIRTY(mp, ip);
1735			if (!test_cflag(COMMIT_Nolink, ip)) {
1736				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
1737				xtlck = (struct xtlock *) & tlck->lock;
1738			}
1739		} else {
1740			/* is nXAD on new page ? */
1741			if (newindex >
1742			    (le16_to_cpu(p->header.maxentry) >> 1)) {
1743				newindex =
1744				    newindex -
1745				    le16_to_cpu(p->header.nextindex) +
1746				    XTENTRYSTART;
1747				newpage = 1;
1748			}
1749		}
1750	} else {
1751		/* if insert into middle, shift right remaining entries */
1752		if (newindex < nextindex)
1753			memmove(&p->xad[newindex + 1], &p->xad[newindex],
1754				(nextindex - newindex) << L2XTSLOTSIZE);
1755
1756		/* insert the entry */
1757		xad = &p->xad[newindex];
1758		*xad = *nxad;
1759		xad->flag = xflag & ~XAD_NOTRECORDED;
1760
1761		/* advance next available entry index. */
1762		p->header.nextindex =
1763		    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
1764	}
1765
1766	/*
1767	 * does nXAD force 3-way split ?
1768	 *
1769	 *          |---nXAD--->|
1770	 * --|----------XAD-------------|--
1771	 *   |-lXAD-|           |-rXAD -|
1772	 */
1773	if (nxoff + nxlen == xoff + xlen)
1774		goto out;
1775
1776	/* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */
1777	if (newpage) {
1778		/* close out old page */
1779		if (!test_cflag(COMMIT_Nolink, ip)) {
1780			xtlck->lwm.offset = (xtlck->lwm.offset) ?
1781			    min(index0, (int)xtlck->lwm.offset) : index0;
1782			xtlck->lwm.length =
1783			    le16_to_cpu(p->header.nextindex) -
1784			    xtlck->lwm.offset;
1785		}
1786
1787		bn = le64_to_cpu(p->header.next);
1788		XT_PUTPAGE(mp);
1789
1790		/* get new right page */
1791		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1792		if (rc)
1793			return rc;
1794
1795		BT_MARK_DIRTY(mp, ip);
1796		if (!test_cflag(COMMIT_Nolink, ip)) {
1797			tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
1798			xtlck = (struct xtlock *) & tlck->lock;
1799		}
1800
1801		index0 = index = newindex;
1802	} else
1803		index++;
1804
1805	newindex = index + 1;
1806	nextindex = le16_to_cpu(p->header.nextindex);
1807	xlen = xlen - (nxoff - xoff);
1808	xoff = nxoff;
1809	xaddr = nxaddr;
1810
1811	/* recompute split pages */
1812	if (nextindex == le16_to_cpu(p->header.maxentry)) {
1813		XT_PUTPAGE(mp);
1814
1815		if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
1816			return rc;
1817
1818		/* retrieve search result */
1819		XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);
1820
1821		if (cmp != 0) {
1822			XT_PUTPAGE(mp);
1823			jfs_error(ip->i_sb, "xtSearch failed\n");
1824			return -EIO;
1825		}
1826
1827		if (index0 != index) {
1828			XT_PUTPAGE(mp);
1829			jfs_error(ip->i_sb, "unexpected value of index\n");
1830			return -EIO;
1831		}
1832	}
1833
1834	/*
1835	 * split XAD into (nXAD, rXAD)
1836	 *
1837	 *          ---nXAD---|
1838	 * --|----------XAD----------|--
1839	 *                    |-rXAD-|
1840	 */
1841      updateLeft:		/* (nxoff == xoff) && (nxlen < xlen) */
1842	/* update old XAD with nXAD:recorded */
1843	xad = &p->xad[index];
1844	*xad = *nxad;
1845	xad->flag = xflag & ~XAD_NOTRECORDED;
1846
1847	/* insert rXAD:not_recorded */
1848	xoff = xoff + nxlen;
1849	xlen = xlen - nxlen;
1850	xaddr = xaddr + nxlen;
1851	if (nextindex == le16_to_cpu(p->header.maxentry)) {
1852/*
1853printf("xtUpdate.updateLeft.split p:0x%p\n", p);
1854*/
1855		/* xtSpliUp() unpins leaf pages */
1856		split.mp = mp;
1857		split.index = newindex;
1858		split.flag = xflag;
1859		split.off = xoff;
1860		split.len = xlen;
1861		split.addr = xaddr;
1862		split.pxdlist = NULL;
1863		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
1864			return rc;
1865
1866		/* get back old page */
1867		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1868		if (rc)
1869			return rc;
1870
1871		/*
1872		 * if leaf root has been split, original root has been
1873		 * copied to new child page, i.e., original entry now
1874		 * resides on the new child page;
1875		 */
1876		if (p->header.flag & BT_INTERNAL) {
1877			ASSERT(p->header.nextindex ==
1878			       cpu_to_le16(XTENTRYSTART + 1));
1879			xad = &p->xad[XTENTRYSTART];
1880			bn = addressXAD(xad);
1881			XT_PUTPAGE(mp);
1882
1883			/* get new child page */
1884			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1885			if (rc)
1886				return rc;
1887
1888			BT_MARK_DIRTY(mp, ip);
1889			if (!test_cflag(COMMIT_Nolink, ip)) {
1890				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
1891				xtlck = (struct xtlock *) & tlck->lock;
1892			}
1893		}
1894	} else {
1895		/* if insert into middle, shift right remaining entries */
1896		if (newindex < nextindex)
1897			memmove(&p->xad[newindex + 1], &p->xad[newindex],
1898				(nextindex - newindex) << L2XTSLOTSIZE);
1899
1900		/* insert the entry */
1901		xad = &p->xad[newindex];
1902		XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
1903
1904		/* advance next available entry index. */
1905		p->header.nextindex =
1906		    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
1907	}
1908
1909      out:
1910	if (!test_cflag(COMMIT_Nolink, ip)) {
1911		xtlck->lwm.offset = (xtlck->lwm.offset) ?
1912		    min(index0, (int)xtlck->lwm.offset) : index0;
1913		xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
1914		    xtlck->lwm.offset;
1915	}
1916
1917	/* unpin the leaf page */
1918	XT_PUTPAGE(mp);
1919
1920	return rc;
1921}
1922
1923
1924/*
1925 *	xtAppend()
1926 *
1927 * function: grow in append mode from contiguous region specified ;
1928 *
1929 * parameter:
1930 *	tid		- transaction id;
1931 *	ip		- file object;
1932 *	xflag		- extent flag:
1933 *	xoff		- extent offset;
1934 *	maxblocks	- max extent length;
1935 *	xlen		- extent length (in/out);
1936 *	xaddrp		- extent address pointer (in/out):
1937 *	flag		-
1938 *
1939 * return:
1940 */
1941int xtAppend(tid_t tid,		/* transaction id */
1942	     struct inode *ip, int xflag, s64 xoff, s32 maxblocks,
1943	     s32 * xlenp,	/* (in/out) */
1944	     s64 * xaddrp,	/* (in/out) */
1945	     int flag)
1946{
1947	int rc = 0;
1948	struct metapage *mp;	/* meta-page buffer */
1949	xtpage_t *p;		/* base B+-tree index page */
1950	s64 bn, xaddr;
1951	int index, nextindex;
1952	struct btstack btstack;	/* traverse stack */
1953	struct xtsplit split;	/* split information */
1954	xad_t *xad;
1955	int cmp;
1956	struct tlock *tlck;
1957	struct xtlock *xtlck;
1958	int nsplit, nblocks, xlen;
1959	struct pxdlist pxdlist;
1960	pxd_t *pxd;
1961	s64 next;
1962
1963	xaddr = *xaddrp;
1964	xlen = *xlenp;
1965	jfs_info("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx",
1966		 (ulong) xoff, maxblocks, xlen, (ulong) xaddr);
1967
1968	/*
1969	 *	search for the entry location at which to insert:
1970	 *
1971	 * xtFastSearch() and xtSearch() both returns (leaf page
1972	 * pinned, index at which to insert).
1973	 * n.b. xtSearch() may return index of maxentry of
1974	 * the full page.
1975	 */
1976	if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
1977		return rc;
1978
1979	/* retrieve search result */
1980	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
1981
1982	if (cmp == 0) {
1983		rc = -EEXIST;
1984		goto out;
1985	}
1986
1987	if (next)
1988		xlen = min(xlen, (int)(next - xoff));
1989//insert:
1990	/*
1991	 *	insert entry for new extent
1992	 */
1993	xflag |= XAD_NEW;
1994
1995	/*
1996	 *	if the leaf page is full, split the page and
1997	 *	propagate up the router entry for the new page from split
1998	 *
1999	 * The xtSplitUp() will insert the entry and unpin the leaf page.
2000	 */
2001	nextindex = le16_to_cpu(p->header.nextindex);
2002	if (nextindex < le16_to_cpu(p->header.maxentry))
2003		goto insertLeaf;
2004
2005	/*
2006	 * allocate new index blocks to cover index page split(s)
2007	 */
2008	nsplit = btstack.nsplit;
2009	split.pxdlist = &pxdlist;
2010	pxdlist.maxnpxd = pxdlist.npxd = 0;
2011	pxd = &pxdlist.pxd[0];
2012	nblocks = JFS_SBI(ip->i_sb)->nbperpage;
2013	for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) {
2014		if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) {
2015			PXDaddress(pxd, xaddr);
2016			PXDlength(pxd, nblocks);
2017
2018			pxdlist.maxnpxd++;
2019
2020			continue;
2021		}
2022
2023		/* undo allocation */
2024
2025		goto out;
2026	}
2027
2028	xlen = min(xlen, maxblocks);
2029
2030	/*
2031	 * allocate data extent requested
2032	 */
2033	if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
2034		goto out;
2035
2036	split.mp = mp;
2037	split.index = index;
2038	split.flag = xflag;
2039	split.off = xoff;
2040	split.len = xlen;
2041	split.addr = xaddr;
2042	if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
2043		/* undo data extent allocation */
2044		dbFree(ip, *xaddrp, (s64) * xlenp);
2045
2046		return rc;
2047	}
2048
2049	*xaddrp = xaddr;
2050	*xlenp = xlen;
2051	return 0;
2052
2053	/*
2054	 *	insert the new entry into the leaf page
2055	 */
2056      insertLeaf:
2057	/*
2058	 * allocate data extent requested
2059	 */
2060	if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
2061		goto out;
2062
2063	BT_MARK_DIRTY(mp, ip);
2064	/*
2065	 * acquire a transaction lock on the leaf page;
2066	 *
2067	 * action: xad insertion/extension;
2068	 */
2069	tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
2070	xtlck = (struct xtlock *) & tlck->lock;
2071
2072	/* insert the new entry: mark the entry NEW */
2073	xad = &p->xad[index];
2074	XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
2075
2076	/* advance next available entry index */
2077	le16_add_cpu(&p->header.nextindex, 1);
2078
2079	xtlck->lwm.offset =
2080	    (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index;
2081	xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
2082	    xtlck->lwm.offset;
2083
2084	*xaddrp = xaddr;
2085	*xlenp = xlen;
2086
2087      out:
2088	/* unpin the leaf page */
2089	XT_PUTPAGE(mp);
2090
2091	return rc;
2092}
2093
2094/*
2095 *	xtInitRoot()
2096 *
2097 * initialize file root (inline in inode)
2098 */
2099void xtInitRoot(tid_t tid, struct inode *ip)
2100{
2101	xtroot_t *p;
2102
2103	/*
2104	 * acquire a transaction lock on the root
2105	 *
2106	 * action:
2107	 */
2108	txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag,
2109		      tlckXTREE | tlckNEW);
2110	p = &JFS_IP(ip)->i_xtroot;
2111
2112	p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF;
2113	p->header.nextindex = cpu_to_le16(XTENTRYSTART);
2114
2115	if (S_ISDIR(ip->i_mode))
2116		p->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR);
2117	else {
2118		p->header.maxentry = cpu_to_le16(XTROOTINITSLOT);
2119		ip->i_size = 0;
2120	}
2121
2122
2123	return;
2124}
2125
2126
2127/*
2128 * We can run into a deadlock truncating a file with a large number of
2129 * xtree pages (large fragmented file).  A robust fix would entail a
2130 * reservation system where we would reserve a number of metadata pages
2131 * and tlocks which we would be guaranteed without a deadlock.  Without
2132 * this, a partial fix is to limit number of metadata pages we will lock
2133 * in a single transaction.  Currently we will truncate the file so that
2134 * no more than 50 leaf pages will be locked.  The caller of xtTruncate
2135 * will be responsible for ensuring that the current transaction gets
2136 * committed, and that subsequent transactions are created to truncate
2137 * the file further if needed.
2138 */
2139#define MAX_TRUNCATE_LEAVES 50
2140
2141/*
2142 *	xtTruncate()
2143 *
2144 * function:
2145 *	traverse for truncation logging backward bottom up;
2146 *	terminate at the last extent entry at the current subtree
2147 *	root page covering new down size.
2148 *	truncation may occur within the last extent entry.
2149 *
2150 * parameter:
2151 *	int		tid,
2152 *	struct inode	*ip,
2153 *	s64		newsize,
2154 *	int		type)	{PWMAP, PMAP, WMAP; DELETE, TRUNCATE}
2155 *
2156 * return:
2157 *
2158 * note:
2159 *	PWMAP:
2160 *	 1. truncate (non-COMMIT_NOLINK file)
2161 *	    by jfs_truncate() or jfs_open(O_TRUNC):
2162 *	    xtree is updated;
2163 *	 2. truncate index table of directory when last entry removed
2164 *	map update via tlock at commit time;
2165 *	PMAP:
2166 *	 Call xtTruncate_pmap instead
2167 *	WMAP:
2168 *	 1. remove (free zero link count) on last reference release
2169 *	    (pmap has been freed at commit zero link count);
2170 *	 2. truncate (COMMIT_NOLINK file, i.e., tmp file):
2171 *	    xtree is updated;
2172 *	 map update directly at truncation time;
2173 *
2174 *	if (DELETE)
2175 *		no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient);
2176 *	else if (TRUNCATE)
2177 *		must write LOG_NOREDOPAGE for deleted index page;
2178 *
2179 * pages may already have been tlocked by anonymous transactions
2180 * during file growth (i.e., write) before truncation;
2181 *
2182 * except last truncated entry, deleted entries remains as is
2183 * in the page (nextindex is updated) for other use
2184 * (e.g., log/update allocation map): this avoid copying the page
2185 * info but delay free of pages;
2186 *
2187 */
2188s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
2189{
2190	int rc = 0;
2191	s64 teof;
2192	struct metapage *mp;
2193	xtpage_t *p;
2194	s64 bn;
2195	int index, nextindex;
2196	xad_t *xad;
2197	s64 xoff, xaddr;
2198	int xlen, len, freexlen;
2199	struct btstack btstack;
2200	struct btframe *parent;
2201	struct tblock *tblk = NULL;
2202	struct tlock *tlck = NULL;
2203	struct xtlock *xtlck = NULL;
2204	struct xdlistlock xadlock;	/* maplock for COMMIT_WMAP */
2205	struct pxd_lock *pxdlock;		/* maplock for COMMIT_WMAP */
2206	s64 nfreed;
2207	int freed, log;
2208	int locked_leaves = 0;
2209
2210	/* save object truncation type */
2211	if (tid) {
2212		tblk = tid_to_tblock(tid);
2213		tblk->xflag |= flag;
2214	}
2215
2216	nfreed = 0;
2217
2218	flag &= COMMIT_MAP;
2219	assert(flag != COMMIT_PMAP);
2220
2221	if (flag == COMMIT_PWMAP)
2222		log = 1;
2223	else {
2224		log = 0;
2225		xadlock.flag = mlckFREEXADLIST;
2226		xadlock.index = 1;
2227	}
2228
2229	/*
2230	 * if the newsize is not an integral number of pages,
2231	 * the file between newsize and next page boundary will
2232	 * be cleared.
2233	 * if truncating into a file hole, it will cause
2234	 * a full block to be allocated for the logical block.
2235	 */
2236
2237	/*
2238	 * release page blocks of truncated region <teof, eof>
2239	 *
2240	 * free the data blocks from the leaf index blocks.
2241	 * delete the parent index entries corresponding to
2242	 * the freed child data/index blocks.
2243	 * free the index blocks themselves which aren't needed
2244	 * in new sized file.
2245	 *
2246	 * index blocks are updated only if the blocks are to be
2247	 * retained in the new sized file.
2248	 * if type is PMAP, the data and index pages are NOT
2249	 * freed, and the data and index blocks are NOT freed
2250	 * from working map.
2251	 * (this will allow continued access of data/index of
2252	 * temporary file (zerolink count file truncated to zero-length)).
2253	 */
2254	teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
2255	    JFS_SBI(ip->i_sb)->l2bsize;
2256
2257	/* clear stack */
2258	BT_CLR(&btstack);
2259
2260	/*
2261	 * start with root
2262	 *
2263	 * root resides in the inode
2264	 */
2265	bn = 0;
2266
2267	/*
2268	 * first access of each page:
2269	 */
2270      getPage:
2271	XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2272	if (rc)
2273		return rc;
2274
2275	/* process entries backward from last index */
2276	index = le16_to_cpu(p->header.nextindex) - 1;
2277
2278
2279	/* Since this is the rightmost page at this level, and we may have
2280	 * already freed a page that was formerly to the right, let's make
2281	 * sure that the next pointer is zero.
2282	 */
2283	if (p->header.next) {
2284		if (log)
2285			/*
2286			 * Make sure this change to the header is logged.
2287			 * If we really truncate this leaf, the flag
2288			 * will be changed to tlckTRUNCATE
2289			 */
2290			tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
2291		BT_MARK_DIRTY(mp, ip);
2292		p->header.next = 0;
2293	}
2294
2295	if (p->header.flag & BT_INTERNAL)
2296		goto getChild;
2297
2298	/*
2299	 *	leaf page
2300	 */
2301	freed = 0;
2302
2303	/* does region covered by leaf page precede Teof ? */
2304	xad = &p->xad[index];
2305	xoff = offsetXAD(xad);
2306	xlen = lengthXAD(xad);
2307	if (teof >= xoff + xlen) {
2308		XT_PUTPAGE(mp);
2309		goto getParent;
2310	}
2311
2312	/* (re)acquire tlock of the leaf page */
2313	if (log) {
2314		if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
2315			/*
2316			 * We need to limit the size of the transaction
2317			 * to avoid exhausting pagecache & tlocks
2318			 */
2319			XT_PUTPAGE(mp);
2320			newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
2321			goto getParent;
2322		}
2323		tlck = txLock(tid, ip, mp, tlckXTREE);
2324		tlck->type = tlckXTREE | tlckTRUNCATE;
2325		xtlck = (struct xtlock *) & tlck->lock;
2326		xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
2327	}
2328	BT_MARK_DIRTY(mp, ip);
2329
2330	/*
2331	 * scan backward leaf page entries
2332	 */
2333	for (; index >= XTENTRYSTART; index--) {
2334		xad = &p->xad[index];
2335		xoff = offsetXAD(xad);
2336		xlen = lengthXAD(xad);
2337		xaddr = addressXAD(xad);
2338
2339		/*
2340		 * The "data" for a directory is indexed by the block
2341		 * device's address space.  This metadata must be invalidated
2342		 * here
2343		 */
2344		if (S_ISDIR(ip->i_mode) && (teof == 0))
2345			invalidate_xad_metapages(ip, *xad);
2346		/*
2347		 * entry beyond eof: continue scan of current page
2348		 *          xad
2349		 * ---|---=======------->
2350		 *   eof
2351		 */
2352		if (teof < xoff) {
2353			nfreed += xlen;
2354			continue;
2355		}
2356
2357		/*
2358		 * (xoff <= teof): last entry to be deleted from page;
2359		 * If other entries remain in page: keep and update the page.
2360		 */
2361
2362		/*
2363		 * eof == entry_start: delete the entry
2364		 *           xad
2365		 * -------|=======------->
2366		 *       eof
2367		 *
2368		 */
2369		if (teof == xoff) {
2370			nfreed += xlen;
2371
2372			if (index == XTENTRYSTART)
2373				break;
2374
2375			nextindex = index;
2376		}
2377		/*
2378		 * eof within the entry: truncate the entry.
2379		 *          xad
2380		 * -------===|===------->
2381		 *          eof
2382		 */
2383		else if (teof < xoff + xlen) {
2384			/* update truncated entry */
2385			len = teof - xoff;
2386			freexlen = xlen - len;
2387			XADlength(xad, len);
2388
2389			/* save pxd of truncated extent in tlck */
2390			xaddr += len;
2391			if (log) {	/* COMMIT_PWMAP */
2392				xtlck->lwm.offset = (xtlck->lwm.offset) ?
2393				    min(index, (int)xtlck->lwm.offset) : index;
2394				xtlck->lwm.length = index + 1 -
2395				    xtlck->lwm.offset;
2396				xtlck->twm.offset = index;
2397				pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
2398				pxdlock->flag = mlckFREEPXD;
2399				PXDaddress(&pxdlock->pxd, xaddr);
2400				PXDlength(&pxdlock->pxd, freexlen);
2401			}
2402			/* free truncated extent */
2403			else {	/* COMMIT_WMAP */
2404
2405				pxdlock = (struct pxd_lock *) & xadlock;
2406				pxdlock->flag = mlckFREEPXD;
2407				PXDaddress(&pxdlock->pxd, xaddr);
2408				PXDlength(&pxdlock->pxd, freexlen);
2409				txFreeMap(ip, pxdlock, NULL, COMMIT_WMAP);
2410
2411				/* reset map lock */
2412				xadlock.flag = mlckFREEXADLIST;
2413			}
2414
2415			/* current entry is new last entry; */
2416			nextindex = index + 1;
2417
2418			nfreed += freexlen;
2419		}
2420		/*
2421		 * eof beyond the entry:
2422		 *          xad
2423		 * -------=======---|--->
2424		 *                 eof
2425		 */
2426		else {		/* (xoff + xlen < teof) */
2427
2428			nextindex = index + 1;
2429		}
2430
2431		if (nextindex < le16_to_cpu(p->header.nextindex)) {
2432			if (!log) {	/* COMMIT_WAMP */
2433				xadlock.xdlist = &p->xad[nextindex];
2434				xadlock.count =
2435				    le16_to_cpu(p->header.nextindex) -
2436				    nextindex;
2437				txFreeMap(ip, (struct maplock *) & xadlock,
2438					  NULL, COMMIT_WMAP);
2439			}
2440			p->header.nextindex = cpu_to_le16(nextindex);
2441		}
2442
2443		XT_PUTPAGE(mp);
2444
2445		/* assert(freed == 0); */
2446		goto getParent;
2447	}			/* end scan of leaf page entries */
2448
2449	freed = 1;
2450
2451	/*
2452	 * leaf page become empty: free the page if type != PMAP
2453	 */
2454	if (log) {		/* COMMIT_PWMAP */
2455		/* txCommit() with tlckFREE:
2456		 * free data extents covered by leaf [XTENTRYSTART:hwm);
2457		 * invalidate leaf if COMMIT_PWMAP;
2458		 * if (TRUNCATE), will write LOG_NOREDOPAGE;
2459		 */
2460		tlck->type = tlckXTREE | tlckFREE;
2461	} else {		/* COMMIT_WAMP */
2462
2463		/* free data extents covered by leaf */
2464		xadlock.xdlist = &p->xad[XTENTRYSTART];
2465		xadlock.count =
2466		    le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
2467		txFreeMap(ip, (struct maplock *) & xadlock, NULL, COMMIT_WMAP);
2468	}
2469
2470	if (p->header.flag & BT_ROOT) {
2471		p->header.flag &= ~BT_INTERNAL;
2472		p->header.flag |= BT_LEAF;
2473		p->header.nextindex = cpu_to_le16(XTENTRYSTART);
2474
2475		XT_PUTPAGE(mp);	/* debug */
2476		goto out;
2477	} else {
2478		if (log) {	/* COMMIT_PWMAP */
2479			/* page will be invalidated at tx completion
2480			 */
2481			XT_PUTPAGE(mp);
2482		} else {	/* COMMIT_WMAP */
2483
2484			if (mp->lid)
2485				lid_to_tlock(mp->lid)->flag |= tlckFREELOCK;
2486
2487			/* invalidate empty leaf page */
2488			discard_metapage(mp);
2489		}
2490	}
2491
2492	/*
2493	 * the leaf page become empty: delete the parent entry
2494	 * for the leaf page if the parent page is to be kept
2495	 * in the new sized file.
2496	 */
2497
2498	/*
2499	 * go back up to the parent page
2500	 */
2501      getParent:
2502	/* pop/restore parent entry for the current child page */
2503	if ((parent = BT_POP(&btstack)) == NULL)
2504		/* current page must have been root */
2505		goto out;
2506
2507	/* get back the parent page */
2508	bn = parent->bn;
2509	XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2510	if (rc)
2511		return rc;
2512
2513	index = parent->index;
2514
2515	/*
2516	 * child page was not empty:
2517	 */
2518	if (freed == 0) {
2519		/* has any entry deleted from parent ? */
2520		if (index < le16_to_cpu(p->header.nextindex) - 1) {
2521			/* (re)acquire tlock on the parent page */
2522			if (log) {	/* COMMIT_PWMAP */
2523				/* txCommit() with tlckTRUNCATE:
2524				 * free child extents covered by parent [);
2525				 */
2526				tlck = txLock(tid, ip, mp, tlckXTREE);
2527				xtlck = (struct xtlock *) & tlck->lock;
2528				if (!(tlck->type & tlckTRUNCATE)) {
2529					xtlck->hwm.offset =
2530					    le16_to_cpu(p->header.
2531							nextindex) - 1;
2532					tlck->type =
2533					    tlckXTREE | tlckTRUNCATE;
2534				}
2535			} else {	/* COMMIT_WMAP */
2536
2537				/* free child extents covered by parent */
2538				xadlock.xdlist = &p->xad[index + 1];
2539				xadlock.count =
2540				    le16_to_cpu(p->header.nextindex) -
2541				    index - 1;
2542				txFreeMap(ip, (struct maplock *) & xadlock,
2543					  NULL, COMMIT_WMAP);
2544			}
2545			BT_MARK_DIRTY(mp, ip);
2546
2547			p->header.nextindex = cpu_to_le16(index + 1);
2548		}
2549		XT_PUTPAGE(mp);
2550		goto getParent;
2551	}
2552
2553	/*
2554	 * child page was empty:
2555	 */
2556	nfreed += lengthXAD(&p->xad[index]);
2557
2558	/*
2559	 * During working map update, child page's tlock must be handled
2560	 * before parent's.  This is because the parent's tlock will cause
2561	 * the child's disk space to be marked available in the wmap, so
2562	 * it's important that the child page be released by that time.
2563	 *
2564	 * ToDo:  tlocks should be on doubly-linked list, so we can
2565	 * quickly remove it and add it to the end.
2566	 */
2567
2568	/*
2569	 * Move parent page's tlock to the end of the tid's tlock list
2570	 */
2571	if (log && mp->lid && (tblk->last != mp->lid) &&
2572	    lid_to_tlock(mp->lid)->tid) {
2573		lid_t lid = mp->lid;
2574		struct tlock *prev;
2575
2576		tlck = lid_to_tlock(lid);
2577
2578		if (tblk->next == lid)
2579			tblk->next = tlck->next;
2580		else {
2581			for (prev = lid_to_tlock(tblk->next);
2582			     prev->next != lid;
2583			     prev = lid_to_tlock(prev->next)) {
2584				assert(prev->next);
2585			}
2586			prev->next = tlck->next;
2587		}
2588		lid_to_tlock(tblk->last)->next = lid;
2589		tlck->next = 0;
2590		tblk->last = lid;
2591	}
2592
2593	/*
2594	 * parent page become empty: free the page
2595	 */
2596	if (index == XTENTRYSTART) {
2597		if (log) {	/* COMMIT_PWMAP */
2598			/* txCommit() with tlckFREE:
2599			 * free child extents covered by parent;
2600			 * invalidate parent if COMMIT_PWMAP;
2601			 */
2602			tlck = txLock(tid, ip, mp, tlckXTREE);
2603			xtlck = (struct xtlock *) & tlck->lock;
2604			xtlck->hwm.offset =
2605			    le16_to_cpu(p->header.nextindex) - 1;
2606			tlck->type = tlckXTREE | tlckFREE;
2607		} else {	/* COMMIT_WMAP */
2608
2609			/* free child extents covered by parent */
2610			xadlock.xdlist = &p->xad[XTENTRYSTART];
2611			xadlock.count =
2612			    le16_to_cpu(p->header.nextindex) -
2613			    XTENTRYSTART;
2614			txFreeMap(ip, (struct maplock *) & xadlock, NULL,
2615				  COMMIT_WMAP);
2616		}
2617		BT_MARK_DIRTY(mp, ip);
2618
2619		if (p->header.flag & BT_ROOT) {
2620			p->header.flag &= ~BT_INTERNAL;
2621			p->header.flag |= BT_LEAF;
2622			p->header.nextindex = cpu_to_le16(XTENTRYSTART);
2623			if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) {
2624				/*
2625				 * Shrink root down to allow inline
2626				 * EA (otherwise fsck complains)
2627				 */
2628				p->header.maxentry =
2629				    cpu_to_le16(XTROOTINITSLOT);
2630				JFS_IP(ip)->mode2 |= INLINEEA;
2631			}
2632
2633			XT_PUTPAGE(mp);	/* debug */
2634			goto out;
2635		} else {
2636			if (log) {	/* COMMIT_PWMAP */
2637				/* page will be invalidated at tx completion
2638				 */
2639				XT_PUTPAGE(mp);
2640			} else {	/* COMMIT_WMAP */
2641
2642				if (mp->lid)
2643					lid_to_tlock(mp->lid)->flag |=
2644						tlckFREELOCK;
2645
2646				/* invalidate parent page */
2647				discard_metapage(mp);
2648			}
2649
2650			/* parent has become empty and freed:
2651			 * go back up to its parent page
2652			 */
2653			/* freed = 1; */
2654			goto getParent;
2655		}
2656	}
2657	/*
2658	 * parent page still has entries for front region;
2659	 */
2660	else {
2661		/* try truncate region covered by preceding entry
2662		 * (process backward)
2663		 */
2664		index--;
2665
2666		/* go back down to the child page corresponding
2667		 * to the entry
2668		 */
2669		goto getChild;
2670	}
2671
2672	/*
2673	 *	internal page: go down to child page of current entry
2674	 */
2675      getChild:
2676	/* save current parent entry for the child page */
2677	if (BT_STACK_FULL(&btstack)) {
2678		jfs_error(ip->i_sb, "stack overrun!\n");
2679		XT_PUTPAGE(mp);
2680		return -EIO;
2681	}
2682	BT_PUSH(&btstack, bn, index);
2683
2684	/* get child page */
2685	xad = &p->xad[index];
2686	bn = addressXAD(xad);
2687
2688	/*
2689	 * first access of each internal entry:
2690	 */
2691	/* release parent page */
2692	XT_PUTPAGE(mp);
2693
2694	/* process the child page */
2695	goto getPage;
2696
2697      out:
2698	/*
2699	 * update file resource stat
2700	 */
2701	/* set size
2702	 */
2703	if (S_ISDIR(ip->i_mode) && !newsize)
2704		ip->i_size = 1;	/* fsck hates zero-length directories */
2705	else
2706		ip->i_size = newsize;
2707
2708	/* update quota allocation to reflect freed blocks */
2709	dquot_free_block(ip, nfreed);
2710
2711	/*
2712	 * free tlock of invalidated pages
2713	 */
2714	if (flag == COMMIT_WMAP)
2715		txFreelock(ip);
2716
2717	return newsize;
2718}
2719
2720
2721/*
2722 *	xtTruncate_pmap()
2723 *
2724 * function:
2725 *	Perform truncate to zero length for deleted file, leaving the
2726 *	xtree and working map untouched.  This allows the file to
2727 *	be accessed via open file handles, while the delete of the file
2728 *	is committed to disk.
2729 *
2730 * parameter:
2731 *	tid_t		tid,
2732 *	struct inode	*ip,
2733 *	s64		committed_size)
2734 *
2735 * return: new committed size
2736 *
2737 * note:
2738 *
2739 *	To avoid deadlock by holding too many transaction locks, the
2740 *	truncation may be broken up into multiple transactions.
2741 *	The committed_size keeps track of part of the file has been
2742 *	freed from the pmaps.
2743 */
2744s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
2745{
2746	s64 bn;
2747	struct btstack btstack;
2748	int cmp;
2749	int index;
2750	int locked_leaves = 0;
2751	struct metapage *mp;
2752	xtpage_t *p;
2753	struct btframe *parent;
2754	int rc;
2755	struct tblock *tblk;
2756	struct tlock *tlck = NULL;
2757	xad_t *xad;
2758	int xlen;
2759	s64 xoff;
2760	struct xtlock *xtlck = NULL;
2761
2762	/* save object truncation type */
2763	tblk = tid_to_tblock(tid);
2764	tblk->xflag |= COMMIT_PMAP;
2765
2766	/* clear stack */
2767	BT_CLR(&btstack);
2768
2769	if (committed_size) {
2770		xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1;
2771		rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0);
2772		if (rc)
2773			return rc;
2774
2775		XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
2776
2777		if (cmp != 0) {
2778			XT_PUTPAGE(mp);
2779			jfs_error(ip->i_sb, "did not find extent\n");
2780			return -EIO;
2781		}
2782	} else {
2783		/*
2784		 * start with root
2785		 *
2786		 * root resides in the inode
2787		 */
2788		bn = 0;
2789
2790		/*
2791		 * first access of each page:
2792		 */
2793      getPage:
2794		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2795		if (rc)
2796			return rc;
2797
2798		/* process entries backward from last index */
2799		index = le16_to_cpu(p->header.nextindex) - 1;
2800
2801		if (p->header.flag & BT_INTERNAL)
2802			goto getChild;
2803	}
2804
2805	/*
2806	 *	leaf page
2807	 */
2808
2809	if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
2810		/*
2811		 * We need to limit the size of the transaction
2812		 * to avoid exhausting pagecache & tlocks
2813		 */
2814		xad = &p->xad[index];
2815		xoff = offsetXAD(xad);
2816		xlen = lengthXAD(xad);
2817		XT_PUTPAGE(mp);
2818		return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
2819	}
2820	tlck = txLock(tid, ip, mp, tlckXTREE);
2821	tlck->type = tlckXTREE | tlckFREE;
2822	xtlck = (struct xtlock *) & tlck->lock;
2823	xtlck->hwm.offset = index;
2824
2825
2826	XT_PUTPAGE(mp);
2827
2828	/*
2829	 * go back up to the parent page
2830	 */
2831      getParent:
2832	/* pop/restore parent entry for the current child page */
2833	if ((parent = BT_POP(&btstack)) == NULL)
2834		/* current page must have been root */
2835		goto out;
2836
2837	/* get back the parent page */
2838	bn = parent->bn;
2839	XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2840	if (rc)
2841		return rc;
2842
2843	index = parent->index;
2844
2845	/*
2846	 * parent page become empty: free the page
2847	 */
2848	if (index == XTENTRYSTART) {
2849		/* txCommit() with tlckFREE:
2850		 * free child extents covered by parent;
2851		 * invalidate parent if COMMIT_PWMAP;
2852		 */
2853		tlck = txLock(tid, ip, mp, tlckXTREE);
2854		xtlck = (struct xtlock *) & tlck->lock;
2855		xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
2856		tlck->type = tlckXTREE | tlckFREE;
2857
2858		XT_PUTPAGE(mp);
2859
2860		if (p->header.flag & BT_ROOT) {
2861
2862			goto out;
2863		} else {
2864			goto getParent;
2865		}
2866	}
2867	/*
2868	 * parent page still has entries for front region;
2869	 */
2870	else
2871		index--;
2872	/*
2873	 *	internal page: go down to child page of current entry
2874	 */
2875      getChild:
2876	/* save current parent entry for the child page */
2877	if (BT_STACK_FULL(&btstack)) {
2878		jfs_error(ip->i_sb, "stack overrun!\n");
2879		XT_PUTPAGE(mp);
2880		return -EIO;
2881	}
2882	BT_PUSH(&btstack, bn, index);
2883
2884	/* get child page */
2885	xad = &p->xad[index];
2886	bn = addressXAD(xad);
2887
2888	/*
2889	 * first access of each internal entry:
2890	 */
2891	/* release parent page */
2892	XT_PUTPAGE(mp);
2893
2894	/* process the child page */
2895	goto getPage;
2896
2897      out:
2898
2899	return 0;
2900}
2901
2902#ifdef CONFIG_JFS_STATISTICS
2903int jfs_xtstat_proc_show(struct seq_file *m, void *v)
2904{
2905	seq_printf(m,
2906		       "JFS Xtree statistics\n"
2907		       "====================\n"
2908		       "searches = %d\n"
2909		       "fast searches = %d\n"
2910		       "splits = %d\n",
2911		       xtStat.search,
2912		       xtStat.fastSearch,
2913		       xtStat.split);
2914	return 0;
2915}
2916#endif
2917