1/*
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_types.h"
21#include "xfs_bit.h"
22#include "xfs_log.h"
23#include "xfs_inum.h"
24#include "xfs_trans.h"
25#include "xfs_sb.h"
26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h"
30#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h"
36#include "xfs_inode.h"
37#include "xfs_btree.h"
38#include "xfs_ialloc.h"
39#include "xfs_alloc.h"
40#include "xfs_error.h"
41
42
43#define XFS_ABSDIFF(a,b)	(((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
44
45#define	XFSA_FIXUP_BNO_OK	1
46#define	XFSA_FIXUP_CNT_OK	2
47
48STATIC int
49xfs_alloc_search_busy(xfs_trans_t *tp,
50		    xfs_agnumber_t agno,
51		    xfs_agblock_t bno,
52		    xfs_extlen_t len);
53
54#if defined(XFS_ALLOC_TRACE)
55ktrace_t *xfs_alloc_trace_buf;
56
57#define	TRACE_ALLOC(s,a)	\
58	xfs_alloc_trace_alloc(fname, s, a, __LINE__)
59#define	TRACE_FREE(s,a,b,x,f)	\
60	xfs_alloc_trace_free(fname, s, mp, a, b, x, f, __LINE__)
61#define	TRACE_MODAGF(s,a,f)	\
62	xfs_alloc_trace_modagf(fname, s, mp, a, f, __LINE__)
63#define	TRACE_BUSY(fname,s,ag,agb,l,sl,tp)	\
64	xfs_alloc_trace_busy(fname, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSY, __LINE__)
65#define	TRACE_UNBUSY(fname,s,ag,sl,tp)	\
66	xfs_alloc_trace_busy(fname, s, mp, ag, -1, -1, sl, tp, XFS_ALLOC_KTRACE_UNBUSY, __LINE__)
67#define	TRACE_BUSYSEARCH(fname,s,ag,agb,l,sl,tp)	\
68	xfs_alloc_trace_busy(fname, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSYSEARCH, __LINE__)
69#else
70#define	TRACE_ALLOC(s,a)
71#define	TRACE_FREE(s,a,b,x,f)
72#define	TRACE_MODAGF(s,a,f)
73#define	TRACE_BUSY(s,a,ag,agb,l,sl,tp)
74#define	TRACE_UNBUSY(fname,s,ag,sl,tp)
75#define	TRACE_BUSYSEARCH(fname,s,ag,agb,l,sl,tp)
76#endif	/* XFS_ALLOC_TRACE */
77
78/*
79 * Prototypes for per-ag allocation routines
80 */
81
82STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
83STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
84STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
85STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
86	xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
87
88/*
89 * Internal functions.
90 */
91
92/*
93 * Compute aligned version of the found extent.
94 * Takes alignment and min length into account.
95 */
96STATIC int				/* success (>= minlen) */
97xfs_alloc_compute_aligned(
98	xfs_agblock_t	foundbno,	/* starting block in found extent */
99	xfs_extlen_t	foundlen,	/* length in found extent */
100	xfs_extlen_t	alignment,	/* alignment for allocation */
101	xfs_extlen_t	minlen,		/* minimum length for allocation */
102	xfs_agblock_t	*resbno,	/* result block number */
103	xfs_extlen_t	*reslen)	/* result length */
104{
105	xfs_agblock_t	bno;
106	xfs_extlen_t	diff;
107	xfs_extlen_t	len;
108
109	if (alignment > 1 && foundlen >= minlen) {
110		bno = roundup(foundbno, alignment);
111		diff = bno - foundbno;
112		len = diff >= foundlen ? 0 : foundlen - diff;
113	} else {
114		bno = foundbno;
115		len = foundlen;
116	}
117	*resbno = bno;
118	*reslen = len;
119	return len >= minlen;
120}
121
122/*
123 * Compute best start block and diff for "near" allocations.
124 * freelen >= wantlen already checked by caller.
125 */
126STATIC xfs_extlen_t			/* difference value (absolute) */
127xfs_alloc_compute_diff(
128	xfs_agblock_t	wantbno,	/* target starting block */
129	xfs_extlen_t	wantlen,	/* target length */
130	xfs_extlen_t	alignment,	/* target alignment */
131	xfs_agblock_t	freebno,	/* freespace's starting block */
132	xfs_extlen_t	freelen,	/* freespace's length */
133	xfs_agblock_t	*newbnop)	/* result: best start block from free */
134{
135	xfs_agblock_t	freeend;	/* end of freespace extent */
136	xfs_agblock_t	newbno1;	/* return block number */
137	xfs_agblock_t	newbno2;	/* other new block number */
138	xfs_extlen_t	newlen1=0;	/* length with newbno1 */
139	xfs_extlen_t	newlen2=0;	/* length with newbno2 */
140	xfs_agblock_t	wantend;	/* end of target extent */
141
142	ASSERT(freelen >= wantlen);
143	freeend = freebno + freelen;
144	wantend = wantbno + wantlen;
145	if (freebno >= wantbno) {
146		if ((newbno1 = roundup(freebno, alignment)) >= freeend)
147			newbno1 = NULLAGBLOCK;
148	} else if (freeend >= wantend && alignment > 1) {
149		newbno1 = roundup(wantbno, alignment);
150		newbno2 = newbno1 - alignment;
151		if (newbno1 >= freeend)
152			newbno1 = NULLAGBLOCK;
153		else
154			newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
155		if (newbno2 < freebno)
156			newbno2 = NULLAGBLOCK;
157		else
158			newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
159		if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
160			if (newlen1 < newlen2 ||
161			    (newlen1 == newlen2 &&
162			     XFS_ABSDIFF(newbno1, wantbno) >
163			     XFS_ABSDIFF(newbno2, wantbno)))
164				newbno1 = newbno2;
165		} else if (newbno2 != NULLAGBLOCK)
166			newbno1 = newbno2;
167	} else if (freeend >= wantend) {
168		newbno1 = wantbno;
169	} else if (alignment > 1) {
170		newbno1 = roundup(freeend - wantlen, alignment);
171		if (newbno1 > freeend - wantlen &&
172		    newbno1 - alignment >= freebno)
173			newbno1 -= alignment;
174		else if (newbno1 >= freeend)
175			newbno1 = NULLAGBLOCK;
176	} else
177		newbno1 = freeend - wantlen;
178	*newbnop = newbno1;
179	return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
180}
181
182/*
183 * Fix up the length, based on mod and prod.
184 * len should be k * prod + mod for some k.
185 * If len is too small it is returned unchanged.
186 * If len hits maxlen it is left alone.
187 */
188STATIC void
189xfs_alloc_fix_len(
190	xfs_alloc_arg_t	*args)		/* allocation argument structure */
191{
192	xfs_extlen_t	k;
193	xfs_extlen_t	rlen;
194
195	ASSERT(args->mod < args->prod);
196	rlen = args->len;
197	ASSERT(rlen >= args->minlen);
198	ASSERT(rlen <= args->maxlen);
199	if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
200	    (args->mod == 0 && rlen < args->prod))
201		return;
202	k = rlen % args->prod;
203	if (k == args->mod)
204		return;
205	if (k > args->mod) {
206		if ((int)(rlen = rlen - k - args->mod) < (int)args->minlen)
207			return;
208	} else {
209		if ((int)(rlen = rlen - args->prod - (args->mod - k)) <
210		    (int)args->minlen)
211			return;
212	}
213	ASSERT(rlen >= args->minlen);
214	ASSERT(rlen <= args->maxlen);
215	args->len = rlen;
216}
217
218/*
219 * Fix up length if there is too little space left in the a.g.
220 * Return 1 if ok, 0 if too little, should give up.
221 */
222STATIC int
223xfs_alloc_fix_minleft(
224	xfs_alloc_arg_t	*args)		/* allocation argument structure */
225{
226	xfs_agf_t	*agf;		/* a.g. freelist header */
227	int		diff;		/* free space difference */
228
229	if (args->minleft == 0)
230		return 1;
231	agf = XFS_BUF_TO_AGF(args->agbp);
232	diff = be32_to_cpu(agf->agf_freeblks)
233		+ be32_to_cpu(agf->agf_flcount)
234		- args->len - args->minleft;
235	if (diff >= 0)
236		return 1;
237	args->len += diff;		/* shrink the allocated space */
238	if (args->len >= args->minlen)
239		return 1;
240	args->agbno = NULLAGBLOCK;
241	return 0;
242}
243
244/*
245 * Update the two btrees, logically removing from freespace the extent
246 * starting at rbno, rlen blocks.  The extent is contained within the
247 * actual (current) free extent fbno for flen blocks.
248 * Flags are passed in indicating whether the cursors are set to the
249 * relevant records.
250 */
251STATIC int				/* error code */
252xfs_alloc_fixup_trees(
253	xfs_btree_cur_t	*cnt_cur,	/* cursor for by-size btree */
254	xfs_btree_cur_t	*bno_cur,	/* cursor for by-block btree */
255	xfs_agblock_t	fbno,		/* starting block of free extent */
256	xfs_extlen_t	flen,		/* length of free extent */
257	xfs_agblock_t	rbno,		/* starting block of returned extent */
258	xfs_extlen_t	rlen,		/* length of returned extent */
259	int		flags)		/* flags, XFSA_FIXUP_... */
260{
261	int		error;		/* error code */
262	int		i;		/* operation results */
263	xfs_agblock_t	nfbno1;		/* first new free startblock */
264	xfs_agblock_t	nfbno2;		/* second new free startblock */
265	xfs_extlen_t	nflen1=0;	/* first new free length */
266	xfs_extlen_t	nflen2=0;	/* second new free length */
267
268	/*
269	 * Look up the record in the by-size tree if necessary.
270	 */
271	if (flags & XFSA_FIXUP_CNT_OK) {
272#ifdef DEBUG
273		if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
274			return error;
275		XFS_WANT_CORRUPTED_RETURN(
276			i == 1 && nfbno1 == fbno && nflen1 == flen);
277#endif
278	} else {
279		if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
280			return error;
281		XFS_WANT_CORRUPTED_RETURN(i == 1);
282	}
283	/*
284	 * Look up the record in the by-block tree if necessary.
285	 */
286	if (flags & XFSA_FIXUP_BNO_OK) {
287#ifdef DEBUG
288		if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
289			return error;
290		XFS_WANT_CORRUPTED_RETURN(
291			i == 1 && nfbno1 == fbno && nflen1 == flen);
292#endif
293	} else {
294		if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
295			return error;
296		XFS_WANT_CORRUPTED_RETURN(i == 1);
297	}
298#ifdef DEBUG
299	{
300		xfs_alloc_block_t	*bnoblock;
301		xfs_alloc_block_t	*cntblock;
302
303		if (bno_cur->bc_nlevels == 1 &&
304		    cnt_cur->bc_nlevels == 1) {
305			bnoblock = XFS_BUF_TO_ALLOC_BLOCK(bno_cur->bc_bufs[0]);
306			cntblock = XFS_BUF_TO_ALLOC_BLOCK(cnt_cur->bc_bufs[0]);
307			XFS_WANT_CORRUPTED_RETURN(
308				be16_to_cpu(bnoblock->bb_numrecs) ==
309				be16_to_cpu(cntblock->bb_numrecs));
310		}
311	}
312#endif
313	/*
314	 * Deal with all four cases: the allocated record is contained
315	 * within the freespace record, so we can have new freespace
316	 * at either (or both) end, or no freespace remaining.
317	 */
318	if (rbno == fbno && rlen == flen)
319		nfbno1 = nfbno2 = NULLAGBLOCK;
320	else if (rbno == fbno) {
321		nfbno1 = rbno + rlen;
322		nflen1 = flen - rlen;
323		nfbno2 = NULLAGBLOCK;
324	} else if (rbno + rlen == fbno + flen) {
325		nfbno1 = fbno;
326		nflen1 = flen - rlen;
327		nfbno2 = NULLAGBLOCK;
328	} else {
329		nfbno1 = fbno;
330		nflen1 = rbno - fbno;
331		nfbno2 = rbno + rlen;
332		nflen2 = (fbno + flen) - nfbno2;
333	}
334	/*
335	 * Delete the entry from the by-size btree.
336	 */
337	if ((error = xfs_alloc_delete(cnt_cur, &i)))
338		return error;
339	XFS_WANT_CORRUPTED_RETURN(i == 1);
340	/*
341	 * Add new by-size btree entry(s).
342	 */
343	if (nfbno1 != NULLAGBLOCK) {
344		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
345			return error;
346		XFS_WANT_CORRUPTED_RETURN(i == 0);
347		if ((error = xfs_alloc_insert(cnt_cur, &i)))
348			return error;
349		XFS_WANT_CORRUPTED_RETURN(i == 1);
350	}
351	if (nfbno2 != NULLAGBLOCK) {
352		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
353			return error;
354		XFS_WANT_CORRUPTED_RETURN(i == 0);
355		if ((error = xfs_alloc_insert(cnt_cur, &i)))
356			return error;
357		XFS_WANT_CORRUPTED_RETURN(i == 1);
358	}
359	/*
360	 * Fix up the by-block btree entry(s).
361	 */
362	if (nfbno1 == NULLAGBLOCK) {
363		/*
364		 * No remaining freespace, just delete the by-block tree entry.
365		 */
366		if ((error = xfs_alloc_delete(bno_cur, &i)))
367			return error;
368		XFS_WANT_CORRUPTED_RETURN(i == 1);
369	} else {
370		/*
371		 * Update the by-block entry to start later|be shorter.
372		 */
373		if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
374			return error;
375	}
376	if (nfbno2 != NULLAGBLOCK) {
377		/*
378		 * 2 resulting free entries, need to add one.
379		 */
380		if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
381			return error;
382		XFS_WANT_CORRUPTED_RETURN(i == 0);
383		if ((error = xfs_alloc_insert(bno_cur, &i)))
384			return error;
385		XFS_WANT_CORRUPTED_RETURN(i == 1);
386	}
387	return 0;
388}
389
390/*
391 * Read in the allocation group free block array.
392 */
393STATIC int				/* error */
394xfs_alloc_read_agfl(
395	xfs_mount_t	*mp,		/* mount point structure */
396	xfs_trans_t	*tp,		/* transaction pointer */
397	xfs_agnumber_t	agno,		/* allocation group number */
398	xfs_buf_t	**bpp)		/* buffer for the ag free block array */
399{
400	xfs_buf_t	*bp;		/* return value */
401	int		error;
402
403	ASSERT(agno != NULLAGNUMBER);
404	error = xfs_trans_read_buf(
405			mp, tp, mp->m_ddev_targp,
406			XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
407			XFS_FSS_TO_BB(mp, 1), 0, &bp);
408	if (error)
409		return error;
410	ASSERT(bp);
411	ASSERT(!XFS_BUF_GETERROR(bp));
412	XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGFL, XFS_AGFL_REF);
413	*bpp = bp;
414	return 0;
415}
416
417#if defined(XFS_ALLOC_TRACE)
418/*
419 * Add an allocation trace entry for an alloc call.
420 */
421STATIC void
422xfs_alloc_trace_alloc(
423	char		*name,		/* function tag string */
424	char		*str,		/* additional string */
425	xfs_alloc_arg_t	*args,		/* allocation argument structure */
426	int		line)		/* source line number */
427{
428	ktrace_enter(xfs_alloc_trace_buf,
429		(void *)(__psint_t)(XFS_ALLOC_KTRACE_ALLOC | (line << 16)),
430		(void *)name,
431		(void *)str,
432		(void *)args->mp,
433		(void *)(__psunsigned_t)args->agno,
434		(void *)(__psunsigned_t)args->agbno,
435		(void *)(__psunsigned_t)args->minlen,
436		(void *)(__psunsigned_t)args->maxlen,
437		(void *)(__psunsigned_t)args->mod,
438		(void *)(__psunsigned_t)args->prod,
439		(void *)(__psunsigned_t)args->minleft,
440		(void *)(__psunsigned_t)args->total,
441		(void *)(__psunsigned_t)args->alignment,
442		(void *)(__psunsigned_t)args->len,
443		(void *)((((__psint_t)args->type) << 16) |
444			 (__psint_t)args->otype),
445		(void *)(__psint_t)((args->wasdel << 3) |
446				    (args->wasfromfl << 2) |
447				    (args->isfl << 1) |
448				    (args->userdata << 0)));
449}
450
451/*
452 * Add an allocation trace entry for a free call.
453 */
454STATIC void
455xfs_alloc_trace_free(
456	char		*name,		/* function tag string */
457	char		*str,		/* additional string */
458	xfs_mount_t	*mp,		/* file system mount point */
459	xfs_agnumber_t	agno,		/* allocation group number */
460	xfs_agblock_t	agbno,		/* a.g. relative block number */
461	xfs_extlen_t	len,		/* length of extent */
462	int		isfl,		/* set if is freelist allocation/free */
463	int		line)		/* source line number */
464{
465	ktrace_enter(xfs_alloc_trace_buf,
466		(void *)(__psint_t)(XFS_ALLOC_KTRACE_FREE | (line << 16)),
467		(void *)name,
468		(void *)str,
469		(void *)mp,
470		(void *)(__psunsigned_t)agno,
471		(void *)(__psunsigned_t)agbno,
472		(void *)(__psunsigned_t)len,
473		(void *)(__psint_t)isfl,
474		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
475}
476
477/*
478 * Add an allocation trace entry for modifying an agf.
479 */
480STATIC void
481xfs_alloc_trace_modagf(
482	char		*name,		/* function tag string */
483	char		*str,		/* additional string */
484	xfs_mount_t	*mp,		/* file system mount point */
485	xfs_agf_t	*agf,		/* new agf value */
486	int		flags,		/* logging flags for agf */
487	int		line)		/* source line number */
488{
489	ktrace_enter(xfs_alloc_trace_buf,
490		(void *)(__psint_t)(XFS_ALLOC_KTRACE_MODAGF | (line << 16)),
491		(void *)name,
492		(void *)str,
493		(void *)mp,
494		(void *)(__psint_t)flags,
495		(void *)(__psunsigned_t)be32_to_cpu(agf->agf_seqno),
496		(void *)(__psunsigned_t)be32_to_cpu(agf->agf_length),
497		(void *)(__psunsigned_t)be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]),
498		(void *)(__psunsigned_t)be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]),
499		(void *)(__psunsigned_t)be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]),
500		(void *)(__psunsigned_t)be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]),
501		(void *)(__psunsigned_t)be32_to_cpu(agf->agf_flfirst),
502		(void *)(__psunsigned_t)be32_to_cpu(agf->agf_fllast),
503		(void *)(__psunsigned_t)be32_to_cpu(agf->agf_flcount),
504		(void *)(__psunsigned_t)be32_to_cpu(agf->agf_freeblks),
505		(void *)(__psunsigned_t)be32_to_cpu(agf->agf_longest));
506}
507
508STATIC void
509xfs_alloc_trace_busy(
510	char		*name,		/* function tag string */
511	char		*str,		/* additional string */
512	xfs_mount_t	*mp,		/* file system mount point */
513	xfs_agnumber_t	agno,		/* allocation group number */
514	xfs_agblock_t	agbno,		/* a.g. relative block number */
515	xfs_extlen_t	len,		/* length of extent */
516	int		slot,		/* perag Busy slot */
517	xfs_trans_t	*tp,
518	int		trtype,		/* type: add, delete, search */
519	int		line)		/* source line number */
520{
521	ktrace_enter(xfs_alloc_trace_buf,
522		(void *)(__psint_t)(trtype | (line << 16)),
523		(void *)name,
524		(void *)str,
525		(void *)mp,
526		(void *)(__psunsigned_t)agno,
527		(void *)(__psunsigned_t)agbno,
528		(void *)(__psunsigned_t)len,
529		(void *)(__psint_t)slot,
530		(void *)tp,
531		NULL, NULL, NULL, NULL, NULL, NULL, NULL);
532}
533#endif	/* XFS_ALLOC_TRACE */
534
535/*
536 * Allocation group level functions.
537 */
538
539/*
540 * Allocate a variable extent in the allocation group agno.
541 * Type and bno are used to determine where in the allocation group the
542 * extent will start.
543 * Extent's length (returned in *len) will be between minlen and maxlen,
544 * and of the form k * prod + mod unless there's nothing that large.
545 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
546 */
547STATIC int			/* error */
548xfs_alloc_ag_vextent(
549	xfs_alloc_arg_t	*args)	/* argument structure for allocation */
550{
551	int		error=0;
552#ifdef XFS_ALLOC_TRACE
553	static char	fname[] = "xfs_alloc_ag_vextent";
554#endif
555
556	ASSERT(args->minlen > 0);
557	ASSERT(args->maxlen > 0);
558	ASSERT(args->minlen <= args->maxlen);
559	ASSERT(args->mod < args->prod);
560	ASSERT(args->alignment > 0);
561	/*
562	 * Branch to correct routine based on the type.
563	 */
564	args->wasfromfl = 0;
565	switch (args->type) {
566	case XFS_ALLOCTYPE_THIS_AG:
567		error = xfs_alloc_ag_vextent_size(args);
568		break;
569	case XFS_ALLOCTYPE_NEAR_BNO:
570		error = xfs_alloc_ag_vextent_near(args);
571		break;
572	case XFS_ALLOCTYPE_THIS_BNO:
573		error = xfs_alloc_ag_vextent_exact(args);
574		break;
575	default:
576		ASSERT(0);
577		/* NOTREACHED */
578	}
579	if (error)
580		return error;
581	/*
582	 * If the allocation worked, need to change the agf structure
583	 * (and log it), and the superblock.
584	 */
585	if (args->agbno != NULLAGBLOCK) {
586		xfs_agf_t	*agf;	/* allocation group freelist header */
587#ifdef XFS_ALLOC_TRACE
588		xfs_mount_t	*mp = args->mp;
589#endif
590		long		slen = (long)args->len;
591
592		ASSERT(args->len >= args->minlen && args->len <= args->maxlen);
593		ASSERT(!(args->wasfromfl) || !args->isfl);
594		ASSERT(args->agbno % args->alignment == 0);
595		if (!(args->wasfromfl)) {
596
597			agf = XFS_BUF_TO_AGF(args->agbp);
598			be32_add(&agf->agf_freeblks, -(args->len));
599			xfs_trans_agblocks_delta(args->tp,
600						 -((long)(args->len)));
601			args->pag->pagf_freeblks -= args->len;
602			ASSERT(be32_to_cpu(agf->agf_freeblks) <=
603				be32_to_cpu(agf->agf_length));
604			TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS);
605			xfs_alloc_log_agf(args->tp, args->agbp,
606						XFS_AGF_FREEBLKS);
607			/* search the busylist for these blocks */
608			xfs_alloc_search_busy(args->tp, args->agno,
609					args->agbno, args->len);
610		}
611		if (!args->isfl)
612			xfs_trans_mod_sb(args->tp,
613				args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS :
614					XFS_TRANS_SB_FDBLOCKS, -slen);
615		XFS_STATS_INC(xs_allocx);
616		XFS_STATS_ADD(xs_allocb, args->len);
617	}
618	return 0;
619}
620
621/*
622 * Allocate a variable extent at exactly agno/bno.
623 * Extent's length (returned in *len) will be between minlen and maxlen,
624 * and of the form k * prod + mod unless there's nothing that large.
625 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
626 */
627STATIC int			/* error */
628xfs_alloc_ag_vextent_exact(
629	xfs_alloc_arg_t	*args)	/* allocation argument structure */
630{
631	xfs_btree_cur_t	*bno_cur;/* by block-number btree cursor */
632	xfs_btree_cur_t	*cnt_cur;/* by count btree cursor */
633	xfs_agblock_t	end;	/* end of allocated extent */
634	int		error;
635	xfs_agblock_t	fbno;	/* start block of found extent */
636	xfs_agblock_t	fend;	/* end block of found extent */
637	xfs_extlen_t	flen;	/* length of found extent */
638#ifdef XFS_ALLOC_TRACE
639	static char	fname[] = "xfs_alloc_ag_vextent_exact";
640#endif
641	int		i;	/* success/failure of operation */
642	xfs_agblock_t	maxend;	/* end of maximal extent */
643	xfs_agblock_t	minend;	/* end of minimal extent */
644	xfs_extlen_t	rlen;	/* length of returned extent */
645
646	ASSERT(args->alignment == 1);
647	/*
648	 * Allocate/initialize a cursor for the by-number freespace btree.
649	 */
650	bno_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
651		args->agno, XFS_BTNUM_BNO, NULL, 0);
652	/*
653	 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
654	 * Look for the closest free block <= bno, it must contain bno
655	 * if any free block does.
656	 */
657	if ((error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i)))
658		goto error0;
659	if (!i) {
660		/*
661		 * Didn't find it, return null.
662		 */
663		xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
664		args->agbno = NULLAGBLOCK;
665		return 0;
666	}
667	/*
668	 * Grab the freespace record.
669	 */
670	if ((error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i)))
671		goto error0;
672	XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
673	ASSERT(fbno <= args->agbno);
674	minend = args->agbno + args->minlen;
675	maxend = args->agbno + args->maxlen;
676	fend = fbno + flen;
677	/*
678	 * Give up if the freespace isn't long enough for the minimum request.
679	 */
680	if (fend < minend) {
681		xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
682		args->agbno = NULLAGBLOCK;
683		return 0;
684	}
685	/*
686	 * End of extent will be smaller of the freespace end and the
687	 * maximal requested end.
688	 */
689	end = XFS_AGBLOCK_MIN(fend, maxend);
690	/*
691	 * Fix the length according to mod and prod if given.
692	 */
693	args->len = end - args->agbno;
694	xfs_alloc_fix_len(args);
695	if (!xfs_alloc_fix_minleft(args)) {
696		xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
697		return 0;
698	}
699	rlen = args->len;
700	ASSERT(args->agbno + rlen <= fend);
701	end = args->agbno + rlen;
702	/*
703	 * We are allocating agbno for rlen [agbno .. end]
704	 * Allocate/initialize a cursor for the by-size btree.
705	 */
706	cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
707		args->agno, XFS_BTNUM_CNT, NULL, 0);
708	ASSERT(args->agbno + args->len <=
709		be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
710	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
711			args->agbno, args->len, XFSA_FIXUP_BNO_OK))) {
712		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
713		goto error0;
714	}
715	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
716	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
717	TRACE_ALLOC("normal", args);
718	args->wasfromfl = 0;
719	return 0;
720
721error0:
722	xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
723	TRACE_ALLOC("error", args);
724	return error;
725}
726
727/*
728 * Allocate a variable extent near bno in the allocation group agno.
729 * Extent's length (returned in len) will be between minlen and maxlen,
730 * and of the form k * prod + mod unless there's nothing that large.
731 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
732 */
733STATIC int				/* error */
734xfs_alloc_ag_vextent_near(
735	xfs_alloc_arg_t	*args)		/* allocation argument structure */
736{
737	xfs_btree_cur_t	*bno_cur_gt;	/* cursor for bno btree, right side */
738	xfs_btree_cur_t	*bno_cur_lt;	/* cursor for bno btree, left side */
739	xfs_btree_cur_t	*cnt_cur;	/* cursor for count btree */
740#ifdef XFS_ALLOC_TRACE
741	static char	fname[] = "xfs_alloc_ag_vextent_near";
742#endif
743	xfs_agblock_t	gtbno;		/* start bno of right side entry */
744	xfs_agblock_t	gtbnoa;		/* aligned ... */
745	xfs_extlen_t	gtdiff;		/* difference to right side entry */
746	xfs_extlen_t	gtlen;		/* length of right side entry */
747	xfs_extlen_t	gtlena;		/* aligned ... */
748	xfs_agblock_t	gtnew;		/* useful start bno of right side */
749	int		error;		/* error code */
750	int		i;		/* result code, temporary */
751	int		j;		/* result code, temporary */
752	xfs_agblock_t	ltbno;		/* start bno of left side entry */
753	xfs_agblock_t	ltbnoa;		/* aligned ... */
754	xfs_extlen_t	ltdiff;		/* difference to left side entry */
755	/*REFERENCED*/
756	xfs_agblock_t	ltend;		/* end bno of left side entry */
757	xfs_extlen_t	ltlen;		/* length of left side entry */
758	xfs_extlen_t	ltlena;		/* aligned ... */
759	xfs_agblock_t	ltnew;		/* useful start bno of left side */
760	xfs_extlen_t	rlen;		/* length of returned extent */
761#if defined(DEBUG) && defined(__KERNEL__)
762	/*
763	 * Randomly don't execute the first algorithm.
764	 */
765	int		dofirst;	/* set to do first algorithm */
766
767	dofirst = random32() & 1;
768#endif
769	/*
770	 * Get a cursor for the by-size btree.
771	 */
772	cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
773		args->agno, XFS_BTNUM_CNT, NULL, 0);
774	ltlen = 0;
775	bno_cur_lt = bno_cur_gt = NULL;
776	/*
777	 * See if there are any free extents as big as maxlen.
778	 */
779	if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i)))
780		goto error0;
781	/*
782	 * If none, then pick up the last entry in the tree unless the
783	 * tree is empty.
784	 */
785	if (!i) {
786		if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
787				&ltlen, &i)))
788			goto error0;
789		if (i == 0 || ltlen == 0) {
790			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
791			return 0;
792		}
793		ASSERT(i == 1);
794	}
795	args->wasfromfl = 0;
796	/*
797	 * First algorithm.
798	 * If the requested extent is large wrt the freespaces available
799	 * in this a.g., then the cursor will be pointing to a btree entry
800	 * near the right edge of the tree.  If it's in the last btree leaf
801	 * block, then we just examine all the entries in that block
802	 * that are big enough, and pick the best one.
803	 * This is written as a while loop so we can break out of it,
804	 * but we never loop back to the top.
805	 */
806	while (xfs_btree_islastblock(cnt_cur, 0)) {
807		xfs_extlen_t	bdiff;
808		int		besti=0;
809		xfs_extlen_t	blen=0;
810		xfs_agblock_t	bnew=0;
811
812#if defined(DEBUG) && defined(__KERNEL__)
813		if (!dofirst)
814			break;
815#endif
816		/*
817		 * Start from the entry that lookup found, sequence through
818		 * all larger free blocks.  If we're actually pointing at a
819		 * record smaller than maxlen, go to the start of this block,
820		 * and skip all those smaller than minlen.
821		 */
822		if (ltlen || args->alignment > 1) {
823			cnt_cur->bc_ptrs[0] = 1;
824			do {
825				if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
826						&ltlen, &i)))
827					goto error0;
828				XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
829				if (ltlen >= args->minlen)
830					break;
831				if ((error = xfs_alloc_increment(cnt_cur, 0, &i)))
832					goto error0;
833			} while (i);
834			ASSERT(ltlen >= args->minlen);
835			if (!i)
836				break;
837		}
838		i = cnt_cur->bc_ptrs[0];
839		for (j = 1, blen = 0, bdiff = 0;
840		     !error && j && (blen < args->maxlen || bdiff > 0);
841		     error = xfs_alloc_increment(cnt_cur, 0, &j)) {
842			/*
843			 * For each entry, decide if it's better than
844			 * the previous best entry.
845			 */
846			if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
847				goto error0;
848			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
849			if (!xfs_alloc_compute_aligned(ltbno, ltlen,
850					args->alignment, args->minlen,
851					&ltbnoa, &ltlena))
852				continue;
853			args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
854			xfs_alloc_fix_len(args);
855			ASSERT(args->len >= args->minlen);
856			if (args->len < blen)
857				continue;
858			ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
859				args->alignment, ltbno, ltlen, &ltnew);
860			if (ltnew != NULLAGBLOCK &&
861			    (args->len > blen || ltdiff < bdiff)) {
862				bdiff = ltdiff;
863				bnew = ltnew;
864				blen = args->len;
865				besti = cnt_cur->bc_ptrs[0];
866			}
867		}
868		/*
869		 * It didn't work.  We COULD be in a case where
870		 * there's a good record somewhere, so try again.
871		 */
872		if (blen == 0)
873			break;
874		/*
875		 * Point at the best entry, and retrieve it again.
876		 */
877		cnt_cur->bc_ptrs[0] = besti;
878		if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
879			goto error0;
880		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
881		ltend = ltbno + ltlen;
882		ASSERT(ltend <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
883		args->len = blen;
884		if (!xfs_alloc_fix_minleft(args)) {
885			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
886			TRACE_ALLOC("nominleft", args);
887			return 0;
888		}
889		blen = args->len;
890		/*
891		 * We are allocating starting at bnew for blen blocks.
892		 */
893		args->agbno = bnew;
894		ASSERT(bnew >= ltbno);
895		ASSERT(bnew + blen <= ltend);
896		/*
897		 * Set up a cursor for the by-bno tree.
898		 */
899		bno_cur_lt = xfs_btree_init_cursor(args->mp, args->tp,
900			args->agbp, args->agno, XFS_BTNUM_BNO, NULL, 0);
901		/*
902		 * Fix up the btree entries.
903		 */
904		if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
905				ltlen, bnew, blen, XFSA_FIXUP_CNT_OK)))
906			goto error0;
907		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
908		xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
909		TRACE_ALLOC("first", args);
910		return 0;
911	}
912	/*
913	 * Second algorithm.
914	 * Search in the by-bno tree to the left and to the right
915	 * simultaneously, until in each case we find a space big enough,
916	 * or run into the edge of the tree.  When we run into the edge,
917	 * we deallocate that cursor.
918	 * If both searches succeed, we compare the two spaces and pick
919	 * the better one.
920	 * With alignment, it's possible for both to fail; the upper
921	 * level algorithm that picks allocation groups for allocations
922	 * is not supposed to do this.
923	 */
924	/*
925	 * Allocate and initialize the cursor for the leftward search.
926	 */
927	bno_cur_lt = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
928		args->agno, XFS_BTNUM_BNO, NULL, 0);
929	/*
930	 * Lookup <= bno to find the leftward search's starting point.
931	 */
932	if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i)))
933		goto error0;
934	if (!i) {
935		/*
936		 * Didn't find anything; use this cursor for the rightward
937		 * search.
938		 */
939		bno_cur_gt = bno_cur_lt;
940		bno_cur_lt = NULL;
941	}
942	/*
943	 * Found something.  Duplicate the cursor for the rightward search.
944	 */
945	else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt)))
946		goto error0;
947	/*
948	 * Increment the cursor, so we will point at the entry just right
949	 * of the leftward entry if any, or to the leftmost entry.
950	 */
951	if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i)))
952		goto error0;
953	if (!i) {
954		/*
955		 * It failed, there are no rightward entries.
956		 */
957		xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR);
958		bno_cur_gt = NULL;
959	}
960	/*
961	 * Loop going left with the leftward cursor, right with the
962	 * rightward cursor, until either both directions give up or
963	 * we find an entry at least as big as minlen.
964	 */
965	do {
966		if (bno_cur_lt) {
967			if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
968				goto error0;
969			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
970			if (xfs_alloc_compute_aligned(ltbno, ltlen,
971					args->alignment, args->minlen,
972					&ltbnoa, &ltlena))
973				break;
974			if ((error = xfs_alloc_decrement(bno_cur_lt, 0, &i)))
975				goto error0;
976			if (!i) {
977				xfs_btree_del_cursor(bno_cur_lt,
978						     XFS_BTREE_NOERROR);
979				bno_cur_lt = NULL;
980			}
981		}
982		if (bno_cur_gt) {
983			if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
984				goto error0;
985			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
986			if (xfs_alloc_compute_aligned(gtbno, gtlen,
987					args->alignment, args->minlen,
988					&gtbnoa, &gtlena))
989				break;
990			if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i)))
991				goto error0;
992			if (!i) {
993				xfs_btree_del_cursor(bno_cur_gt,
994						     XFS_BTREE_NOERROR);
995				bno_cur_gt = NULL;
996			}
997		}
998	} while (bno_cur_lt || bno_cur_gt);
999	/*
1000	 * Got both cursors still active, need to find better entry.
1001	 */
1002	if (bno_cur_lt && bno_cur_gt) {
1003		/*
1004		 * Left side is long enough, look for a right side entry.
1005		 */
1006		if (ltlena >= args->minlen) {
1007			/*
1008			 * Fix up the length.
1009			 */
1010			args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1011			xfs_alloc_fix_len(args);
1012			rlen = args->len;
1013			ltdiff = xfs_alloc_compute_diff(args->agbno, rlen,
1014				args->alignment, ltbno, ltlen, &ltnew);
1015			/*
1016			 * Not perfect.
1017			 */
1018			if (ltdiff) {
1019				/*
1020				 * Look until we find a better one, run out of
1021				 * space, or run off the end.
1022				 */
1023				while (bno_cur_lt && bno_cur_gt) {
1024					if ((error = xfs_alloc_get_rec(
1025							bno_cur_gt, &gtbno,
1026							&gtlen, &i)))
1027						goto error0;
1028					XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1029					xfs_alloc_compute_aligned(gtbno, gtlen,
1030						args->alignment, args->minlen,
1031						&gtbnoa, &gtlena);
1032					/*
1033					 * The left one is clearly better.
1034					 */
1035					if (gtbnoa >= args->agbno + ltdiff) {
1036						xfs_btree_del_cursor(
1037							bno_cur_gt,
1038							XFS_BTREE_NOERROR);
1039						bno_cur_gt = NULL;
1040						break;
1041					}
1042					/*
1043					 * If we reach a big enough entry,
1044					 * compare the two and pick the best.
1045					 */
1046					if (gtlena >= args->minlen) {
1047						args->len =
1048							XFS_EXTLEN_MIN(gtlena,
1049								args->maxlen);
1050						xfs_alloc_fix_len(args);
1051						rlen = args->len;
1052						gtdiff = xfs_alloc_compute_diff(
1053							args->agbno, rlen,
1054							args->alignment,
1055							gtbno, gtlen, &gtnew);
1056						/*
1057						 * Right side is better.
1058						 */
1059						if (gtdiff < ltdiff) {
1060							xfs_btree_del_cursor(
1061								bno_cur_lt,
1062								XFS_BTREE_NOERROR);
1063							bno_cur_lt = NULL;
1064						}
1065						/*
1066						 * Left side is better.
1067						 */
1068						else {
1069							xfs_btree_del_cursor(
1070								bno_cur_gt,
1071								XFS_BTREE_NOERROR);
1072							bno_cur_gt = NULL;
1073						}
1074						break;
1075					}
1076					/*
1077					 * Fell off the right end.
1078					 */
1079					if ((error = xfs_alloc_increment(
1080							bno_cur_gt, 0, &i)))
1081						goto error0;
1082					if (!i) {
1083						xfs_btree_del_cursor(
1084							bno_cur_gt,
1085							XFS_BTREE_NOERROR);
1086						bno_cur_gt = NULL;
1087						break;
1088					}
1089				}
1090			}
1091			/*
1092			 * The left side is perfect, trash the right side.
1093			 */
1094			else {
1095				xfs_btree_del_cursor(bno_cur_gt,
1096						     XFS_BTREE_NOERROR);
1097				bno_cur_gt = NULL;
1098			}
1099		}
1100		/*
1101		 * It's the right side that was found first, look left.
1102		 */
1103		else {
1104			/*
1105			 * Fix up the length.
1106			 */
1107			args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
1108			xfs_alloc_fix_len(args);
1109			rlen = args->len;
1110			gtdiff = xfs_alloc_compute_diff(args->agbno, rlen,
1111				args->alignment, gtbno, gtlen, &gtnew);
1112			/*
1113			 * Right side entry isn't perfect.
1114			 */
1115			if (gtdiff) {
1116				/*
1117				 * Look until we find a better one, run out of
1118				 * space, or run off the end.
1119				 */
1120				while (bno_cur_lt && bno_cur_gt) {
1121					if ((error = xfs_alloc_get_rec(
1122							bno_cur_lt, &ltbno,
1123							&ltlen, &i)))
1124						goto error0;
1125					XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1126					xfs_alloc_compute_aligned(ltbno, ltlen,
1127						args->alignment, args->minlen,
1128						&ltbnoa, &ltlena);
1129					/*
1130					 * The right one is clearly better.
1131					 */
1132					if (ltbnoa <= args->agbno - gtdiff) {
1133						xfs_btree_del_cursor(
1134							bno_cur_lt,
1135							XFS_BTREE_NOERROR);
1136						bno_cur_lt = NULL;
1137						break;
1138					}
1139					/*
1140					 * If we reach a big enough entry,
1141					 * compare the two and pick the best.
1142					 */
1143					if (ltlena >= args->minlen) {
1144						args->len = XFS_EXTLEN_MIN(
1145							ltlena, args->maxlen);
1146						xfs_alloc_fix_len(args);
1147						rlen = args->len;
1148						ltdiff = xfs_alloc_compute_diff(
1149							args->agbno, rlen,
1150							args->alignment,
1151							ltbno, ltlen, &ltnew);
1152						/*
1153						 * Left side is better.
1154						 */
1155						if (ltdiff < gtdiff) {
1156							xfs_btree_del_cursor(
1157								bno_cur_gt,
1158								XFS_BTREE_NOERROR);
1159							bno_cur_gt = NULL;
1160						}
1161						/*
1162						 * Right side is better.
1163						 */
1164						else {
1165							xfs_btree_del_cursor(
1166								bno_cur_lt,
1167								XFS_BTREE_NOERROR);
1168							bno_cur_lt = NULL;
1169						}
1170						break;
1171					}
1172					/*
1173					 * Fell off the left end.
1174					 */
1175					if ((error = xfs_alloc_decrement(
1176							bno_cur_lt, 0, &i)))
1177						goto error0;
1178					if (!i) {
1179						xfs_btree_del_cursor(bno_cur_lt,
1180							XFS_BTREE_NOERROR);
1181						bno_cur_lt = NULL;
1182						break;
1183					}
1184				}
1185			}
1186			/*
1187			 * The right side is perfect, trash the left side.
1188			 */
1189			else {
1190				xfs_btree_del_cursor(bno_cur_lt,
1191					XFS_BTREE_NOERROR);
1192				bno_cur_lt = NULL;
1193			}
1194		}
1195	}
1196	/*
1197	 * If we couldn't get anything, give up.
1198	 */
1199	if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
1200		TRACE_ALLOC("neither", args);
1201		args->agbno = NULLAGBLOCK;
1202		return 0;
1203	}
1204	/*
1205	 * At this point we have selected a freespace entry, either to the
1206	 * left or to the right.  If it's on the right, copy all the
1207	 * useful variables to the "left" set so we only have one
1208	 * copy of this code.
1209	 */
1210	if (bno_cur_gt) {
1211		bno_cur_lt = bno_cur_gt;
1212		bno_cur_gt = NULL;
1213		ltbno = gtbno;
1214		ltbnoa = gtbnoa;
1215		ltlen = gtlen;
1216		ltlena = gtlena;
1217		j = 1;
1218	} else
1219		j = 0;
1220	/*
1221	 * Fix up the length and compute the useful address.
1222	 */
1223	ltend = ltbno + ltlen;
1224	args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1225	xfs_alloc_fix_len(args);
1226	if (!xfs_alloc_fix_minleft(args)) {
1227		TRACE_ALLOC("nominleft", args);
1228		xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1229		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1230		return 0;
1231	}
1232	rlen = args->len;
1233	(void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno,
1234		ltlen, &ltnew);
1235	ASSERT(ltnew >= ltbno);
1236	ASSERT(ltnew + rlen <= ltend);
1237	ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1238	args->agbno = ltnew;
1239	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
1240			ltnew, rlen, XFSA_FIXUP_BNO_OK)))
1241		goto error0;
1242	TRACE_ALLOC(j ? "gt" : "lt", args);
1243	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1244	xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1245	return 0;
1246
1247 error0:
1248	TRACE_ALLOC("error", args);
1249	if (cnt_cur != NULL)
1250		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1251	if (bno_cur_lt != NULL)
1252		xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR);
1253	if (bno_cur_gt != NULL)
1254		xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR);
1255	return error;
1256}
1257
1258/*
1259 * Allocate a variable extent anywhere in the allocation group agno.
1260 * Extent's length (returned in len) will be between minlen and maxlen,
1261 * and of the form k * prod + mod unless there's nothing that large.
1262 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1263 */
1264STATIC int				/* error */
1265xfs_alloc_ag_vextent_size(
1266	xfs_alloc_arg_t	*args)		/* allocation argument structure */
1267{
1268	xfs_btree_cur_t	*bno_cur;	/* cursor for bno btree */
1269	xfs_btree_cur_t	*cnt_cur;	/* cursor for cnt btree */
1270	int		error;		/* error result */
1271	xfs_agblock_t	fbno;		/* start of found freespace */
1272	xfs_extlen_t	flen;		/* length of found freespace */
1273#ifdef XFS_ALLOC_TRACE
1274	static char	fname[] = "xfs_alloc_ag_vextent_size";
1275#endif
1276	int		i;		/* temp status variable */
1277	xfs_agblock_t	rbno;		/* returned block number */
1278	xfs_extlen_t	rlen;		/* length of returned extent */
1279
1280	/*
1281	 * Allocate and initialize a cursor for the by-size btree.
1282	 */
1283	cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
1284		args->agno, XFS_BTNUM_CNT, NULL, 0);
1285	bno_cur = NULL;
1286	/*
1287	 * Look for an entry >= maxlen+alignment-1 blocks.
1288	 */
1289	if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1290			args->maxlen + args->alignment - 1, &i)))
1291		goto error0;
1292	/*
1293	 * If none, then pick up the last entry in the tree unless the
1294	 * tree is empty.
1295	 */
1296	if (!i) {
1297		if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno,
1298				&flen, &i)))
1299			goto error0;
1300		if (i == 0 || flen == 0) {
1301			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1302			TRACE_ALLOC("noentry", args);
1303			return 0;
1304		}
1305		ASSERT(i == 1);
1306	}
1307	/*
1308	 * There's a freespace as big as maxlen+alignment-1, get it.
1309	 */
1310	else {
1311		if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i)))
1312			goto error0;
1313		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1314	}
1315	/*
1316	 * In the first case above, we got the last entry in the
1317	 * by-size btree.  Now we check to see if the space hits maxlen
1318	 * once aligned; if not, we search left for something better.
1319	 * This can't happen in the second case above.
1320	 */
1321	xfs_alloc_compute_aligned(fbno, flen, args->alignment, args->minlen,
1322		&rbno, &rlen);
1323	rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1324	XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
1325			(rlen <= flen && rbno + rlen <= fbno + flen), error0);
1326	if (rlen < args->maxlen) {
1327		xfs_agblock_t	bestfbno;
1328		xfs_extlen_t	bestflen;
1329		xfs_agblock_t	bestrbno;
1330		xfs_extlen_t	bestrlen;
1331
1332		bestrlen = rlen;
1333		bestrbno = rbno;
1334		bestflen = flen;
1335		bestfbno = fbno;
1336		for (;;) {
1337			if ((error = xfs_alloc_decrement(cnt_cur, 0, &i)))
1338				goto error0;
1339			if (i == 0)
1340				break;
1341			if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
1342					&i)))
1343				goto error0;
1344			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1345			if (flen < bestrlen)
1346				break;
1347			xfs_alloc_compute_aligned(fbno, flen, args->alignment,
1348				args->minlen, &rbno, &rlen);
1349			rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1350			XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
1351				(rlen <= flen && rbno + rlen <= fbno + flen),
1352				error0);
1353			if (rlen > bestrlen) {
1354				bestrlen = rlen;
1355				bestrbno = rbno;
1356				bestflen = flen;
1357				bestfbno = fbno;
1358				if (rlen == args->maxlen)
1359					break;
1360			}
1361		}
1362		if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
1363				&i)))
1364			goto error0;
1365		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1366		rlen = bestrlen;
1367		rbno = bestrbno;
1368		flen = bestflen;
1369		fbno = bestfbno;
1370	}
1371	args->wasfromfl = 0;
1372	/*
1373	 * Fix up the length.
1374	 */
1375	args->len = rlen;
1376	xfs_alloc_fix_len(args);
1377	if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) {
1378		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1379		TRACE_ALLOC("nominleft", args);
1380		args->agbno = NULLAGBLOCK;
1381		return 0;
1382	}
1383	rlen = args->len;
1384	XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0);
1385	/*
1386	 * Allocate and initialize a cursor for the by-block tree.
1387	 */
1388	bno_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
1389		args->agno, XFS_BTNUM_BNO, NULL, 0);
1390	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
1391			rbno, rlen, XFSA_FIXUP_CNT_OK)))
1392		goto error0;
1393	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1394	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1395	cnt_cur = bno_cur = NULL;
1396	args->len = rlen;
1397	args->agbno = rbno;
1398	XFS_WANT_CORRUPTED_GOTO(
1399		args->agbno + args->len <=
1400			be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
1401		error0);
1402	TRACE_ALLOC("normal", args);
1403	return 0;
1404
1405error0:
1406	TRACE_ALLOC("error", args);
1407	if (cnt_cur)
1408		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1409	if (bno_cur)
1410		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1411	return error;
1412}
1413
1414/*
1415 * Deal with the case where only small freespaces remain.
1416 * Either return the contents of the last freespace record,
1417 * or allocate space from the freelist if there is nothing in the tree.
1418 */
1419STATIC int			/* error */
1420xfs_alloc_ag_vextent_small(
1421	xfs_alloc_arg_t	*args,	/* allocation argument structure */
1422	xfs_btree_cur_t	*ccur,	/* by-size cursor */
1423	xfs_agblock_t	*fbnop,	/* result block number */
1424	xfs_extlen_t	*flenp,	/* result length */
1425	int		*stat)	/* status: 0-freelist, 1-normal/none */
1426{
1427	int		error;
1428	xfs_agblock_t	fbno;
1429	xfs_extlen_t	flen;
1430#ifdef XFS_ALLOC_TRACE
1431	static char	fname[] = "xfs_alloc_ag_vextent_small";
1432#endif
1433	int		i;
1434
1435	if ((error = xfs_alloc_decrement(ccur, 0, &i)))
1436		goto error0;
1437	if (i) {
1438		if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
1439			goto error0;
1440		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1441	}
1442	/*
1443	 * Nothing in the btree, try the freelist.  Make sure
1444	 * to respect minleft even when pulling from the
1445	 * freelist.
1446	 */
1447	else if (args->minlen == 1 && args->alignment == 1 && !args->isfl &&
1448		 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
1449		  > args->minleft)) {
1450		if ((error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno)))
1451			goto error0;
1452		if (fbno != NULLAGBLOCK) {
1453			if (args->userdata) {
1454				xfs_buf_t	*bp;
1455
1456				bp = xfs_btree_get_bufs(args->mp, args->tp,
1457					args->agno, fbno, 0);
1458				xfs_trans_binval(args->tp, bp);
1459			}
1460			args->len = 1;
1461			args->agbno = fbno;
1462			XFS_WANT_CORRUPTED_GOTO(
1463				args->agbno + args->len <=
1464				be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
1465				error0);
1466			args->wasfromfl = 1;
1467			TRACE_ALLOC("freelist", args);
1468			*stat = 0;
1469			return 0;
1470		}
1471		/*
1472		 * Nothing in the freelist.
1473		 */
1474		else
1475			flen = 0;
1476	}
1477	/*
1478	 * Can't allocate from the freelist for some reason.
1479	 */
1480	else {
1481		fbno = NULLAGBLOCK;
1482		flen = 0;
1483	}
1484	/*
1485	 * Can't do the allocation, give up.
1486	 */
1487	if (flen < args->minlen) {
1488		args->agbno = NULLAGBLOCK;
1489		TRACE_ALLOC("notenough", args);
1490		flen = 0;
1491	}
1492	*fbnop = fbno;
1493	*flenp = flen;
1494	*stat = 1;
1495	TRACE_ALLOC("normal", args);
1496	return 0;
1497
1498error0:
1499	TRACE_ALLOC("error", args);
1500	return error;
1501}
1502
1503/*
1504 * Free the extent starting at agno/bno for length.
1505 */
1506STATIC int			/* error */
1507xfs_free_ag_extent(
1508	xfs_trans_t	*tp,	/* transaction pointer */
1509	xfs_buf_t	*agbp,	/* buffer for a.g. freelist header */
1510	xfs_agnumber_t	agno,	/* allocation group number */
1511	xfs_agblock_t	bno,	/* starting block number */
1512	xfs_extlen_t	len,	/* length of extent */
1513	int		isfl)	/* set if is freelist blocks - no sb acctg */
1514{
1515	xfs_btree_cur_t	*bno_cur;	/* cursor for by-block btree */
1516	xfs_btree_cur_t	*cnt_cur;	/* cursor for by-size btree */
1517	int		error;		/* error return value */
1518#ifdef XFS_ALLOC_TRACE
1519	static char	fname[] = "xfs_free_ag_extent";
1520#endif
1521	xfs_agblock_t	gtbno;		/* start of right neighbor block */
1522	xfs_extlen_t	gtlen;		/* length of right neighbor block */
1523	int		haveleft;	/* have a left neighbor block */
1524	int		haveright;	/* have a right neighbor block */
1525	int		i;		/* temp, result code */
1526	xfs_agblock_t	ltbno;		/* start of left neighbor block */
1527	xfs_extlen_t	ltlen;		/* length of left neighbor block */
1528	xfs_mount_t	*mp;		/* mount point struct for filesystem */
1529	xfs_agblock_t	nbno;		/* new starting block of freespace */
1530	xfs_extlen_t	nlen;		/* new length of freespace */
1531
1532	mp = tp->t_mountp;
1533	/*
1534	 * Allocate and initialize a cursor for the by-block btree.
1535	 */
1536	bno_cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO, NULL,
1537		0);
1538	cnt_cur = NULL;
1539	/*
1540	 * Look for a neighboring block on the left (lower block numbers)
1541	 * that is contiguous with this space.
1542	 */
1543	if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
1544		goto error0;
1545	if (haveleft) {
1546		/*
1547		 * There is a block to our left.
1548		 */
1549		if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
1550			goto error0;
1551		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1552		/*
1553		 * It's not contiguous, though.
1554		 */
1555		if (ltbno + ltlen < bno)
1556			haveleft = 0;
1557		else {
1558			/*
1559			 * If this failure happens the request to free this
1560			 * space was invalid, it's (partly) already free.
1561			 * Very bad.
1562			 */
1563			XFS_WANT_CORRUPTED_GOTO(ltbno + ltlen <= bno, error0);
1564		}
1565	}
1566	/*
1567	 * Look for a neighboring block on the right (higher block numbers)
1568	 * that is contiguous with this space.
1569	 */
1570	if ((error = xfs_alloc_increment(bno_cur, 0, &haveright)))
1571		goto error0;
1572	if (haveright) {
1573		/*
1574		 * There is a block to our right.
1575		 */
1576		if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
1577			goto error0;
1578		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1579		/*
1580		 * It's not contiguous, though.
1581		 */
1582		if (bno + len < gtbno)
1583			haveright = 0;
1584		else {
1585			/*
1586			 * If this failure happens the request to free this
1587			 * space was invalid, it's (partly) already free.
1588			 * Very bad.
1589			 */
1590			XFS_WANT_CORRUPTED_GOTO(gtbno >= bno + len, error0);
1591		}
1592	}
1593	/*
1594	 * Now allocate and initialize a cursor for the by-size tree.
1595	 */
1596	cnt_cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT, NULL,
1597		0);
1598	/*
1599	 * Have both left and right contiguous neighbors.
1600	 * Merge all three into a single free block.
1601	 */
1602	if (haveleft && haveright) {
1603		/*
1604		 * Delete the old by-size entry on the left.
1605		 */
1606		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1607			goto error0;
1608		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1609		if ((error = xfs_alloc_delete(cnt_cur, &i)))
1610			goto error0;
1611		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1612		/*
1613		 * Delete the old by-size entry on the right.
1614		 */
1615		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1616			goto error0;
1617		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1618		if ((error = xfs_alloc_delete(cnt_cur, &i)))
1619			goto error0;
1620		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1621		/*
1622		 * Delete the old by-block entry for the right block.
1623		 */
1624		if ((error = xfs_alloc_delete(bno_cur, &i)))
1625			goto error0;
1626		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1627		/*
1628		 * Move the by-block cursor back to the left neighbor.
1629		 */
1630		if ((error = xfs_alloc_decrement(bno_cur, 0, &i)))
1631			goto error0;
1632		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1633#ifdef DEBUG
1634		/*
1635		 * Check that this is the right record: delete didn't
1636		 * mangle the cursor.
1637		 */
1638		{
1639			xfs_agblock_t	xxbno;
1640			xfs_extlen_t	xxlen;
1641
1642			if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
1643					&i)))
1644				goto error0;
1645			XFS_WANT_CORRUPTED_GOTO(
1646				i == 1 && xxbno == ltbno && xxlen == ltlen,
1647				error0);
1648		}
1649#endif
1650		/*
1651		 * Update remaining by-block entry to the new, joined block.
1652		 */
1653		nbno = ltbno;
1654		nlen = len + ltlen + gtlen;
1655		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1656			goto error0;
1657	}
1658	/*
1659	 * Have only a left contiguous neighbor.
1660	 * Merge it together with the new freespace.
1661	 */
1662	else if (haveleft) {
1663		/*
1664		 * Delete the old by-size entry on the left.
1665		 */
1666		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1667			goto error0;
1668		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1669		if ((error = xfs_alloc_delete(cnt_cur, &i)))
1670			goto error0;
1671		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1672		/*
1673		 * Back up the by-block cursor to the left neighbor, and
1674		 * update its length.
1675		 */
1676		if ((error = xfs_alloc_decrement(bno_cur, 0, &i)))
1677			goto error0;
1678		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1679		nbno = ltbno;
1680		nlen = len + ltlen;
1681		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1682			goto error0;
1683	}
1684	/*
1685	 * Have only a right contiguous neighbor.
1686	 * Merge it together with the new freespace.
1687	 */
1688	else if (haveright) {
1689		/*
1690		 * Delete the old by-size entry on the right.
1691		 */
1692		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1693			goto error0;
1694		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1695		if ((error = xfs_alloc_delete(cnt_cur, &i)))
1696			goto error0;
1697		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1698		/*
1699		 * Update the starting block and length of the right
1700		 * neighbor in the by-block tree.
1701		 */
1702		nbno = bno;
1703		nlen = len + gtlen;
1704		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1705			goto error0;
1706	}
1707	/*
1708	 * No contiguous neighbors.
1709	 * Insert the new freespace into the by-block tree.
1710	 */
1711	else {
1712		nbno = bno;
1713		nlen = len;
1714		if ((error = xfs_alloc_insert(bno_cur, &i)))
1715			goto error0;
1716		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1717	}
1718	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1719	bno_cur = NULL;
1720	/*
1721	 * In all cases we need to insert the new freespace in the by-size tree.
1722	 */
1723	if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
1724		goto error0;
1725	XFS_WANT_CORRUPTED_GOTO(i == 0, error0);
1726	if ((error = xfs_alloc_insert(cnt_cur, &i)))
1727		goto error0;
1728	XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1729	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1730	cnt_cur = NULL;
1731	/*
1732	 * Update the freespace totals in the ag and superblock.
1733	 */
1734	{
1735		xfs_agf_t	*agf;
1736		xfs_perag_t	*pag;		/* per allocation group data */
1737
1738		agf = XFS_BUF_TO_AGF(agbp);
1739		pag = &mp->m_perag[agno];
1740		be32_add(&agf->agf_freeblks, len);
1741		xfs_trans_agblocks_delta(tp, len);
1742		pag->pagf_freeblks += len;
1743		XFS_WANT_CORRUPTED_GOTO(
1744			be32_to_cpu(agf->agf_freeblks) <=
1745			be32_to_cpu(agf->agf_length),
1746			error0);
1747		TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS);
1748		xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
1749		if (!isfl)
1750			xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
1751		XFS_STATS_INC(xs_freex);
1752		XFS_STATS_ADD(xs_freeb, len);
1753	}
1754	TRACE_FREE(haveleft ?
1755			(haveright ? "both" : "left") :
1756			(haveright ? "right" : "none"),
1757		agno, bno, len, isfl);
1758
1759	/*
1760	 * Since blocks move to the free list without the coordination
1761	 * used in xfs_bmap_finish, we can't allow block to be available
1762	 * for reallocation and non-transaction writing (user data)
1763	 * until we know that the transaction that moved it to the free
1764	 * list is permanently on disk.  We track the blocks by declaring
1765	 * these blocks as "busy"; the busy list is maintained on a per-ag
1766	 * basis and each transaction records which entries should be removed
1767	 * when the iclog commits to disk.  If a busy block is allocated,
1768	 * the iclog is pushed up to the LSN that freed the block.
1769	 */
1770	xfs_alloc_mark_busy(tp, agno, bno, len);
1771	return 0;
1772
1773 error0:
1774	TRACE_FREE("error", agno, bno, len, isfl);
1775	if (bno_cur)
1776		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1777	if (cnt_cur)
1778		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1779	return error;
1780}
1781
1782/*
1783 * Visible (exported) allocation/free functions.
1784 * Some of these are used just by xfs_alloc_btree.c and this file.
1785 */
1786
1787/*
1788 * Compute and fill in value of m_ag_maxlevels.
1789 */
1790void
1791xfs_alloc_compute_maxlevels(
1792	xfs_mount_t	*mp)	/* file system mount structure */
1793{
1794	int		level;
1795	uint		maxblocks;
1796	uint		maxleafents;
1797	int		minleafrecs;
1798	int		minnoderecs;
1799
1800	maxleafents = (mp->m_sb.sb_agblocks + 1) / 2;
1801	minleafrecs = mp->m_alloc_mnr[0];
1802	minnoderecs = mp->m_alloc_mnr[1];
1803	maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
1804	for (level = 1; maxblocks > 1; level++)
1805		maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
1806	mp->m_ag_maxlevels = level;
1807}
1808
1809/*
1810 * Decide whether to use this allocation group for this allocation.
1811 * If so, fix up the btree freelist's size.
1812 */
1813STATIC int			/* error */
1814xfs_alloc_fix_freelist(
1815	xfs_alloc_arg_t	*args,	/* allocation argument structure */
1816	int		flags)	/* XFS_ALLOC_FLAG_... */
1817{
1818	xfs_buf_t	*agbp;	/* agf buffer pointer */
1819	xfs_agf_t	*agf;	/* a.g. freespace structure pointer */
1820	xfs_buf_t	*agflbp;/* agfl buffer pointer */
1821	xfs_agblock_t	bno;	/* freelist block */
1822	xfs_extlen_t	delta;	/* new blocks needed in freelist */
1823	int		error;	/* error result code */
1824	xfs_extlen_t	longest;/* longest extent in allocation group */
1825	xfs_mount_t	*mp;	/* file system mount point structure */
1826	xfs_extlen_t	need;	/* total blocks needed in freelist */
1827	xfs_perag_t	*pag;	/* per-ag information structure */
1828	xfs_alloc_arg_t	targs;	/* local allocation arguments */
1829	xfs_trans_t	*tp;	/* transaction pointer */
1830
1831	mp = args->mp;
1832
1833	pag = args->pag;
1834	tp = args->tp;
1835	if (!pag->pagf_init) {
1836		if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
1837				&agbp)))
1838			return error;
1839		if (!pag->pagf_init) {
1840			ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
1841			ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
1842			args->agbp = NULL;
1843			return 0;
1844		}
1845	} else
1846		agbp = NULL;
1847
1848	/*
1849	 * If this is a metadata preferred pag and we are user data
1850	 * then try somewhere else if we are not being asked to
1851	 * try harder at this point
1852	 */
1853	if (pag->pagf_metadata && args->userdata &&
1854	    (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
1855		ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
1856		args->agbp = NULL;
1857		return 0;
1858	}
1859
1860	if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
1861		need = XFS_MIN_FREELIST_PAG(pag, mp);
1862		delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0;
1863		/*
1864		 * If it looks like there isn't a long enough extent, or enough
1865		 * total blocks, reject it.
1866		 */
1867		longest = (pag->pagf_longest > delta) ?
1868			(pag->pagf_longest - delta) :
1869			(pag->pagf_flcount > 0 || pag->pagf_longest > 0);
1870		if ((args->minlen + args->alignment + args->minalignslop - 1) >
1871				longest ||
1872		    ((int)(pag->pagf_freeblks + pag->pagf_flcount -
1873			   need - args->total) < (int)args->minleft)) {
1874			if (agbp)
1875				xfs_trans_brelse(tp, agbp);
1876			args->agbp = NULL;
1877			return 0;
1878		}
1879	}
1880
1881	/*
1882	 * Get the a.g. freespace buffer.
1883	 * Can fail if we're not blocking on locks, and it's held.
1884	 */
1885	if (agbp == NULL) {
1886		if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
1887				&agbp)))
1888			return error;
1889		if (agbp == NULL) {
1890			ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
1891			ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
1892			args->agbp = NULL;
1893			return 0;
1894		}
1895	}
1896	/*
1897	 * Figure out how many blocks we should have in the freelist.
1898	 */
1899	agf = XFS_BUF_TO_AGF(agbp);
1900	need = XFS_MIN_FREELIST(agf, mp);
1901	/*
1902	 * If there isn't enough total or single-extent, reject it.
1903	 */
1904	if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
1905		delta = need > be32_to_cpu(agf->agf_flcount) ?
1906			(need - be32_to_cpu(agf->agf_flcount)) : 0;
1907		longest = be32_to_cpu(agf->agf_longest);
1908		longest = (longest > delta) ? (longest - delta) :
1909			(be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
1910		if ((args->minlen + args->alignment + args->minalignslop - 1) >
1911				longest ||
1912		    ((int)(be32_to_cpu(agf->agf_freeblks) +
1913		     be32_to_cpu(agf->agf_flcount) - need - args->total) <
1914				(int)args->minleft)) {
1915			xfs_trans_brelse(tp, agbp);
1916			args->agbp = NULL;
1917			return 0;
1918		}
1919	}
1920	/*
1921	 * Make the freelist shorter if it's too long.
1922	 */
1923	while (be32_to_cpu(agf->agf_flcount) > need) {
1924		xfs_buf_t	*bp;
1925
1926		if ((error = xfs_alloc_get_freelist(tp, agbp, &bno)))
1927			return error;
1928		if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1)))
1929			return error;
1930		bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
1931		xfs_trans_binval(tp, bp);
1932	}
1933	/*
1934	 * Initialize the args structure.
1935	 */
1936	targs.tp = tp;
1937	targs.mp = mp;
1938	targs.agbp = agbp;
1939	targs.agno = args->agno;
1940	targs.mod = targs.minleft = targs.wasdel = targs.userdata =
1941		targs.minalignslop = 0;
1942	targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
1943	targs.type = XFS_ALLOCTYPE_THIS_AG;
1944	targs.pag = pag;
1945	if ((error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp)))
1946		return error;
1947	/*
1948	 * Make the freelist longer if it's too short.
1949	 */
1950	while (be32_to_cpu(agf->agf_flcount) < need) {
1951		targs.agbno = 0;
1952		targs.maxlen = need - be32_to_cpu(agf->agf_flcount);
1953		/*
1954		 * Allocate as many blocks as possible at once.
1955		 */
1956		if ((error = xfs_alloc_ag_vextent(&targs))) {
1957			xfs_trans_brelse(tp, agflbp);
1958			return error;
1959		}
1960		/*
1961		 * Stop if we run out.  Won't happen if callers are obeying
1962		 * the restrictions correctly.  Can happen for free calls
1963		 * on a completely full ag.
1964		 */
1965		if (targs.agbno == NULLAGBLOCK) {
1966			if (flags & XFS_ALLOC_FLAG_FREEING)
1967				break;
1968			xfs_trans_brelse(tp, agflbp);
1969			args->agbp = NULL;
1970			return 0;
1971		}
1972		/*
1973		 * Put each allocated block on the list.
1974		 */
1975		for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
1976			if ((error = xfs_alloc_put_freelist(tp, agbp, agflbp,
1977					bno)))
1978				return error;
1979		}
1980	}
1981	xfs_trans_brelse(tp, agflbp);
1982	args->agbp = agbp;
1983	return 0;
1984}
1985
1986/*
1987 * Get a block from the freelist.
1988 * Returns with the buffer for the block gotten.
1989 */
1990int				/* error */
1991xfs_alloc_get_freelist(
1992	xfs_trans_t	*tp,	/* transaction pointer */
1993	xfs_buf_t	*agbp,	/* buffer containing the agf structure */
1994	xfs_agblock_t	*bnop)	/* block address retrieved from freelist */
1995{
1996	xfs_agf_t	*agf;	/* a.g. freespace structure */
1997	xfs_agfl_t	*agfl;	/* a.g. freelist structure */
1998	xfs_buf_t	*agflbp;/* buffer for a.g. freelist structure */
1999	xfs_agblock_t	bno;	/* block number returned */
2000	int		error;
2001#ifdef XFS_ALLOC_TRACE
2002	static char	fname[] = "xfs_alloc_get_freelist";
2003#endif
2004	xfs_mount_t	*mp;	/* mount structure */
2005	xfs_perag_t	*pag;	/* per allocation group data */
2006
2007	agf = XFS_BUF_TO_AGF(agbp);
2008	/*
2009	 * Freelist is empty, give up.
2010	 */
2011	if (!agf->agf_flcount) {
2012		*bnop = NULLAGBLOCK;
2013		return 0;
2014	}
2015	/*
2016	 * Read the array of free blocks.
2017	 */
2018	mp = tp->t_mountp;
2019	if ((error = xfs_alloc_read_agfl(mp, tp,
2020			be32_to_cpu(agf->agf_seqno), &agflbp)))
2021		return error;
2022	agfl = XFS_BUF_TO_AGFL(agflbp);
2023	/*
2024	 * Get the block number and update the data structures.
2025	 */
2026	bno = be32_to_cpu(agfl->agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
2027	be32_add(&agf->agf_flfirst, 1);
2028	xfs_trans_brelse(tp, agflbp);
2029	if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
2030		agf->agf_flfirst = 0;
2031	pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)];
2032	be32_add(&agf->agf_flcount, -1);
2033	xfs_trans_agflist_delta(tp, -1);
2034	pag->pagf_flcount--;
2035	TRACE_MODAGF(NULL, agf, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT);
2036	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT);
2037	*bnop = bno;
2038
2039	/*
2040	 * As blocks are freed, they are added to the per-ag busy list
2041	 * and remain there until the freeing transaction is committed to
2042	 * disk.  Now that we have allocated blocks, this list must be
2043	 * searched to see if a block is being reused.  If one is, then
2044	 * the freeing transaction must be pushed to disk NOW by forcing
2045	 * to disk all iclogs up that transaction's LSN.
2046	 */
2047	xfs_alloc_search_busy(tp, be32_to_cpu(agf->agf_seqno), bno, 1);
2048	return 0;
2049}
2050
2051/*
2052 * Log the given fields from the agf structure.
2053 */
2054void
2055xfs_alloc_log_agf(
2056	xfs_trans_t	*tp,	/* transaction pointer */
2057	xfs_buf_t	*bp,	/* buffer for a.g. freelist header */
2058	int		fields)	/* mask of fields to be logged (XFS_AGF_...) */
2059{
2060	int	first;		/* first byte offset */
2061	int	last;		/* last byte offset */
2062	static const short	offsets[] = {
2063		offsetof(xfs_agf_t, agf_magicnum),
2064		offsetof(xfs_agf_t, agf_versionnum),
2065		offsetof(xfs_agf_t, agf_seqno),
2066		offsetof(xfs_agf_t, agf_length),
2067		offsetof(xfs_agf_t, agf_roots[0]),
2068		offsetof(xfs_agf_t, agf_levels[0]),
2069		offsetof(xfs_agf_t, agf_flfirst),
2070		offsetof(xfs_agf_t, agf_fllast),
2071		offsetof(xfs_agf_t, agf_flcount),
2072		offsetof(xfs_agf_t, agf_freeblks),
2073		offsetof(xfs_agf_t, agf_longest),
2074		sizeof(xfs_agf_t)
2075	};
2076
2077	xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
2078	xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
2079}
2080
2081/*
2082 * Interface for inode allocation to force the pag data to be initialized.
2083 */
2084int					/* error */
2085xfs_alloc_pagf_init(
2086	xfs_mount_t		*mp,	/* file system mount structure */
2087	xfs_trans_t		*tp,	/* transaction pointer */
2088	xfs_agnumber_t		agno,	/* allocation group number */
2089	int			flags)	/* XFS_ALLOC_FLAGS_... */
2090{
2091	xfs_buf_t		*bp;
2092	int			error;
2093
2094	if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
2095		return error;
2096	if (bp)
2097		xfs_trans_brelse(tp, bp);
2098	return 0;
2099}
2100
2101/*
2102 * Put the block on the freelist for the allocation group.
2103 */
2104int					/* error */
2105xfs_alloc_put_freelist(
2106	xfs_trans_t		*tp,	/* transaction pointer */
2107	xfs_buf_t		*agbp,	/* buffer for a.g. freelist header */
2108	xfs_buf_t		*agflbp,/* buffer for a.g. free block array */
2109	xfs_agblock_t		bno)	/* block being freed */
2110{
2111	xfs_agf_t		*agf;	/* a.g. freespace structure */
2112	xfs_agfl_t		*agfl;	/* a.g. free block array */
2113	__be32			*blockp;/* pointer to array entry */
2114	int			error;
2115#ifdef XFS_ALLOC_TRACE
2116	static char		fname[] = "xfs_alloc_put_freelist";
2117#endif
2118	xfs_mount_t		*mp;	/* mount structure */
2119	xfs_perag_t		*pag;	/* per allocation group data */
2120
2121	agf = XFS_BUF_TO_AGF(agbp);
2122	mp = tp->t_mountp;
2123
2124	if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
2125			be32_to_cpu(agf->agf_seqno), &agflbp)))
2126		return error;
2127	agfl = XFS_BUF_TO_AGFL(agflbp);
2128	be32_add(&agf->agf_fllast, 1);
2129	if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
2130		agf->agf_fllast = 0;
2131	pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)];
2132	be32_add(&agf->agf_flcount, 1);
2133	xfs_trans_agflist_delta(tp, 1);
2134	pag->pagf_flcount++;
2135	ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
2136	blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)];
2137	*blockp = cpu_to_be32(bno);
2138	TRACE_MODAGF(NULL, agf, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT);
2139	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT);
2140	xfs_trans_log_buf(tp, agflbp,
2141		(int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl),
2142		(int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl +
2143			sizeof(xfs_agblock_t) - 1));
2144	return 0;
2145}
2146
2147/*
2148 * Read in the allocation group header (free/alloc section).
2149 */
2150int					/* error */
2151xfs_alloc_read_agf(
2152	xfs_mount_t	*mp,		/* mount point structure */
2153	xfs_trans_t	*tp,		/* transaction pointer */
2154	xfs_agnumber_t	agno,		/* allocation group number */
2155	int		flags,		/* XFS_ALLOC_FLAG_... */
2156	xfs_buf_t	**bpp)		/* buffer for the ag freelist header */
2157{
2158	xfs_agf_t	*agf;		/* ag freelist header */
2159	int		agf_ok;		/* set if agf is consistent */
2160	xfs_buf_t	*bp;		/* return value */
2161	xfs_perag_t	*pag;		/* per allocation group data */
2162	int		error;
2163
2164	ASSERT(agno != NULLAGNUMBER);
2165	error = xfs_trans_read_buf(
2166			mp, tp, mp->m_ddev_targp,
2167			XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
2168			XFS_FSS_TO_BB(mp, 1),
2169			(flags & XFS_ALLOC_FLAG_TRYLOCK) ? XFS_BUF_TRYLOCK : 0U,
2170			&bp);
2171	if (error)
2172		return error;
2173	ASSERT(!bp || !XFS_BUF_GETERROR(bp));
2174	if (!bp) {
2175		*bpp = NULL;
2176		return 0;
2177	}
2178	/*
2179	 * Validate the magic number of the agf block.
2180	 */
2181	agf = XFS_BUF_TO_AGF(bp);
2182	agf_ok =
2183		be32_to_cpu(agf->agf_magicnum) == XFS_AGF_MAGIC &&
2184		XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
2185		be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
2186		be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
2187		be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
2188		be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp);
2189	if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
2190			XFS_RANDOM_ALLOC_READ_AGF))) {
2191		XFS_CORRUPTION_ERROR("xfs_alloc_read_agf",
2192				     XFS_ERRLEVEL_LOW, mp, agf);
2193		xfs_trans_brelse(tp, bp);
2194		return XFS_ERROR(EFSCORRUPTED);
2195	}
2196	pag = &mp->m_perag[agno];
2197	if (!pag->pagf_init) {
2198		pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
2199		pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
2200		pag->pagf_longest = be32_to_cpu(agf->agf_longest);
2201		pag->pagf_levels[XFS_BTNUM_BNOi] =
2202			be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
2203		pag->pagf_levels[XFS_BTNUM_CNTi] =
2204			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
2205		spinlock_init(&pag->pagb_lock, "xfspagb");
2206		pag->pagb_list = kmem_zalloc(XFS_PAGB_NUM_SLOTS *
2207					sizeof(xfs_perag_busy_t), KM_SLEEP);
2208		pag->pagf_init = 1;
2209	}
2210#ifdef DEBUG
2211	else if (!XFS_FORCED_SHUTDOWN(mp)) {
2212		ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
2213		ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
2214		ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
2215		ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
2216		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
2217		ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
2218		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
2219	}
2220#endif
2221	XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGF, XFS_AGF_REF);
2222	*bpp = bp;
2223	return 0;
2224}
2225
2226/*
2227 * Allocate an extent (variable-size).
2228 * Depending on the allocation type, we either look in a single allocation
2229 * group or loop over the allocation groups to find the result.
2230 */
2231int				/* error */
2232xfs_alloc_vextent(
2233	xfs_alloc_arg_t	*args)	/* allocation argument structure */
2234{
2235	xfs_agblock_t	agsize;	/* allocation group size */
2236	int		error;
2237	int		flags;	/* XFS_ALLOC_FLAG_... locking flags */
2238#ifdef XFS_ALLOC_TRACE
2239	static char	fname[] = "xfs_alloc_vextent";
2240#endif
2241	xfs_extlen_t	minleft;/* minimum left value, temp copy */
2242	xfs_mount_t	*mp;	/* mount structure pointer */
2243	xfs_agnumber_t	sagno;	/* starting allocation group number */
2244	xfs_alloctype_t	type;	/* input allocation type */
2245	int		bump_rotor = 0;
2246	int		no_min = 0;
2247	xfs_agnumber_t	rotorstep = xfs_rotorstep; /* inode32 agf stepper */
2248
2249	mp = args->mp;
2250	type = args->otype = args->type;
2251	args->agbno = NULLAGBLOCK;
2252	/*
2253	 * Just fix this up, for the case where the last a.g. is shorter
2254	 * (or there's only one a.g.) and the caller couldn't easily figure
2255	 * that out (xfs_bmap_alloc).
2256	 */
2257	agsize = mp->m_sb.sb_agblocks;
2258	if (args->maxlen > agsize)
2259		args->maxlen = agsize;
2260	if (args->alignment == 0)
2261		args->alignment = 1;
2262	ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
2263	ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
2264	ASSERT(args->minlen <= args->maxlen);
2265	ASSERT(args->minlen <= agsize);
2266	ASSERT(args->mod < args->prod);
2267	if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
2268	    XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
2269	    args->minlen > args->maxlen || args->minlen > agsize ||
2270	    args->mod >= args->prod) {
2271		args->fsbno = NULLFSBLOCK;
2272		TRACE_ALLOC("badargs", args);
2273		return 0;
2274	}
2275	minleft = args->minleft;
2276
2277	switch (type) {
2278	case XFS_ALLOCTYPE_THIS_AG:
2279	case XFS_ALLOCTYPE_NEAR_BNO:
2280	case XFS_ALLOCTYPE_THIS_BNO:
2281		/*
2282		 * These three force us into a single a.g.
2283		 */
2284		args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2285		down_read(&mp->m_peraglock);
2286		args->pag = &mp->m_perag[args->agno];
2287		args->minleft = 0;
2288		error = xfs_alloc_fix_freelist(args, 0);
2289		args->minleft = minleft;
2290		if (error) {
2291			TRACE_ALLOC("nofix", args);
2292			goto error0;
2293		}
2294		if (!args->agbp) {
2295			up_read(&mp->m_peraglock);
2296			TRACE_ALLOC("noagbp", args);
2297			break;
2298		}
2299		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2300		if ((error = xfs_alloc_ag_vextent(args)))
2301			goto error0;
2302		up_read(&mp->m_peraglock);
2303		break;
2304	case XFS_ALLOCTYPE_START_BNO:
2305		/*
2306		 * Try near allocation first, then anywhere-in-ag after
2307		 * the first a.g. fails.
2308		 */
2309		if ((args->userdata  == XFS_ALLOC_INITIAL_USER_DATA) &&
2310		    (mp->m_flags & XFS_MOUNT_32BITINODES)) {
2311			args->fsbno = XFS_AGB_TO_FSB(mp,
2312					((mp->m_agfrotor / rotorstep) %
2313					mp->m_sb.sb_agcount), 0);
2314			bump_rotor = 1;
2315		}
2316		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2317		args->type = XFS_ALLOCTYPE_NEAR_BNO;
2318		/* FALLTHROUGH */
2319	case XFS_ALLOCTYPE_ANY_AG:
2320	case XFS_ALLOCTYPE_START_AG:
2321	case XFS_ALLOCTYPE_FIRST_AG:
2322		/*
2323		 * Rotate through the allocation groups looking for a winner.
2324		 */
2325		if (type == XFS_ALLOCTYPE_ANY_AG) {
2326			/*
2327			 * Start with the last place we left off.
2328			 */
2329			args->agno = sagno = (mp->m_agfrotor / rotorstep) %
2330					mp->m_sb.sb_agcount;
2331			args->type = XFS_ALLOCTYPE_THIS_AG;
2332			flags = XFS_ALLOC_FLAG_TRYLOCK;
2333		} else if (type == XFS_ALLOCTYPE_FIRST_AG) {
2334			/*
2335			 * Start with allocation group given by bno.
2336			 */
2337			args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2338			args->type = XFS_ALLOCTYPE_THIS_AG;
2339			sagno = 0;
2340			flags = 0;
2341		} else {
2342			if (type == XFS_ALLOCTYPE_START_AG)
2343				args->type = XFS_ALLOCTYPE_THIS_AG;
2344			/*
2345			 * Start with the given allocation group.
2346			 */
2347			args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2348			flags = XFS_ALLOC_FLAG_TRYLOCK;
2349		}
2350		/*
2351		 * Loop over allocation groups twice; first time with
2352		 * trylock set, second time without.
2353		 */
2354		down_read(&mp->m_peraglock);
2355		for (;;) {
2356			args->pag = &mp->m_perag[args->agno];
2357			if (no_min) args->minleft = 0;
2358			error = xfs_alloc_fix_freelist(args, flags);
2359			args->minleft = minleft;
2360			if (error) {
2361				TRACE_ALLOC("nofix", args);
2362				goto error0;
2363			}
2364			/*
2365			 * If we get a buffer back then the allocation will fly.
2366			 */
2367			if (args->agbp) {
2368				if ((error = xfs_alloc_ag_vextent(args)))
2369					goto error0;
2370				break;
2371			}
2372			TRACE_ALLOC("loopfailed", args);
2373			/*
2374			 * Didn't work, figure out the next iteration.
2375			 */
2376			if (args->agno == sagno &&
2377			    type == XFS_ALLOCTYPE_START_BNO)
2378				args->type = XFS_ALLOCTYPE_THIS_AG;
2379			/*
2380			* For the first allocation, we can try any AG to get
2381			* space.  However, if we already have allocated a
2382			* block, we don't want to try AGs whose number is below
2383			* sagno. Otherwise, we may end up with out-of-order
2384			* locking of AGF, which might cause deadlock.
2385			*/
2386			if (++(args->agno) == mp->m_sb.sb_agcount) {
2387				if (args->firstblock != NULLFSBLOCK)
2388					args->agno = sagno;
2389				else
2390					args->agno = 0;
2391			}
2392			/*
2393			 * Reached the starting a.g., must either be done
2394			 * or switch to non-trylock mode.
2395			 */
2396			if (args->agno == sagno) {
2397				if (no_min == 1) {
2398					args->agbno = NULLAGBLOCK;
2399					TRACE_ALLOC("allfailed", args);
2400					break;
2401				}
2402				if (flags == 0) {
2403					no_min = 1;
2404				} else {
2405					flags = 0;
2406					if (type == XFS_ALLOCTYPE_START_BNO) {
2407						args->agbno = XFS_FSB_TO_AGBNO(mp,
2408							args->fsbno);
2409						args->type = XFS_ALLOCTYPE_NEAR_BNO;
2410					}
2411				}
2412			}
2413		}
2414		up_read(&mp->m_peraglock);
2415		if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
2416			if (args->agno == sagno)
2417				mp->m_agfrotor = (mp->m_agfrotor + 1) %
2418					(mp->m_sb.sb_agcount * rotorstep);
2419			else
2420				mp->m_agfrotor = (args->agno * rotorstep + 1) %
2421					(mp->m_sb.sb_agcount * rotorstep);
2422		}
2423		break;
2424	default:
2425		ASSERT(0);
2426		/* NOTREACHED */
2427	}
2428	if (args->agbno == NULLAGBLOCK)
2429		args->fsbno = NULLFSBLOCK;
2430	else {
2431		args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
2432#ifdef DEBUG
2433		ASSERT(args->len >= args->minlen);
2434		ASSERT(args->len <= args->maxlen);
2435		ASSERT(args->agbno % args->alignment == 0);
2436		XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
2437			args->len);
2438#endif
2439	}
2440	return 0;
2441error0:
2442	up_read(&mp->m_peraglock);
2443	return error;
2444}
2445
2446/*
2447 * Free an extent.
2448 * Just break up the extent address and hand off to xfs_free_ag_extent
2449 * after fixing up the freelist.
2450 */
2451int				/* error */
2452xfs_free_extent(
2453	xfs_trans_t	*tp,	/* transaction pointer */
2454	xfs_fsblock_t	bno,	/* starting block number of extent */
2455	xfs_extlen_t	len)	/* length of extent */
2456{
2457	xfs_alloc_arg_t	args;
2458	int		error;
2459
2460	ASSERT(len != 0);
2461	memset(&args, 0, sizeof(xfs_alloc_arg_t));
2462	args.tp = tp;
2463	args.mp = tp->t_mountp;
2464	args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
2465	ASSERT(args.agno < args.mp->m_sb.sb_agcount);
2466	args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
2467	down_read(&args.mp->m_peraglock);
2468	args.pag = &args.mp->m_perag[args.agno];
2469	if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING)))
2470		goto error0;
2471#ifdef DEBUG
2472	ASSERT(args.agbp != NULL);
2473	ASSERT((args.agbno + len) <=
2474		be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length));
2475#endif
2476	error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
2477error0:
2478	up_read(&args.mp->m_peraglock);
2479	return error;
2480}
2481
2482
2483/*
2484 * AG Busy list management
2485 * The busy list contains block ranges that have been freed but whose
2486 * transactions have not yet hit disk.  If any block listed in a busy
2487 * list is reused, the transaction that freed it must be forced to disk
2488 * before continuing to use the block.
2489 *
2490 * xfs_alloc_mark_busy - add to the per-ag busy list
2491 * xfs_alloc_clear_busy - remove an item from the per-ag busy list
2492 */
2493void
2494xfs_alloc_mark_busy(xfs_trans_t *tp,
2495		    xfs_agnumber_t agno,
2496		    xfs_agblock_t bno,
2497		    xfs_extlen_t len)
2498{
2499	xfs_mount_t		*mp;
2500	xfs_perag_busy_t	*bsy;
2501	int			n;
2502	SPLDECL(s);
2503
2504	mp = tp->t_mountp;
2505	s = mutex_spinlock(&mp->m_perag[agno].pagb_lock);
2506
2507	/* search pagb_list for an open slot */
2508	for (bsy = mp->m_perag[agno].pagb_list, n = 0;
2509	     n < XFS_PAGB_NUM_SLOTS;
2510	     bsy++, n++) {
2511		if (bsy->busy_tp == NULL) {
2512			break;
2513		}
2514	}
2515
2516	if (n < XFS_PAGB_NUM_SLOTS) {
2517		bsy = &mp->m_perag[agno].pagb_list[n];
2518		mp->m_perag[agno].pagb_count++;
2519		TRACE_BUSY("xfs_alloc_mark_busy", "got", agno, bno, len, n, tp);
2520		bsy->busy_start = bno;
2521		bsy->busy_length = len;
2522		bsy->busy_tp = tp;
2523		xfs_trans_add_busy(tp, agno, n);
2524	} else {
2525		TRACE_BUSY("xfs_alloc_mark_busy", "FULL", agno, bno, len, -1, tp);
2526		/*
2527		 * The busy list is full!  Since it is now not possible to
2528		 * track the free block, make this a synchronous transaction
2529		 * to insure that the block is not reused before this
2530		 * transaction commits.
2531		 */
2532		xfs_trans_set_sync(tp);
2533	}
2534
2535	mutex_spinunlock(&mp->m_perag[agno].pagb_lock, s);
2536}
2537
2538void
2539xfs_alloc_clear_busy(xfs_trans_t *tp,
2540		     xfs_agnumber_t agno,
2541		     int idx)
2542{
2543	xfs_mount_t		*mp;
2544	xfs_perag_busy_t	*list;
2545	SPLDECL(s);
2546
2547	mp = tp->t_mountp;
2548
2549	s = mutex_spinlock(&mp->m_perag[agno].pagb_lock);
2550	list = mp->m_perag[agno].pagb_list;
2551
2552	ASSERT(idx < XFS_PAGB_NUM_SLOTS);
2553	if (list[idx].busy_tp == tp) {
2554		TRACE_UNBUSY("xfs_alloc_clear_busy", "found", agno, idx, tp);
2555		list[idx].busy_tp = NULL;
2556		mp->m_perag[agno].pagb_count--;
2557	} else {
2558		TRACE_UNBUSY("xfs_alloc_clear_busy", "missing", agno, idx, tp);
2559	}
2560
2561	mutex_spinunlock(&mp->m_perag[agno].pagb_lock, s);
2562}
2563
2564
2565/*
2566 * returns non-zero if any of (agno,bno):len is in a busy list
2567 */
2568STATIC int
2569xfs_alloc_search_busy(xfs_trans_t *tp,
2570		    xfs_agnumber_t agno,
2571		    xfs_agblock_t bno,
2572		    xfs_extlen_t len)
2573{
2574	xfs_mount_t		*mp;
2575	xfs_perag_busy_t	*bsy;
2576	int			n;
2577	xfs_agblock_t		uend, bend;
2578	xfs_lsn_t		lsn;
2579	int			cnt;
2580	SPLDECL(s);
2581
2582	mp = tp->t_mountp;
2583
2584	s = mutex_spinlock(&mp->m_perag[agno].pagb_lock);
2585	cnt = mp->m_perag[agno].pagb_count;
2586
2587	uend = bno + len - 1;
2588
2589	/* search pagb_list for this slot, skipping open slots */
2590	for (bsy = mp->m_perag[agno].pagb_list, n = 0;
2591	     cnt; bsy++, n++) {
2592
2593		/*
2594		 * (start1,length1) within (start2, length2)
2595		 */
2596		if (bsy->busy_tp != NULL) {
2597			bend = bsy->busy_start + bsy->busy_length - 1;
2598			if ((bno > bend) ||
2599			    (uend < bsy->busy_start)) {
2600				cnt--;
2601			} else {
2602				TRACE_BUSYSEARCH("xfs_alloc_search_busy",
2603						 "found1", agno, bno, len, n,
2604						 tp);
2605				break;
2606			}
2607		}
2608	}
2609
2610	/*
2611	 * If a block was found, force the log through the LSN of the
2612	 * transaction that freed the block
2613	 */
2614	if (cnt) {
2615		TRACE_BUSYSEARCH("xfs_alloc_search_busy", "found", agno, bno, len, n, tp);
2616		lsn = bsy->busy_tp->t_commit_lsn;
2617		mutex_spinunlock(&mp->m_perag[agno].pagb_lock, s);
2618		xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC);
2619	} else {
2620		TRACE_BUSYSEARCH("xfs_alloc_search_busy", "not-found", agno, bno, len, n, tp);
2621		n = -1;
2622		mutex_spinunlock(&mp->m_perag[agno].pagb_lock, s);
2623	}
2624
2625	return n;
2626}
2627