1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
4 * All Rights Reserved.
5 */
6#include "xfs.h"
7#include "xfs_fs.h"
8#include "xfs_shared.h"
9#include "xfs_format.h"
10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h"
12#include "xfs_bit.h"
13#include "xfs_sb.h"
14#include "xfs_mount.h"
15#include "xfs_defer.h"
16#include "xfs_dir2.h"
17#include "xfs_inode.h"
18#include "xfs_btree.h"
19#include "xfs_trans.h"
20#include "xfs_alloc.h"
21#include "xfs_bmap.h"
22#include "xfs_bmap_util.h"
23#include "xfs_bmap_btree.h"
24#include "xfs_rtbitmap.h"
25#include "xfs_errortag.h"
26#include "xfs_error.h"
27#include "xfs_quota.h"
28#include "xfs_trans_space.h"
29#include "xfs_buf_item.h"
30#include "xfs_trace.h"
31#include "xfs_attr_leaf.h"
32#include "xfs_filestream.h"
33#include "xfs_rmap.h"
34#include "xfs_ag.h"
35#include "xfs_ag_resv.h"
36#include "xfs_refcount.h"
37#include "xfs_icache.h"
38#include "xfs_iomap.h"
39#include "xfs_health.h"
40#include "xfs_bmap_item.h"
41#include "xfs_symlink_remote.h"
42
43struct kmem_cache		*xfs_bmap_intent_cache;
44
45/*
46 * Miscellaneous helper functions
47 */
48
49/*
50 * Compute and fill in the value of the maximum depth of a bmap btree
51 * in this filesystem.  Done once, during mount.
52 */
53void
54xfs_bmap_compute_maxlevels(
55	xfs_mount_t	*mp,		/* file system mount structure */
56	int		whichfork)	/* data or attr fork */
57{
58	uint64_t	maxblocks;	/* max blocks at this level */
59	xfs_extnum_t	maxleafents;	/* max leaf entries possible */
60	int		level;		/* btree level */
61	int		maxrootrecs;	/* max records in root block */
62	int		minleafrecs;	/* min records in leaf block */
63	int		minnoderecs;	/* min records in node block */
64	int		sz;		/* root block size */
65
66	/*
67	 * The maximum number of extents in a fork, hence the maximum number of
68	 * leaf entries, is controlled by the size of the on-disk extent count.
69	 *
70	 * Note that we can no longer assume that if we are in ATTR1 that the
71	 * fork offset of all the inodes will be
72	 * (xfs_default_attroffset(ip) >> 3) because we could have mounted with
73	 * ATTR2 and then mounted back with ATTR1, keeping the i_forkoff's fixed
74	 * but probably at various positions. Therefore, for both ATTR1 and
75	 * ATTR2 we have to assume the worst case scenario of a minimum size
76	 * available.
77	 */
78	maxleafents = xfs_iext_max_nextents(xfs_has_large_extent_counts(mp),
79				whichfork);
80	if (whichfork == XFS_DATA_FORK)
81		sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
82	else
83		sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
84
85	maxrootrecs = xfs_bmdr_maxrecs(sz, 0);
86	minleafrecs = mp->m_bmap_dmnr[0];
87	minnoderecs = mp->m_bmap_dmnr[1];
88	maxblocks = howmany_64(maxleafents, minleafrecs);
89	for (level = 1; maxblocks > 1; level++) {
90		if (maxblocks <= maxrootrecs)
91			maxblocks = 1;
92		else
93			maxblocks = howmany_64(maxblocks, minnoderecs);
94	}
95	mp->m_bm_maxlevels[whichfork] = level;
96	ASSERT(mp->m_bm_maxlevels[whichfork] <= xfs_bmbt_maxlevels_ondisk());
97}
98
99unsigned int
100xfs_bmap_compute_attr_offset(
101	struct xfs_mount	*mp)
102{
103	if (mp->m_sb.sb_inodesize == 256)
104		return XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS);
105	return XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
106}
107
108STATIC int				/* error */
109xfs_bmbt_lookup_eq(
110	struct xfs_btree_cur	*cur,
111	struct xfs_bmbt_irec	*irec,
112	int			*stat)	/* success/failure */
113{
114	cur->bc_rec.b = *irec;
115	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
116}
117
118STATIC int				/* error */
119xfs_bmbt_lookup_first(
120	struct xfs_btree_cur	*cur,
121	int			*stat)	/* success/failure */
122{
123	cur->bc_rec.b.br_startoff = 0;
124	cur->bc_rec.b.br_startblock = 0;
125	cur->bc_rec.b.br_blockcount = 0;
126	return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
127}
128
129/*
130 * Check if the inode needs to be converted to btree format.
131 */
132static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
133{
134	struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
135
136	return whichfork != XFS_COW_FORK &&
137		ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
138		ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork);
139}
140
141/*
142 * Check if the inode should be converted to extent format.
143 */
144static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
145{
146	struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
147
148	return whichfork != XFS_COW_FORK &&
149		ifp->if_format == XFS_DINODE_FMT_BTREE &&
150		ifp->if_nextents <= XFS_IFORK_MAXEXT(ip, whichfork);
151}
152
153/*
154 * Update the record referred to by cur to the value given by irec
155 * This either works (return 0) or gets an EFSCORRUPTED error.
156 */
157STATIC int
158xfs_bmbt_update(
159	struct xfs_btree_cur	*cur,
160	struct xfs_bmbt_irec	*irec)
161{
162	union xfs_btree_rec	rec;
163
164	xfs_bmbt_disk_set_all(&rec.bmbt, irec);
165	return xfs_btree_update(cur, &rec);
166}
167
168/*
169 * Compute the worst-case number of indirect blocks that will be used
170 * for ip's delayed extent of length "len".
171 */
172STATIC xfs_filblks_t
173xfs_bmap_worst_indlen(
174	xfs_inode_t	*ip,		/* incore inode pointer */
175	xfs_filblks_t	len)		/* delayed extent length */
176{
177	int		level;		/* btree level number */
178	int		maxrecs;	/* maximum record count at this level */
179	xfs_mount_t	*mp;		/* mount structure */
180	xfs_filblks_t	rval;		/* return value */
181
182	mp = ip->i_mount;
183	maxrecs = mp->m_bmap_dmxr[0];
184	for (level = 0, rval = 0;
185	     level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
186	     level++) {
187		len += maxrecs - 1;
188		do_div(len, maxrecs);
189		rval += len;
190		if (len == 1)
191			return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
192				level - 1;
193		if (level == 0)
194			maxrecs = mp->m_bmap_dmxr[1];
195	}
196	return rval;
197}
198
199/*
200 * Calculate the default attribute fork offset for newly created inodes.
201 */
202uint
203xfs_default_attroffset(
204	struct xfs_inode	*ip)
205{
206	if (ip->i_df.if_format == XFS_DINODE_FMT_DEV)
207		return roundup(sizeof(xfs_dev_t), 8);
208	return M_IGEO(ip->i_mount)->attr_fork_offset;
209}
210
211/*
212 * Helper routine to reset inode i_forkoff field when switching attribute fork
213 * from local to extent format - we reset it where possible to make space
214 * available for inline data fork extents.
215 */
216STATIC void
217xfs_bmap_forkoff_reset(
218	xfs_inode_t	*ip,
219	int		whichfork)
220{
221	if (whichfork == XFS_ATTR_FORK &&
222	    ip->i_df.if_format != XFS_DINODE_FMT_DEV &&
223	    ip->i_df.if_format != XFS_DINODE_FMT_BTREE) {
224		uint	dfl_forkoff = xfs_default_attroffset(ip) >> 3;
225
226		if (dfl_forkoff > ip->i_forkoff)
227			ip->i_forkoff = dfl_forkoff;
228	}
229}
230
231static int
232xfs_bmap_read_buf(
233	struct xfs_mount	*mp,		/* file system mount point */
234	struct xfs_trans	*tp,		/* transaction pointer */
235	xfs_fsblock_t		fsbno,		/* file system block number */
236	struct xfs_buf		**bpp)		/* buffer for fsbno */
237{
238	struct xfs_buf		*bp;		/* return value */
239	int			error;
240
241	if (!xfs_verify_fsbno(mp, fsbno))
242		return -EFSCORRUPTED;
243	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
244			XFS_FSB_TO_DADDR(mp, fsbno), mp->m_bsize, 0, &bp,
245			&xfs_bmbt_buf_ops);
246	if (!error) {
247		xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
248		*bpp = bp;
249	}
250	return error;
251}
252
253#ifdef DEBUG
254STATIC struct xfs_buf *
255xfs_bmap_get_bp(
256	struct xfs_btree_cur	*cur,
257	xfs_fsblock_t		bno)
258{
259	struct xfs_log_item	*lip;
260	int			i;
261
262	if (!cur)
263		return NULL;
264
265	for (i = 0; i < cur->bc_maxlevels; i++) {
266		if (!cur->bc_levels[i].bp)
267			break;
268		if (xfs_buf_daddr(cur->bc_levels[i].bp) == bno)
269			return cur->bc_levels[i].bp;
270	}
271
272	/* Chase down all the log items to see if the bp is there */
273	list_for_each_entry(lip, &cur->bc_tp->t_items, li_trans) {
274		struct xfs_buf_log_item	*bip = (struct xfs_buf_log_item *)lip;
275
276		if (bip->bli_item.li_type == XFS_LI_BUF &&
277		    xfs_buf_daddr(bip->bli_buf) == bno)
278			return bip->bli_buf;
279	}
280
281	return NULL;
282}
283
284STATIC void
285xfs_check_block(
286	struct xfs_btree_block	*block,
287	xfs_mount_t		*mp,
288	int			root,
289	short			sz)
290{
291	int			i, j, dmxr;
292	__be64			*pp, *thispa;	/* pointer to block address */
293	xfs_bmbt_key_t		*prevp, *keyp;
294
295	ASSERT(be16_to_cpu(block->bb_level) > 0);
296
297	prevp = NULL;
298	for( i = 1; i <= xfs_btree_get_numrecs(block); i++) {
299		dmxr = mp->m_bmap_dmxr[0];
300		keyp = XFS_BMBT_KEY_ADDR(mp, block, i);
301
302		if (prevp) {
303			ASSERT(be64_to_cpu(prevp->br_startoff) <
304			       be64_to_cpu(keyp->br_startoff));
305		}
306		prevp = keyp;
307
308		/*
309		 * Compare the block numbers to see if there are dups.
310		 */
311		if (root)
312			pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
313		else
314			pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);
315
316		for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
317			if (root)
318				thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
319			else
320				thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
321			if (*thispa == *pp) {
322				xfs_warn(mp, "%s: thispa(%d) == pp(%d) %lld",
323					__func__, j, i,
324					(unsigned long long)be64_to_cpu(*thispa));
325				xfs_err(mp, "%s: ptrs are equal in node\n",
326					__func__);
327				xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
328			}
329		}
330	}
331}
332
333/*
334 * Check that the extents for the inode ip are in the right order in all
335 * btree leaves. THis becomes prohibitively expensive for large extent count
336 * files, so don't bother with inodes that have more than 10,000 extents in
337 * them. The btree record ordering checks will still be done, so for such large
338 * bmapbt constructs that is going to catch most corruptions.
339 */
340STATIC void
341xfs_bmap_check_leaf_extents(
342	struct xfs_btree_cur	*cur,	/* btree cursor or null */
343	xfs_inode_t		*ip,		/* incore inode pointer */
344	int			whichfork)	/* data or attr fork */
345{
346	struct xfs_mount	*mp = ip->i_mount;
347	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
348	struct xfs_btree_block	*block;	/* current btree block */
349	xfs_fsblock_t		bno;	/* block # of "block" */
350	struct xfs_buf		*bp;	/* buffer for "block" */
351	int			error;	/* error return value */
352	xfs_extnum_t		i=0, j;	/* index into the extents list */
353	int			level;	/* btree level, for checking */
354	__be64			*pp;	/* pointer to block address */
355	xfs_bmbt_rec_t		*ep;	/* pointer to current extent */
356	xfs_bmbt_rec_t		last = {0, 0}; /* last extent in prev block */
357	xfs_bmbt_rec_t		*nextp;	/* pointer to next extent */
358	int			bp_release = 0;
359
360	if (ifp->if_format != XFS_DINODE_FMT_BTREE)
361		return;
362
363	/* skip large extent count inodes */
364	if (ip->i_df.if_nextents > 10000)
365		return;
366
367	bno = NULLFSBLOCK;
368	block = ifp->if_broot;
369	/*
370	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
371	 */
372	level = be16_to_cpu(block->bb_level);
373	ASSERT(level > 0);
374	xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
375	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
376	bno = be64_to_cpu(*pp);
377
378	ASSERT(bno != NULLFSBLOCK);
379	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
380	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
381
382	/*
383	 * Go down the tree until leaf level is reached, following the first
384	 * pointer (leftmost) at each level.
385	 */
386	while (level-- > 0) {
387		/* See if buf is in cur first */
388		bp_release = 0;
389		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
390		if (!bp) {
391			bp_release = 1;
392			error = xfs_bmap_read_buf(mp, NULL, bno, &bp);
393			if (xfs_metadata_is_sick(error))
394				xfs_btree_mark_sick(cur);
395			if (error)
396				goto error_norelse;
397		}
398		block = XFS_BUF_TO_BLOCK(bp);
399		if (level == 0)
400			break;
401
402		/*
403		 * Check this block for basic sanity (increasing keys and
404		 * no duplicate blocks).
405		 */
406
407		xfs_check_block(block, mp, 0, 0);
408		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
409		bno = be64_to_cpu(*pp);
410		if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, bno))) {
411			xfs_btree_mark_sick(cur);
412			error = -EFSCORRUPTED;
413			goto error0;
414		}
415		if (bp_release) {
416			bp_release = 0;
417			xfs_trans_brelse(NULL, bp);
418		}
419	}
420
421	/*
422	 * Here with bp and block set to the leftmost leaf node in the tree.
423	 */
424	i = 0;
425
426	/*
427	 * Loop over all leaf nodes checking that all extents are in the right order.
428	 */
429	for (;;) {
430		xfs_fsblock_t	nextbno;
431		xfs_extnum_t	num_recs;
432
433
434		num_recs = xfs_btree_get_numrecs(block);
435
436		/*
437		 * Read-ahead the next leaf block, if any.
438		 */
439
440		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
441
442		/*
443		 * Check all the extents to make sure they are OK.
444		 * If we had a previous block, the last entry should
445		 * conform with the first entry in this one.
446		 */
447
448		ep = XFS_BMBT_REC_ADDR(mp, block, 1);
449		if (i) {
450			ASSERT(xfs_bmbt_disk_get_startoff(&last) +
451			       xfs_bmbt_disk_get_blockcount(&last) <=
452			       xfs_bmbt_disk_get_startoff(ep));
453		}
454		for (j = 1; j < num_recs; j++) {
455			nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
456			ASSERT(xfs_bmbt_disk_get_startoff(ep) +
457			       xfs_bmbt_disk_get_blockcount(ep) <=
458			       xfs_bmbt_disk_get_startoff(nextp));
459			ep = nextp;
460		}
461
462		last = *ep;
463		i += num_recs;
464		if (bp_release) {
465			bp_release = 0;
466			xfs_trans_brelse(NULL, bp);
467		}
468		bno = nextbno;
469		/*
470		 * If we've reached the end, stop.
471		 */
472		if (bno == NULLFSBLOCK)
473			break;
474
475		bp_release = 0;
476		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
477		if (!bp) {
478			bp_release = 1;
479			error = xfs_bmap_read_buf(mp, NULL, bno, &bp);
480			if (xfs_metadata_is_sick(error))
481				xfs_btree_mark_sick(cur);
482			if (error)
483				goto error_norelse;
484		}
485		block = XFS_BUF_TO_BLOCK(bp);
486	}
487
488	return;
489
490error0:
491	xfs_warn(mp, "%s: at error0", __func__);
492	if (bp_release)
493		xfs_trans_brelse(NULL, bp);
494error_norelse:
495	xfs_warn(mp, "%s: BAD after btree leaves for %llu extents",
496		__func__, i);
497	xfs_err(mp, "%s: CORRUPTED BTREE OR SOMETHING", __func__);
498	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
499	return;
500}
501
502/*
503 * Validate that the bmbt_irecs being returned from bmapi are valid
504 * given the caller's original parameters.  Specifically check the
505 * ranges of the returned irecs to ensure that they only extend beyond
506 * the given parameters if the XFS_BMAPI_ENTIRE flag was set.
507 */
508STATIC void
509xfs_bmap_validate_ret(
510	xfs_fileoff_t		bno,
511	xfs_filblks_t		len,
512	uint32_t		flags,
513	xfs_bmbt_irec_t		*mval,
514	int			nmap,
515	int			ret_nmap)
516{
517	int			i;		/* index to map values */
518
519	ASSERT(ret_nmap <= nmap);
520
521	for (i = 0; i < ret_nmap; i++) {
522		ASSERT(mval[i].br_blockcount > 0);
523		if (!(flags & XFS_BMAPI_ENTIRE)) {
524			ASSERT(mval[i].br_startoff >= bno);
525			ASSERT(mval[i].br_blockcount <= len);
526			ASSERT(mval[i].br_startoff + mval[i].br_blockcount <=
527			       bno + len);
528		} else {
529			ASSERT(mval[i].br_startoff < bno + len);
530			ASSERT(mval[i].br_startoff + mval[i].br_blockcount >
531			       bno);
532		}
533		ASSERT(i == 0 ||
534		       mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
535		       mval[i].br_startoff);
536		ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
537		       mval[i].br_startblock != HOLESTARTBLOCK);
538		ASSERT(mval[i].br_state == XFS_EXT_NORM ||
539		       mval[i].br_state == XFS_EXT_UNWRITTEN);
540	}
541}
542
543#else
544#define xfs_bmap_check_leaf_extents(cur, ip, whichfork)		do { } while (0)
545#define	xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)	do { } while (0)
546#endif /* DEBUG */
547
548/*
549 * Inode fork format manipulation functions
550 */
551
552/*
553 * Convert the inode format to extent format if it currently is in btree format,
554 * but the extent list is small enough that it fits into the extent format.
555 *
556 * Since the extents are already in-core, all we have to do is give up the space
557 * for the btree root and pitch the leaf block.
558 */
559STATIC int				/* error */
560xfs_bmap_btree_to_extents(
561	struct xfs_trans	*tp,	/* transaction pointer */
562	struct xfs_inode	*ip,	/* incore inode pointer */
563	struct xfs_btree_cur	*cur,	/* btree cursor */
564	int			*logflagsp, /* inode logging flags */
565	int			whichfork)  /* data or attr fork */
566{
567	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
568	struct xfs_mount	*mp = ip->i_mount;
569	struct xfs_btree_block	*rblock = ifp->if_broot;
570	struct xfs_btree_block	*cblock;/* child btree block */
571	xfs_fsblock_t		cbno;	/* child block number */
572	struct xfs_buf		*cbp;	/* child block's buffer */
573	int			error;	/* error return value */
574	__be64			*pp;	/* ptr to block address */
575	struct xfs_owner_info	oinfo;
576
577	/* check if we actually need the extent format first: */
578	if (!xfs_bmap_wants_extents(ip, whichfork))
579		return 0;
580
581	ASSERT(cur);
582	ASSERT(whichfork != XFS_COW_FORK);
583	ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
584	ASSERT(be16_to_cpu(rblock->bb_level) == 1);
585	ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
586	ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
587
588	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
589	cbno = be64_to_cpu(*pp);
590#ifdef DEBUG
591	if (XFS_IS_CORRUPT(cur->bc_mp, !xfs_verify_fsbno(mp, cbno))) {
592		xfs_btree_mark_sick(cur);
593		return -EFSCORRUPTED;
594	}
595#endif
596	error = xfs_bmap_read_buf(mp, tp, cbno, &cbp);
597	if (xfs_metadata_is_sick(error))
598		xfs_btree_mark_sick(cur);
599	if (error)
600		return error;
601	cblock = XFS_BUF_TO_BLOCK(cbp);
602	if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
603		return error;
604
605	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
606	error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo,
607			XFS_AG_RESV_NONE, false);
608	if (error)
609		return error;
610
611	ip->i_nblocks--;
612	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
613	xfs_trans_binval(tp, cbp);
614	if (cur->bc_levels[0].bp == cbp)
615		cur->bc_levels[0].bp = NULL;
616	xfs_iroot_realloc(ip, -1, whichfork);
617	ASSERT(ifp->if_broot == NULL);
618	ifp->if_format = XFS_DINODE_FMT_EXTENTS;
619	*logflagsp |= XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
620	return 0;
621}
622
623/*
624 * Convert an extents-format file into a btree-format file.
625 * The new file will have a root block (in the inode) and a single child block.
626 */
627STATIC int					/* error */
628xfs_bmap_extents_to_btree(
629	struct xfs_trans	*tp,		/* transaction pointer */
630	struct xfs_inode	*ip,		/* incore inode pointer */
631	struct xfs_btree_cur	**curp,		/* cursor returned to caller */
632	int			wasdel,		/* converting a delayed alloc */
633	int			*logflagsp,	/* inode logging flags */
634	int			whichfork)	/* data or attr fork */
635{
636	struct xfs_btree_block	*ablock;	/* allocated (child) bt block */
637	struct xfs_buf		*abp;		/* buffer for ablock */
638	struct xfs_alloc_arg	args;		/* allocation arguments */
639	struct xfs_bmbt_rec	*arp;		/* child record pointer */
640	struct xfs_btree_block	*block;		/* btree root block */
641	struct xfs_btree_cur	*cur;		/* bmap btree cursor */
642	int			error;		/* error return value */
643	struct xfs_ifork	*ifp;		/* inode fork pointer */
644	struct xfs_bmbt_key	*kp;		/* root block key pointer */
645	struct xfs_mount	*mp;		/* mount structure */
646	xfs_bmbt_ptr_t		*pp;		/* root block address pointer */
647	struct xfs_iext_cursor	icur;
648	struct xfs_bmbt_irec	rec;
649	xfs_extnum_t		cnt = 0;
650
651	mp = ip->i_mount;
652	ASSERT(whichfork != XFS_COW_FORK);
653	ifp = xfs_ifork_ptr(ip, whichfork);
654	ASSERT(ifp->if_format == XFS_DINODE_FMT_EXTENTS);
655
656	/*
657	 * Make space in the inode incore. This needs to be undone if we fail
658	 * to expand the root.
659	 */
660	xfs_iroot_realloc(ip, 1, whichfork);
661
662	/*
663	 * Fill in the root.
664	 */
665	block = ifp->if_broot;
666	xfs_bmbt_init_block(ip, block, NULL, 1, 1);
667	/*
668	 * Need a cursor.  Can't allocate until bb_level is filled in.
669	 */
670	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
671	if (wasdel)
672		cur->bc_flags |= XFS_BTREE_BMBT_WASDEL;
673	/*
674	 * Convert to a btree with two levels, one record in root.
675	 */
676	ifp->if_format = XFS_DINODE_FMT_BTREE;
677	memset(&args, 0, sizeof(args));
678	args.tp = tp;
679	args.mp = mp;
680	xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork);
681
682	args.minlen = args.maxlen = args.prod = 1;
683	args.wasdel = wasdel;
684	*logflagsp = 0;
685	error = xfs_alloc_vextent_start_ag(&args,
686				XFS_INO_TO_FSB(mp, ip->i_ino));
687	if (error)
688		goto out_root_realloc;
689
690	/*
691	 * Allocation can't fail, the space was reserved.
692	 */
693	if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
694		error = -ENOSPC;
695		goto out_root_realloc;
696	}
697
698	cur->bc_bmap.allocated++;
699	ip->i_nblocks++;
700	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
701	error = xfs_trans_get_buf(tp, mp->m_ddev_targp,
702			XFS_FSB_TO_DADDR(mp, args.fsbno),
703			mp->m_bsize, 0, &abp);
704	if (error)
705		goto out_unreserve_dquot;
706
707	/*
708	 * Fill in the child block.
709	 */
710	ablock = XFS_BUF_TO_BLOCK(abp);
711	xfs_bmbt_init_block(ip, ablock, abp, 0, 0);
712
713	for_each_xfs_iext(ifp, &icur, &rec) {
714		if (isnullstartblock(rec.br_startblock))
715			continue;
716		arp = XFS_BMBT_REC_ADDR(mp, ablock, 1 + cnt);
717		xfs_bmbt_disk_set_all(arp, &rec);
718		cnt++;
719	}
720	ASSERT(cnt == ifp->if_nextents);
721	xfs_btree_set_numrecs(ablock, cnt);
722
723	/*
724	 * Fill in the root key and pointer.
725	 */
726	kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
727	arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
728	kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
729	pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
730						be16_to_cpu(block->bb_level)));
731	*pp = cpu_to_be64(args.fsbno);
732
733	/*
734	 * Do all this logging at the end so that
735	 * the root is at the right level.
736	 */
737	xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
738	xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
739	ASSERT(*curp == NULL);
740	*curp = cur;
741	*logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
742	return 0;
743
744out_unreserve_dquot:
745	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
746out_root_realloc:
747	xfs_iroot_realloc(ip, -1, whichfork);
748	ifp->if_format = XFS_DINODE_FMT_EXTENTS;
749	ASSERT(ifp->if_broot == NULL);
750	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
751
752	return error;
753}
754
755/*
756 * Convert a local file to an extents file.
757 * This code is out of bounds for data forks of regular files,
758 * since the file data needs to get logged so things will stay consistent.
759 * (The bmap-level manipulations are ok, though).
760 */
761void
762xfs_bmap_local_to_extents_empty(
763	struct xfs_trans	*tp,
764	struct xfs_inode	*ip,
765	int			whichfork)
766{
767	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
768
769	ASSERT(whichfork != XFS_COW_FORK);
770	ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
771	ASSERT(ifp->if_bytes == 0);
772	ASSERT(ifp->if_nextents == 0);
773
774	xfs_bmap_forkoff_reset(ip, whichfork);
775	ifp->if_data = NULL;
776	ifp->if_height = 0;
777	ifp->if_format = XFS_DINODE_FMT_EXTENTS;
778	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
779}
780
781
782STATIC int				/* error */
783xfs_bmap_local_to_extents(
784	xfs_trans_t	*tp,		/* transaction pointer */
785	xfs_inode_t	*ip,		/* incore inode pointer */
786	xfs_extlen_t	total,		/* total blocks needed by transaction */
787	int		*logflagsp,	/* inode logging flags */
788	int		whichfork,
789	void		(*init_fn)(struct xfs_trans *tp,
790				   struct xfs_buf *bp,
791				   struct xfs_inode *ip,
792				   struct xfs_ifork *ifp))
793{
794	int		error = 0;
795	int		flags;		/* logging flags returned */
796	struct xfs_ifork *ifp;		/* inode fork pointer */
797	xfs_alloc_arg_t	args;		/* allocation arguments */
798	struct xfs_buf	*bp;		/* buffer for extent block */
799	struct xfs_bmbt_irec rec;
800	struct xfs_iext_cursor icur;
801
802	/*
803	 * We don't want to deal with the case of keeping inode data inline yet.
804	 * So sending the data fork of a regular inode is invalid.
805	 */
806	ASSERT(!(S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK));
807	ifp = xfs_ifork_ptr(ip, whichfork);
808	ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
809
810	if (!ifp->if_bytes) {
811		xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
812		flags = XFS_ILOG_CORE;
813		goto done;
814	}
815
816	flags = 0;
817	error = 0;
818	memset(&args, 0, sizeof(args));
819	args.tp = tp;
820	args.mp = ip->i_mount;
821	args.total = total;
822	args.minlen = args.maxlen = args.prod = 1;
823	xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0);
824
825	/*
826	 * Allocate a block.  We know we need only one, since the
827	 * file currently fits in an inode.
828	 */
829	args.total = total;
830	args.minlen = args.maxlen = args.prod = 1;
831	error = xfs_alloc_vextent_start_ag(&args,
832			XFS_INO_TO_FSB(args.mp, ip->i_ino));
833	if (error)
834		goto done;
835
836	/* Can't fail, the space was reserved. */
837	ASSERT(args.fsbno != NULLFSBLOCK);
838	ASSERT(args.len == 1);
839	error = xfs_trans_get_buf(tp, args.mp->m_ddev_targp,
840			XFS_FSB_TO_DADDR(args.mp, args.fsbno),
841			args.mp->m_bsize, 0, &bp);
842	if (error)
843		goto done;
844
845	/*
846	 * Initialize the block, copy the data and log the remote buffer.
847	 *
848	 * The callout is responsible for logging because the remote format
849	 * might differ from the local format and thus we don't know how much to
850	 * log here. Note that init_fn must also set the buffer log item type
851	 * correctly.
852	 */
853	init_fn(tp, bp, ip, ifp);
854
855	/* account for the change in fork size */
856	xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
857	xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
858	flags |= XFS_ILOG_CORE;
859
860	ifp->if_data = NULL;
861	ifp->if_height = 0;
862
863	rec.br_startoff = 0;
864	rec.br_startblock = args.fsbno;
865	rec.br_blockcount = 1;
866	rec.br_state = XFS_EXT_NORM;
867	xfs_iext_first(ifp, &icur);
868	xfs_iext_insert(ip, &icur, &rec, 0);
869
870	ifp->if_nextents = 1;
871	ip->i_nblocks = 1;
872	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
873	flags |= xfs_ilog_fext(whichfork);
874
875done:
876	*logflagsp = flags;
877	return error;
878}
879
880/*
881 * Called from xfs_bmap_add_attrfork to handle btree format files.
882 */
883STATIC int					/* error */
884xfs_bmap_add_attrfork_btree(
885	xfs_trans_t		*tp,		/* transaction pointer */
886	xfs_inode_t		*ip,		/* incore inode pointer */
887	int			*flags)		/* inode logging flags */
888{
889	struct xfs_btree_block	*block = ip->i_df.if_broot;
890	struct xfs_btree_cur	*cur;		/* btree cursor */
891	int			error;		/* error return value */
892	xfs_mount_t		*mp;		/* file system mount struct */
893	int			stat;		/* newroot status */
894
895	mp = ip->i_mount;
896
897	if (XFS_BMAP_BMDR_SPACE(block) <= xfs_inode_data_fork_size(ip))
898		*flags |= XFS_ILOG_DBROOT;
899	else {
900		cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
901		error = xfs_bmbt_lookup_first(cur, &stat);
902		if (error)
903			goto error0;
904		/* must be at least one entry */
905		if (XFS_IS_CORRUPT(mp, stat != 1)) {
906			xfs_btree_mark_sick(cur);
907			error = -EFSCORRUPTED;
908			goto error0;
909		}
910		if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
911			goto error0;
912		if (stat == 0) {
913			xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
914			return -ENOSPC;
915		}
916		cur->bc_bmap.allocated = 0;
917		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
918	}
919	return 0;
920error0:
921	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
922	return error;
923}
924
925/*
926 * Called from xfs_bmap_add_attrfork to handle extents format files.
927 */
928STATIC int					/* error */
929xfs_bmap_add_attrfork_extents(
930	struct xfs_trans	*tp,		/* transaction pointer */
931	struct xfs_inode	*ip,		/* incore inode pointer */
932	int			*flags)		/* inode logging flags */
933{
934	struct xfs_btree_cur	*cur;		/* bmap btree cursor */
935	int			error;		/* error return value */
936
937	if (ip->i_df.if_nextents * sizeof(struct xfs_bmbt_rec) <=
938	    xfs_inode_data_fork_size(ip))
939		return 0;
940	cur = NULL;
941	error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0, flags,
942					  XFS_DATA_FORK);
943	if (cur) {
944		cur->bc_bmap.allocated = 0;
945		xfs_btree_del_cursor(cur, error);
946	}
947	return error;
948}
949
950/*
951 * Called from xfs_bmap_add_attrfork to handle local format files. Each
952 * different data fork content type needs a different callout to do the
953 * conversion. Some are basic and only require special block initialisation
954 * callouts for the data formating, others (directories) are so specialised they
955 * handle everything themselves.
956 *
957 * XXX (dgc): investigate whether directory conversion can use the generic
958 * formatting callout. It should be possible - it's just a very complex
959 * formatter.
960 */
961STATIC int					/* error */
962xfs_bmap_add_attrfork_local(
963	struct xfs_trans	*tp,		/* transaction pointer */
964	struct xfs_inode	*ip,		/* incore inode pointer */
965	int			*flags)		/* inode logging flags */
966{
967	struct xfs_da_args	dargs;		/* args for dir/attr code */
968
969	if (ip->i_df.if_bytes <= xfs_inode_data_fork_size(ip))
970		return 0;
971
972	if (S_ISDIR(VFS_I(ip)->i_mode)) {
973		memset(&dargs, 0, sizeof(dargs));
974		dargs.geo = ip->i_mount->m_dir_geo;
975		dargs.dp = ip;
976		dargs.total = dargs.geo->fsbcount;
977		dargs.whichfork = XFS_DATA_FORK;
978		dargs.trans = tp;
979		return xfs_dir2_sf_to_block(&dargs);
980	}
981
982	if (S_ISLNK(VFS_I(ip)->i_mode))
983		return xfs_bmap_local_to_extents(tp, ip, 1, flags,
984						 XFS_DATA_FORK,
985						 xfs_symlink_local_to_remote);
986
987	/* should only be called for types that support local format data */
988	ASSERT(0);
989	xfs_bmap_mark_sick(ip, XFS_ATTR_FORK);
990	return -EFSCORRUPTED;
991}
992
993/*
994 * Set an inode attr fork offset based on the format of the data fork.
995 */
996static int
997xfs_bmap_set_attrforkoff(
998	struct xfs_inode	*ip,
999	int			size,
1000	int			*version)
1001{
1002	int			default_size = xfs_default_attroffset(ip) >> 3;
1003
1004	switch (ip->i_df.if_format) {
1005	case XFS_DINODE_FMT_DEV:
1006		ip->i_forkoff = default_size;
1007		break;
1008	case XFS_DINODE_FMT_LOCAL:
1009	case XFS_DINODE_FMT_EXTENTS:
1010	case XFS_DINODE_FMT_BTREE:
1011		ip->i_forkoff = xfs_attr_shortform_bytesfit(ip, size);
1012		if (!ip->i_forkoff)
1013			ip->i_forkoff = default_size;
1014		else if (xfs_has_attr2(ip->i_mount) && version)
1015			*version = 2;
1016		break;
1017	default:
1018		ASSERT(0);
1019		return -EINVAL;
1020	}
1021
1022	return 0;
1023}
1024
1025/*
1026 * Convert inode from non-attributed to attributed.
1027 * Must not be in a transaction, ip must not be locked.
1028 */
1029int						/* error code */
1030xfs_bmap_add_attrfork(
1031	xfs_inode_t		*ip,		/* incore inode pointer */
1032	int			size,		/* space new attribute needs */
1033	int			rsvd)		/* xact may use reserved blks */
1034{
1035	xfs_mount_t		*mp;		/* mount structure */
1036	xfs_trans_t		*tp;		/* transaction pointer */
1037	int			blks;		/* space reservation */
1038	int			version = 1;	/* superblock attr version */
1039	int			logflags;	/* logging flags */
1040	int			error;		/* error return value */
1041
1042	ASSERT(xfs_inode_has_attr_fork(ip) == 0);
1043
1044	mp = ip->i_mount;
1045	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
1046
1047	blks = XFS_ADDAFORK_SPACE_RES(mp);
1048
1049	error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_addafork, blks, 0,
1050			rsvd, &tp);
1051	if (error)
1052		return error;
1053	if (xfs_inode_has_attr_fork(ip))
1054		goto trans_cancel;
1055
1056	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1057	error = xfs_bmap_set_attrforkoff(ip, size, &version);
1058	if (error)
1059		goto trans_cancel;
1060
1061	xfs_ifork_init_attr(ip, XFS_DINODE_FMT_EXTENTS, 0);
1062	logflags = 0;
1063	switch (ip->i_df.if_format) {
1064	case XFS_DINODE_FMT_LOCAL:
1065		error = xfs_bmap_add_attrfork_local(tp, ip, &logflags);
1066		break;
1067	case XFS_DINODE_FMT_EXTENTS:
1068		error = xfs_bmap_add_attrfork_extents(tp, ip, &logflags);
1069		break;
1070	case XFS_DINODE_FMT_BTREE:
1071		error = xfs_bmap_add_attrfork_btree(tp, ip, &logflags);
1072		break;
1073	default:
1074		error = 0;
1075		break;
1076	}
1077	if (logflags)
1078		xfs_trans_log_inode(tp, ip, logflags);
1079	if (error)
1080		goto trans_cancel;
1081	if (!xfs_has_attr(mp) ||
1082	   (!xfs_has_attr2(mp) && version == 2)) {
1083		bool log_sb = false;
1084
1085		spin_lock(&mp->m_sb_lock);
1086		if (!xfs_has_attr(mp)) {
1087			xfs_add_attr(mp);
1088			log_sb = true;
1089		}
1090		if (!xfs_has_attr2(mp) && version == 2) {
1091			xfs_add_attr2(mp);
1092			log_sb = true;
1093		}
1094		spin_unlock(&mp->m_sb_lock);
1095		if (log_sb)
1096			xfs_log_sb(tp);
1097	}
1098
1099	error = xfs_trans_commit(tp);
1100	xfs_iunlock(ip, XFS_ILOCK_EXCL);
1101	return error;
1102
1103trans_cancel:
1104	xfs_trans_cancel(tp);
1105	xfs_iunlock(ip, XFS_ILOCK_EXCL);
1106	return error;
1107}
1108
1109/*
1110 * Internal and external extent tree search functions.
1111 */
1112
1113struct xfs_iread_state {
1114	struct xfs_iext_cursor	icur;
1115	xfs_extnum_t		loaded;
1116};
1117
1118int
1119xfs_bmap_complain_bad_rec(
1120	struct xfs_inode		*ip,
1121	int				whichfork,
1122	xfs_failaddr_t			fa,
1123	const struct xfs_bmbt_irec	*irec)
1124{
1125	struct xfs_mount		*mp = ip->i_mount;
1126	const char			*forkname;
1127
1128	switch (whichfork) {
1129	case XFS_DATA_FORK:	forkname = "data"; break;
1130	case XFS_ATTR_FORK:	forkname = "attr"; break;
1131	case XFS_COW_FORK:	forkname = "CoW"; break;
1132	default:		forkname = "???"; break;
1133	}
1134
1135	xfs_warn(mp,
1136 "Bmap BTree record corruption in inode 0x%llx %s fork detected at %pS!",
1137				ip->i_ino, forkname, fa);
1138	xfs_warn(mp,
1139		"Offset 0x%llx, start block 0x%llx, block count 0x%llx state 0x%x",
1140		irec->br_startoff, irec->br_startblock, irec->br_blockcount,
1141		irec->br_state);
1142
1143	return -EFSCORRUPTED;
1144}
1145
1146/* Stuff every bmbt record from this block into the incore extent map. */
1147static int
1148xfs_iread_bmbt_block(
1149	struct xfs_btree_cur	*cur,
1150	int			level,
1151	void			*priv)
1152{
1153	struct xfs_iread_state	*ir = priv;
1154	struct xfs_mount	*mp = cur->bc_mp;
1155	struct xfs_inode	*ip = cur->bc_ino.ip;
1156	struct xfs_btree_block	*block;
1157	struct xfs_buf		*bp;
1158	struct xfs_bmbt_rec	*frp;
1159	xfs_extnum_t		num_recs;
1160	xfs_extnum_t		j;
1161	int			whichfork = cur->bc_ino.whichfork;
1162	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
1163
1164	block = xfs_btree_get_block(cur, level, &bp);
1165
1166	/* Abort if we find more records than nextents. */
1167	num_recs = xfs_btree_get_numrecs(block);
1168	if (unlikely(ir->loaded + num_recs > ifp->if_nextents)) {
1169		xfs_warn(ip->i_mount, "corrupt dinode %llu, (btree extents).",
1170				(unsigned long long)ip->i_ino);
1171		xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, block,
1172				sizeof(*block), __this_address);
1173		xfs_bmap_mark_sick(ip, whichfork);
1174		return -EFSCORRUPTED;
1175	}
1176
1177	/* Copy records into the incore cache. */
1178	frp = XFS_BMBT_REC_ADDR(mp, block, 1);
1179	for (j = 0; j < num_recs; j++, frp++, ir->loaded++) {
1180		struct xfs_bmbt_irec	new;
1181		xfs_failaddr_t		fa;
1182
1183		xfs_bmbt_disk_get_all(frp, &new);
1184		fa = xfs_bmap_validate_extent(ip, whichfork, &new);
1185		if (fa) {
1186			xfs_inode_verifier_error(ip, -EFSCORRUPTED,
1187					"xfs_iread_extents(2)", frp,
1188					sizeof(*frp), fa);
1189			xfs_bmap_mark_sick(ip, whichfork);
1190			return xfs_bmap_complain_bad_rec(ip, whichfork, fa,
1191					&new);
1192		}
1193		xfs_iext_insert(ip, &ir->icur, &new,
1194				xfs_bmap_fork_to_state(whichfork));
1195		trace_xfs_read_extent(ip, &ir->icur,
1196				xfs_bmap_fork_to_state(whichfork), _THIS_IP_);
1197		xfs_iext_next(ifp, &ir->icur);
1198	}
1199
1200	return 0;
1201}
1202
1203/*
1204 * Read in extents from a btree-format inode.
1205 */
1206int
1207xfs_iread_extents(
1208	struct xfs_trans	*tp,
1209	struct xfs_inode	*ip,
1210	int			whichfork)
1211{
1212	struct xfs_iread_state	ir;
1213	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
1214	struct xfs_mount	*mp = ip->i_mount;
1215	struct xfs_btree_cur	*cur;
1216	int			error;
1217
1218	if (!xfs_need_iread_extents(ifp))
1219		return 0;
1220
1221	xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
1222
1223	ir.loaded = 0;
1224	xfs_iext_first(ifp, &ir.icur);
1225	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
1226	error = xfs_btree_visit_blocks(cur, xfs_iread_bmbt_block,
1227			XFS_BTREE_VISIT_RECORDS, &ir);
1228	xfs_btree_del_cursor(cur, error);
1229	if (error)
1230		goto out;
1231
1232	if (XFS_IS_CORRUPT(mp, ir.loaded != ifp->if_nextents)) {
1233		xfs_bmap_mark_sick(ip, whichfork);
1234		error = -EFSCORRUPTED;
1235		goto out;
1236	}
1237	ASSERT(ir.loaded == xfs_iext_count(ifp));
1238	/*
1239	 * Use release semantics so that we can use acquire semantics in
1240	 * xfs_need_iread_extents and be guaranteed to see a valid mapping tree
1241	 * after that load.
1242	 */
1243	smp_store_release(&ifp->if_needextents, 0);
1244	return 0;
1245out:
1246	if (xfs_metadata_is_sick(error))
1247		xfs_bmap_mark_sick(ip, whichfork);
1248	xfs_iext_destroy(ifp);
1249	return error;
1250}
1251
1252/*
1253 * Returns the relative block number of the first unused block(s) in the given
1254 * fork with at least "len" logically contiguous blocks free.  This is the
1255 * lowest-address hole if the fork has holes, else the first block past the end
1256 * of fork.  Return 0 if the fork is currently local (in-inode).
1257 */
1258int						/* error */
1259xfs_bmap_first_unused(
1260	struct xfs_trans	*tp,		/* transaction pointer */
1261	struct xfs_inode	*ip,		/* incore inode */
1262	xfs_extlen_t		len,		/* size of hole to find */
1263	xfs_fileoff_t		*first_unused,	/* unused block */
1264	int			whichfork)	/* data or attr fork */
1265{
1266	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
1267	struct xfs_bmbt_irec	got;
1268	struct xfs_iext_cursor	icur;
1269	xfs_fileoff_t		lastaddr = 0;
1270	xfs_fileoff_t		lowest, max;
1271	int			error;
1272
1273	if (ifp->if_format == XFS_DINODE_FMT_LOCAL) {
1274		*first_unused = 0;
1275		return 0;
1276	}
1277
1278	ASSERT(xfs_ifork_has_extents(ifp));
1279
1280	error = xfs_iread_extents(tp, ip, whichfork);
1281	if (error)
1282		return error;
1283
1284	lowest = max = *first_unused;
1285	for_each_xfs_iext(ifp, &icur, &got) {
1286		/*
1287		 * See if the hole before this extent will work.
1288		 */
1289		if (got.br_startoff >= lowest + len &&
1290		    got.br_startoff - max >= len)
1291			break;
1292		lastaddr = got.br_startoff + got.br_blockcount;
1293		max = XFS_FILEOFF_MAX(lastaddr, lowest);
1294	}
1295
1296	*first_unused = max;
1297	return 0;
1298}
1299
1300/*
1301 * Returns the file-relative block number of the last block - 1 before
1302 * last_block (input value) in the file.
1303 * This is not based on i_size, it is based on the extent records.
1304 * Returns 0 for local files, as they do not have extent records.
1305 */
1306int						/* error */
1307xfs_bmap_last_before(
1308	struct xfs_trans	*tp,		/* transaction pointer */
1309	struct xfs_inode	*ip,		/* incore inode */
1310	xfs_fileoff_t		*last_block,	/* last block */
1311	int			whichfork)	/* data or attr fork */
1312{
1313	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
1314	struct xfs_bmbt_irec	got;
1315	struct xfs_iext_cursor	icur;
1316	int			error;
1317
1318	switch (ifp->if_format) {
1319	case XFS_DINODE_FMT_LOCAL:
1320		*last_block = 0;
1321		return 0;
1322	case XFS_DINODE_FMT_BTREE:
1323	case XFS_DINODE_FMT_EXTENTS:
1324		break;
1325	default:
1326		ASSERT(0);
1327		xfs_bmap_mark_sick(ip, whichfork);
1328		return -EFSCORRUPTED;
1329	}
1330
1331	error = xfs_iread_extents(tp, ip, whichfork);
1332	if (error)
1333		return error;
1334
1335	if (!xfs_iext_lookup_extent_before(ip, ifp, last_block, &icur, &got))
1336		*last_block = 0;
1337	return 0;
1338}
1339
1340int
1341xfs_bmap_last_extent(
1342	struct xfs_trans	*tp,
1343	struct xfs_inode	*ip,
1344	int			whichfork,
1345	struct xfs_bmbt_irec	*rec,
1346	int			*is_empty)
1347{
1348	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
1349	struct xfs_iext_cursor	icur;
1350	int			error;
1351
1352	error = xfs_iread_extents(tp, ip, whichfork);
1353	if (error)
1354		return error;
1355
1356	xfs_iext_last(ifp, &icur);
1357	if (!xfs_iext_get_extent(ifp, &icur, rec))
1358		*is_empty = 1;
1359	else
1360		*is_empty = 0;
1361	return 0;
1362}
1363
1364/*
1365 * Check the last inode extent to determine whether this allocation will result
1366 * in blocks being allocated at the end of the file. When we allocate new data
1367 * blocks at the end of the file which do not start at the previous data block,
1368 * we will try to align the new blocks at stripe unit boundaries.
1369 *
1370 * Returns 1 in bma->aeof if the file (fork) is empty as any new write will be
1371 * at, or past the EOF.
1372 */
1373STATIC int
1374xfs_bmap_isaeof(
1375	struct xfs_bmalloca	*bma,
1376	int			whichfork)
1377{
1378	struct xfs_bmbt_irec	rec;
1379	int			is_empty;
1380	int			error;
1381
1382	bma->aeof = false;
1383	error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
1384				     &is_empty);
1385	if (error)
1386		return error;
1387
1388	if (is_empty) {
1389		bma->aeof = true;
1390		return 0;
1391	}
1392
1393	/*
1394	 * Check if we are allocation or past the last extent, or at least into
1395	 * the last delayed allocated extent.
1396	 */
1397	bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount ||
1398		(bma->offset >= rec.br_startoff &&
1399		 isnullstartblock(rec.br_startblock));
1400	return 0;
1401}
1402
1403/*
1404 * Returns the file-relative block number of the first block past eof in
1405 * the file.  This is not based on i_size, it is based on the extent records.
1406 * Returns 0 for local files, as they do not have extent records.
1407 */
1408int
1409xfs_bmap_last_offset(
1410	struct xfs_inode	*ip,
1411	xfs_fileoff_t		*last_block,
1412	int			whichfork)
1413{
1414	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
1415	struct xfs_bmbt_irec	rec;
1416	int			is_empty;
1417	int			error;
1418
1419	*last_block = 0;
1420
1421	if (ifp->if_format == XFS_DINODE_FMT_LOCAL)
1422		return 0;
1423
1424	if (XFS_IS_CORRUPT(ip->i_mount, !xfs_ifork_has_extents(ifp))) {
1425		xfs_bmap_mark_sick(ip, whichfork);
1426		return -EFSCORRUPTED;
1427	}
1428
1429	error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
1430	if (error || is_empty)
1431		return error;
1432
1433	*last_block = rec.br_startoff + rec.br_blockcount;
1434	return 0;
1435}
1436
1437/*
1438 * Extent tree manipulation functions used during allocation.
1439 */
1440
1441/*
1442 * Convert a delayed allocation to a real allocation.
1443 */
1444STATIC int				/* error */
1445xfs_bmap_add_extent_delay_real(
1446	struct xfs_bmalloca	*bma,
1447	int			whichfork)
1448{
1449	struct xfs_mount	*mp = bma->ip->i_mount;
1450	struct xfs_ifork	*ifp = xfs_ifork_ptr(bma->ip, whichfork);
1451	struct xfs_bmbt_irec	*new = &bma->got;
1452	int			error;	/* error return value */
1453	int			i;	/* temp state */
1454	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
1455	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
1456					/* left is 0, right is 1, prev is 2 */
1457	int			rval=0;	/* return value (logging flags) */
1458	uint32_t		state = xfs_bmap_fork_to_state(whichfork);
1459	xfs_filblks_t		da_new; /* new count del alloc blocks used */
1460	xfs_filblks_t		da_old; /* old count del alloc blocks used */
1461	xfs_filblks_t		temp=0;	/* value for da_new calculations */
1462	int			tmp_rval;	/* partial logging flags */
1463	struct xfs_bmbt_irec	old;
1464
1465	ASSERT(whichfork != XFS_ATTR_FORK);
1466	ASSERT(!isnullstartblock(new->br_startblock));
1467	ASSERT(!bma->cur || (bma->cur->bc_flags & XFS_BTREE_BMBT_WASDEL));
1468
1469	XFS_STATS_INC(mp, xs_add_exlist);
1470
1471#define	LEFT		r[0]
1472#define	RIGHT		r[1]
1473#define	PREV		r[2]
1474
1475	/*
1476	 * Set up a bunch of variables to make the tests simpler.
1477	 */
1478	xfs_iext_get_extent(ifp, &bma->icur, &PREV);
1479	new_endoff = new->br_startoff + new->br_blockcount;
1480	ASSERT(isnullstartblock(PREV.br_startblock));
1481	ASSERT(PREV.br_startoff <= new->br_startoff);
1482	ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
1483
1484	da_old = startblockval(PREV.br_startblock);
1485	da_new = 0;
1486
1487	/*
1488	 * Set flags determining what part of the previous delayed allocation
1489	 * extent is being replaced by a real allocation.
1490	 */
1491	if (PREV.br_startoff == new->br_startoff)
1492		state |= BMAP_LEFT_FILLING;
1493	if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
1494		state |= BMAP_RIGHT_FILLING;
1495
1496	/*
1497	 * Check and set flags if this segment has a left neighbor.
1498	 * Don't set contiguous if the combined extent would be too large.
1499	 */
1500	if (xfs_iext_peek_prev_extent(ifp, &bma->icur, &LEFT)) {
1501		state |= BMAP_LEFT_VALID;
1502		if (isnullstartblock(LEFT.br_startblock))
1503			state |= BMAP_LEFT_DELAY;
1504	}
1505
1506	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
1507	    LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
1508	    LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
1509	    LEFT.br_state == new->br_state &&
1510	    LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
1511		state |= BMAP_LEFT_CONTIG;
1512
1513	/*
1514	 * Check and set flags if this segment has a right neighbor.
1515	 * Don't set contiguous if the combined extent would be too large.
1516	 * Also check for all-three-contiguous being too large.
1517	 */
1518	if (xfs_iext_peek_next_extent(ifp, &bma->icur, &RIGHT)) {
1519		state |= BMAP_RIGHT_VALID;
1520		if (isnullstartblock(RIGHT.br_startblock))
1521			state |= BMAP_RIGHT_DELAY;
1522	}
1523
1524	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
1525	    new_endoff == RIGHT.br_startoff &&
1526	    new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
1527	    new->br_state == RIGHT.br_state &&
1528	    new->br_blockcount + RIGHT.br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
1529	    ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1530		       BMAP_RIGHT_FILLING)) !=
1531		      (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1532		       BMAP_RIGHT_FILLING) ||
1533	     LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
1534			<= XFS_MAX_BMBT_EXTLEN))
1535		state |= BMAP_RIGHT_CONTIG;
1536
1537	error = 0;
1538	/*
1539	 * Switch out based on the FILLING and CONTIG state bits.
1540	 */
1541	switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1542			 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
1543	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1544	     BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1545		/*
1546		 * Filling in all of a previously delayed allocation extent.
1547		 * The left and right neighbors are both contiguous with new.
1548		 */
1549		LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
1550
1551		xfs_iext_remove(bma->ip, &bma->icur, state);
1552		xfs_iext_remove(bma->ip, &bma->icur, state);
1553		xfs_iext_prev(ifp, &bma->icur);
1554		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1555		ifp->if_nextents--;
1556
1557		if (bma->cur == NULL)
1558			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1559		else {
1560			rval = XFS_ILOG_CORE;
1561			error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
1562			if (error)
1563				goto done;
1564			if (XFS_IS_CORRUPT(mp, i != 1)) {
1565				xfs_btree_mark_sick(bma->cur);
1566				error = -EFSCORRUPTED;
1567				goto done;
1568			}
1569			error = xfs_btree_delete(bma->cur, &i);
1570			if (error)
1571				goto done;
1572			if (XFS_IS_CORRUPT(mp, i != 1)) {
1573				xfs_btree_mark_sick(bma->cur);
1574				error = -EFSCORRUPTED;
1575				goto done;
1576			}
1577			error = xfs_btree_decrement(bma->cur, 0, &i);
1578			if (error)
1579				goto done;
1580			if (XFS_IS_CORRUPT(mp, i != 1)) {
1581				xfs_btree_mark_sick(bma->cur);
1582				error = -EFSCORRUPTED;
1583				goto done;
1584			}
1585			error = xfs_bmbt_update(bma->cur, &LEFT);
1586			if (error)
1587				goto done;
1588		}
1589		break;
1590
1591	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1592		/*
1593		 * Filling in all of a previously delayed allocation extent.
1594		 * The left neighbor is contiguous, the right is not.
1595		 */
1596		old = LEFT;
1597		LEFT.br_blockcount += PREV.br_blockcount;
1598
1599		xfs_iext_remove(bma->ip, &bma->icur, state);
1600		xfs_iext_prev(ifp, &bma->icur);
1601		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1602
1603		if (bma->cur == NULL)
1604			rval = XFS_ILOG_DEXT;
1605		else {
1606			rval = 0;
1607			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1608			if (error)
1609				goto done;
1610			if (XFS_IS_CORRUPT(mp, i != 1)) {
1611				xfs_btree_mark_sick(bma->cur);
1612				error = -EFSCORRUPTED;
1613				goto done;
1614			}
1615			error = xfs_bmbt_update(bma->cur, &LEFT);
1616			if (error)
1617				goto done;
1618		}
1619		break;
1620
1621	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1622		/*
1623		 * Filling in all of a previously delayed allocation extent.
1624		 * The right neighbor is contiguous, the left is not. Take care
1625		 * with delay -> unwritten extent allocation here because the
1626		 * delalloc record we are overwriting is always written.
1627		 */
1628		PREV.br_startblock = new->br_startblock;
1629		PREV.br_blockcount += RIGHT.br_blockcount;
1630		PREV.br_state = new->br_state;
1631
1632		xfs_iext_next(ifp, &bma->icur);
1633		xfs_iext_remove(bma->ip, &bma->icur, state);
1634		xfs_iext_prev(ifp, &bma->icur);
1635		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1636
1637		if (bma->cur == NULL)
1638			rval = XFS_ILOG_DEXT;
1639		else {
1640			rval = 0;
1641			error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
1642			if (error)
1643				goto done;
1644			if (XFS_IS_CORRUPT(mp, i != 1)) {
1645				xfs_btree_mark_sick(bma->cur);
1646				error = -EFSCORRUPTED;
1647				goto done;
1648			}
1649			error = xfs_bmbt_update(bma->cur, &PREV);
1650			if (error)
1651				goto done;
1652		}
1653		break;
1654
1655	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
1656		/*
1657		 * Filling in all of a previously delayed allocation extent.
1658		 * Neither the left nor right neighbors are contiguous with
1659		 * the new one.
1660		 */
1661		PREV.br_startblock = new->br_startblock;
1662		PREV.br_state = new->br_state;
1663		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1664		ifp->if_nextents++;
1665
1666		if (bma->cur == NULL)
1667			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1668		else {
1669			rval = XFS_ILOG_CORE;
1670			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1671			if (error)
1672				goto done;
1673			if (XFS_IS_CORRUPT(mp, i != 0)) {
1674				xfs_btree_mark_sick(bma->cur);
1675				error = -EFSCORRUPTED;
1676				goto done;
1677			}
1678			error = xfs_btree_insert(bma->cur, &i);
1679			if (error)
1680				goto done;
1681			if (XFS_IS_CORRUPT(mp, i != 1)) {
1682				xfs_btree_mark_sick(bma->cur);
1683				error = -EFSCORRUPTED;
1684				goto done;
1685			}
1686		}
1687		break;
1688
1689	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
1690		/*
1691		 * Filling in the first part of a previous delayed allocation.
1692		 * The left neighbor is contiguous.
1693		 */
1694		old = LEFT;
1695		temp = PREV.br_blockcount - new->br_blockcount;
1696		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1697				startblockval(PREV.br_startblock));
1698
1699		LEFT.br_blockcount += new->br_blockcount;
1700
1701		PREV.br_blockcount = temp;
1702		PREV.br_startoff += new->br_blockcount;
1703		PREV.br_startblock = nullstartblock(da_new);
1704
1705		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1706		xfs_iext_prev(ifp, &bma->icur);
1707		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1708
1709		if (bma->cur == NULL)
1710			rval = XFS_ILOG_DEXT;
1711		else {
1712			rval = 0;
1713			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1714			if (error)
1715				goto done;
1716			if (XFS_IS_CORRUPT(mp, i != 1)) {
1717				xfs_btree_mark_sick(bma->cur);
1718				error = -EFSCORRUPTED;
1719				goto done;
1720			}
1721			error = xfs_bmbt_update(bma->cur, &LEFT);
1722			if (error)
1723				goto done;
1724		}
1725		break;
1726
1727	case BMAP_LEFT_FILLING:
1728		/*
1729		 * Filling in the first part of a previous delayed allocation.
1730		 * The left neighbor is not contiguous.
1731		 */
1732		xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
1733		ifp->if_nextents++;
1734
1735		if (bma->cur == NULL)
1736			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1737		else {
1738			rval = XFS_ILOG_CORE;
1739			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1740			if (error)
1741				goto done;
1742			if (XFS_IS_CORRUPT(mp, i != 0)) {
1743				xfs_btree_mark_sick(bma->cur);
1744				error = -EFSCORRUPTED;
1745				goto done;
1746			}
1747			error = xfs_btree_insert(bma->cur, &i);
1748			if (error)
1749				goto done;
1750			if (XFS_IS_CORRUPT(mp, i != 1)) {
1751				xfs_btree_mark_sick(bma->cur);
1752				error = -EFSCORRUPTED;
1753				goto done;
1754			}
1755		}
1756
1757		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1758			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1759					&bma->cur, 1, &tmp_rval, whichfork);
1760			rval |= tmp_rval;
1761			if (error)
1762				goto done;
1763		}
1764
1765		temp = PREV.br_blockcount - new->br_blockcount;
1766		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1767			startblockval(PREV.br_startblock) -
1768			(bma->cur ? bma->cur->bc_bmap.allocated : 0));
1769
1770		PREV.br_startoff = new_endoff;
1771		PREV.br_blockcount = temp;
1772		PREV.br_startblock = nullstartblock(da_new);
1773		xfs_iext_next(ifp, &bma->icur);
1774		xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
1775		xfs_iext_prev(ifp, &bma->icur);
1776		break;
1777
1778	case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1779		/*
1780		 * Filling in the last part of a previous delayed allocation.
1781		 * The right neighbor is contiguous with the new allocation.
1782		 */
1783		old = RIGHT;
1784		RIGHT.br_startoff = new->br_startoff;
1785		RIGHT.br_startblock = new->br_startblock;
1786		RIGHT.br_blockcount += new->br_blockcount;
1787
1788		if (bma->cur == NULL)
1789			rval = XFS_ILOG_DEXT;
1790		else {
1791			rval = 0;
1792			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1793			if (error)
1794				goto done;
1795			if (XFS_IS_CORRUPT(mp, i != 1)) {
1796				xfs_btree_mark_sick(bma->cur);
1797				error = -EFSCORRUPTED;
1798				goto done;
1799			}
1800			error = xfs_bmbt_update(bma->cur, &RIGHT);
1801			if (error)
1802				goto done;
1803		}
1804
1805		temp = PREV.br_blockcount - new->br_blockcount;
1806		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1807			startblockval(PREV.br_startblock));
1808
1809		PREV.br_blockcount = temp;
1810		PREV.br_startblock = nullstartblock(da_new);
1811
1812		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1813		xfs_iext_next(ifp, &bma->icur);
1814		xfs_iext_update_extent(bma->ip, state, &bma->icur, &RIGHT);
1815		break;
1816
1817	case BMAP_RIGHT_FILLING:
1818		/*
1819		 * Filling in the last part of a previous delayed allocation.
1820		 * The right neighbor is not contiguous.
1821		 */
1822		xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
1823		ifp->if_nextents++;
1824
1825		if (bma->cur == NULL)
1826			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1827		else {
1828			rval = XFS_ILOG_CORE;
1829			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1830			if (error)
1831				goto done;
1832			if (XFS_IS_CORRUPT(mp, i != 0)) {
1833				xfs_btree_mark_sick(bma->cur);
1834				error = -EFSCORRUPTED;
1835				goto done;
1836			}
1837			error = xfs_btree_insert(bma->cur, &i);
1838			if (error)
1839				goto done;
1840			if (XFS_IS_CORRUPT(mp, i != 1)) {
1841				xfs_btree_mark_sick(bma->cur);
1842				error = -EFSCORRUPTED;
1843				goto done;
1844			}
1845		}
1846
1847		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1848			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1849				&bma->cur, 1, &tmp_rval, whichfork);
1850			rval |= tmp_rval;
1851			if (error)
1852				goto done;
1853		}
1854
1855		temp = PREV.br_blockcount - new->br_blockcount;
1856		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1857			startblockval(PREV.br_startblock) -
1858			(bma->cur ? bma->cur->bc_bmap.allocated : 0));
1859
1860		PREV.br_startblock = nullstartblock(da_new);
1861		PREV.br_blockcount = temp;
1862		xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
1863		xfs_iext_next(ifp, &bma->icur);
1864		break;
1865
1866	case 0:
1867		/*
1868		 * Filling in the middle part of a previous delayed allocation.
1869		 * Contiguity is impossible here.
1870		 * This case is avoided almost all the time.
1871		 *
1872		 * We start with a delayed allocation:
1873		 *
1874		 * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+
1875		 *  PREV @ idx
1876		 *
1877	         * and we are allocating:
1878		 *                     +rrrrrrrrrrrrrrrrr+
1879		 *			      new
1880		 *
1881		 * and we set it up for insertion as:
1882		 * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+
1883		 *                            new
1884		 *  PREV @ idx          LEFT              RIGHT
1885		 *                      inserted at idx + 1
1886		 */
1887		old = PREV;
1888
1889		/* LEFT is the new middle */
1890		LEFT = *new;
1891
1892		/* RIGHT is the new right */
1893		RIGHT.br_state = PREV.br_state;
1894		RIGHT.br_startoff = new_endoff;
1895		RIGHT.br_blockcount =
1896			PREV.br_startoff + PREV.br_blockcount - new_endoff;
1897		RIGHT.br_startblock =
1898			nullstartblock(xfs_bmap_worst_indlen(bma->ip,
1899					RIGHT.br_blockcount));
1900
1901		/* truncate PREV */
1902		PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
1903		PREV.br_startblock =
1904			nullstartblock(xfs_bmap_worst_indlen(bma->ip,
1905					PREV.br_blockcount));
1906		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1907
1908		xfs_iext_next(ifp, &bma->icur);
1909		xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state);
1910		xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state);
1911		ifp->if_nextents++;
1912
1913		if (bma->cur == NULL)
1914			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1915		else {
1916			rval = XFS_ILOG_CORE;
1917			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1918			if (error)
1919				goto done;
1920			if (XFS_IS_CORRUPT(mp, i != 0)) {
1921				xfs_btree_mark_sick(bma->cur);
1922				error = -EFSCORRUPTED;
1923				goto done;
1924			}
1925			error = xfs_btree_insert(bma->cur, &i);
1926			if (error)
1927				goto done;
1928			if (XFS_IS_CORRUPT(mp, i != 1)) {
1929				xfs_btree_mark_sick(bma->cur);
1930				error = -EFSCORRUPTED;
1931				goto done;
1932			}
1933		}
1934
1935		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1936			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1937					&bma->cur, 1, &tmp_rval, whichfork);
1938			rval |= tmp_rval;
1939			if (error)
1940				goto done;
1941		}
1942
1943		da_new = startblockval(PREV.br_startblock) +
1944			 startblockval(RIGHT.br_startblock);
1945		break;
1946
1947	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1948	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1949	case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
1950	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1951	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1952	case BMAP_LEFT_CONTIG:
1953	case BMAP_RIGHT_CONTIG:
1954		/*
1955		 * These cases are all impossible.
1956		 */
1957		ASSERT(0);
1958	}
1959
1960	/* add reverse mapping unless caller opted out */
1961	if (!(bma->flags & XFS_BMAPI_NORMAP))
1962		xfs_rmap_map_extent(bma->tp, bma->ip, whichfork, new);
1963
1964	/* convert to a btree if necessary */
1965	if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1966		int	tmp_logflags;	/* partial log flag return val */
1967
1968		ASSERT(bma->cur == NULL);
1969		error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1970				&bma->cur, da_old > 0, &tmp_logflags,
1971				whichfork);
1972		bma->logflags |= tmp_logflags;
1973		if (error)
1974			goto done;
1975	}
1976
1977	if (da_new != da_old)
1978		xfs_mod_delalloc(mp, (int64_t)da_new - da_old);
1979
1980	if (bma->cur) {
1981		da_new += bma->cur->bc_bmap.allocated;
1982		bma->cur->bc_bmap.allocated = 0;
1983	}
1984
1985	/* adjust for changes in reserved delayed indirect blocks */
1986	if (da_new != da_old) {
1987		ASSERT(state == 0 || da_new < da_old);
1988		error = xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new),
1989				false);
1990	}
1991
1992	xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
1993done:
1994	if (whichfork != XFS_COW_FORK)
1995		bma->logflags |= rval;
1996	return error;
1997#undef	LEFT
1998#undef	RIGHT
1999#undef	PREV
2000}
2001
2002/*
2003 * Convert an unwritten allocation to a real allocation or vice versa.
2004 */
2005int					/* error */
2006xfs_bmap_add_extent_unwritten_real(
2007	struct xfs_trans	*tp,
2008	xfs_inode_t		*ip,	/* incore inode pointer */
2009	int			whichfork,
2010	struct xfs_iext_cursor	*icur,
2011	struct xfs_btree_cur	**curp,	/* if *curp is null, not a btree */
2012	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
2013	int			*logflagsp) /* inode logging flags */
2014{
2015	struct xfs_btree_cur	*cur;	/* btree cursor */
2016	int			error;	/* error return value */
2017	int			i;	/* temp state */
2018	struct xfs_ifork	*ifp;	/* inode fork pointer */
2019	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
2020	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
2021					/* left is 0, right is 1, prev is 2 */
2022	int			rval=0;	/* return value (logging flags) */
2023	uint32_t		state = xfs_bmap_fork_to_state(whichfork);
2024	struct xfs_mount	*mp = ip->i_mount;
2025	struct xfs_bmbt_irec	old;
2026
2027	*logflagsp = 0;
2028
2029	cur = *curp;
2030	ifp = xfs_ifork_ptr(ip, whichfork);
2031
2032	ASSERT(!isnullstartblock(new->br_startblock));
2033
2034	XFS_STATS_INC(mp, xs_add_exlist);
2035
2036#define	LEFT		r[0]
2037#define	RIGHT		r[1]
2038#define	PREV		r[2]
2039
2040	/*
2041	 * Set up a bunch of variables to make the tests simpler.
2042	 */
2043	error = 0;
2044	xfs_iext_get_extent(ifp, icur, &PREV);
2045	ASSERT(new->br_state != PREV.br_state);
2046	new_endoff = new->br_startoff + new->br_blockcount;
2047	ASSERT(PREV.br_startoff <= new->br_startoff);
2048	ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
2049
2050	/*
2051	 * Set flags determining what part of the previous oldext allocation
2052	 * extent is being replaced by a newext allocation.
2053	 */
2054	if (PREV.br_startoff == new->br_startoff)
2055		state |= BMAP_LEFT_FILLING;
2056	if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
2057		state |= BMAP_RIGHT_FILLING;
2058
2059	/*
2060	 * Check and set flags if this segment has a left neighbor.
2061	 * Don't set contiguous if the combined extent would be too large.
2062	 */
2063	if (xfs_iext_peek_prev_extent(ifp, icur, &LEFT)) {
2064		state |= BMAP_LEFT_VALID;
2065		if (isnullstartblock(LEFT.br_startblock))
2066			state |= BMAP_LEFT_DELAY;
2067	}
2068
2069	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2070	    LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
2071	    LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
2072	    LEFT.br_state == new->br_state &&
2073	    LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
2074		state |= BMAP_LEFT_CONTIG;
2075
2076	/*
2077	 * Check and set flags if this segment has a right neighbor.
2078	 * Don't set contiguous if the combined extent would be too large.
2079	 * Also check for all-three-contiguous being too large.
2080	 */
2081	if (xfs_iext_peek_next_extent(ifp, icur, &RIGHT)) {
2082		state |= BMAP_RIGHT_VALID;
2083		if (isnullstartblock(RIGHT.br_startblock))
2084			state |= BMAP_RIGHT_DELAY;
2085	}
2086
2087	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2088	    new_endoff == RIGHT.br_startoff &&
2089	    new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
2090	    new->br_state == RIGHT.br_state &&
2091	    new->br_blockcount + RIGHT.br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
2092	    ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2093		       BMAP_RIGHT_FILLING)) !=
2094		      (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2095		       BMAP_RIGHT_FILLING) ||
2096	     LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
2097			<= XFS_MAX_BMBT_EXTLEN))
2098		state |= BMAP_RIGHT_CONTIG;
2099
2100	/*
2101	 * Switch out based on the FILLING and CONTIG state bits.
2102	 */
2103	switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2104			 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
2105	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2106	     BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2107		/*
2108		 * Setting all of a previous oldext extent to newext.
2109		 * The left and right neighbors are both contiguous with new.
2110		 */
2111		LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
2112
2113		xfs_iext_remove(ip, icur, state);
2114		xfs_iext_remove(ip, icur, state);
2115		xfs_iext_prev(ifp, icur);
2116		xfs_iext_update_extent(ip, state, icur, &LEFT);
2117		ifp->if_nextents -= 2;
2118		if (cur == NULL)
2119			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2120		else {
2121			rval = XFS_ILOG_CORE;
2122			error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
2123			if (error)
2124				goto done;
2125			if (XFS_IS_CORRUPT(mp, i != 1)) {
2126				xfs_btree_mark_sick(cur);
2127				error = -EFSCORRUPTED;
2128				goto done;
2129			}
2130			if ((error = xfs_btree_delete(cur, &i)))
2131				goto done;
2132			if (XFS_IS_CORRUPT(mp, i != 1)) {
2133				xfs_btree_mark_sick(cur);
2134				error = -EFSCORRUPTED;
2135				goto done;
2136			}
2137			if ((error = xfs_btree_decrement(cur, 0, &i)))
2138				goto done;
2139			if (XFS_IS_CORRUPT(mp, i != 1)) {
2140				xfs_btree_mark_sick(cur);
2141				error = -EFSCORRUPTED;
2142				goto done;
2143			}
2144			if ((error = xfs_btree_delete(cur, &i)))
2145				goto done;
2146			if (XFS_IS_CORRUPT(mp, i != 1)) {
2147				xfs_btree_mark_sick(cur);
2148				error = -EFSCORRUPTED;
2149				goto done;
2150			}
2151			if ((error = xfs_btree_decrement(cur, 0, &i)))
2152				goto done;
2153			if (XFS_IS_CORRUPT(mp, i != 1)) {
2154				xfs_btree_mark_sick(cur);
2155				error = -EFSCORRUPTED;
2156				goto done;
2157			}
2158			error = xfs_bmbt_update(cur, &LEFT);
2159			if (error)
2160				goto done;
2161		}
2162		break;
2163
2164	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2165		/*
2166		 * Setting all of a previous oldext extent to newext.
2167		 * The left neighbor is contiguous, the right is not.
2168		 */
2169		LEFT.br_blockcount += PREV.br_blockcount;
2170
2171		xfs_iext_remove(ip, icur, state);
2172		xfs_iext_prev(ifp, icur);
2173		xfs_iext_update_extent(ip, state, icur, &LEFT);
2174		ifp->if_nextents--;
2175		if (cur == NULL)
2176			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2177		else {
2178			rval = XFS_ILOG_CORE;
2179			error = xfs_bmbt_lookup_eq(cur, &PREV, &i);
2180			if (error)
2181				goto done;
2182			if (XFS_IS_CORRUPT(mp, i != 1)) {
2183				xfs_btree_mark_sick(cur);
2184				error = -EFSCORRUPTED;
2185				goto done;
2186			}
2187			if ((error = xfs_btree_delete(cur, &i)))
2188				goto done;
2189			if (XFS_IS_CORRUPT(mp, i != 1)) {
2190				xfs_btree_mark_sick(cur);
2191				error = -EFSCORRUPTED;
2192				goto done;
2193			}
2194			if ((error = xfs_btree_decrement(cur, 0, &i)))
2195				goto done;
2196			if (XFS_IS_CORRUPT(mp, i != 1)) {
2197				xfs_btree_mark_sick(cur);
2198				error = -EFSCORRUPTED;
2199				goto done;
2200			}
2201			error = xfs_bmbt_update(cur, &LEFT);
2202			if (error)
2203				goto done;
2204		}
2205		break;
2206
2207	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2208		/*
2209		 * Setting all of a previous oldext extent to newext.
2210		 * The right neighbor is contiguous, the left is not.
2211		 */
2212		PREV.br_blockcount += RIGHT.br_blockcount;
2213		PREV.br_state = new->br_state;
2214
2215		xfs_iext_next(ifp, icur);
2216		xfs_iext_remove(ip, icur, state);
2217		xfs_iext_prev(ifp, icur);
2218		xfs_iext_update_extent(ip, state, icur, &PREV);
2219		ifp->if_nextents--;
2220
2221		if (cur == NULL)
2222			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2223		else {
2224			rval = XFS_ILOG_CORE;
2225			error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
2226			if (error)
2227				goto done;
2228			if (XFS_IS_CORRUPT(mp, i != 1)) {
2229				xfs_btree_mark_sick(cur);
2230				error = -EFSCORRUPTED;
2231				goto done;
2232			}
2233			if ((error = xfs_btree_delete(cur, &i)))
2234				goto done;
2235			if (XFS_IS_CORRUPT(mp, i != 1)) {
2236				xfs_btree_mark_sick(cur);
2237				error = -EFSCORRUPTED;
2238				goto done;
2239			}
2240			if ((error = xfs_btree_decrement(cur, 0, &i)))
2241				goto done;
2242			if (XFS_IS_CORRUPT(mp, i != 1)) {
2243				xfs_btree_mark_sick(cur);
2244				error = -EFSCORRUPTED;
2245				goto done;
2246			}
2247			error = xfs_bmbt_update(cur, &PREV);
2248			if (error)
2249				goto done;
2250		}
2251		break;
2252
2253	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
2254		/*
2255		 * Setting all of a previous oldext extent to newext.
2256		 * Neither the left nor right neighbors are contiguous with
2257		 * the new one.
2258		 */
2259		PREV.br_state = new->br_state;
2260		xfs_iext_update_extent(ip, state, icur, &PREV);
2261
2262		if (cur == NULL)
2263			rval = XFS_ILOG_DEXT;
2264		else {
2265			rval = 0;
2266			error = xfs_bmbt_lookup_eq(cur, new, &i);
2267			if (error)
2268				goto done;
2269			if (XFS_IS_CORRUPT(mp, i != 1)) {
2270				xfs_btree_mark_sick(cur);
2271				error = -EFSCORRUPTED;
2272				goto done;
2273			}
2274			error = xfs_bmbt_update(cur, &PREV);
2275			if (error)
2276				goto done;
2277		}
2278		break;
2279
2280	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
2281		/*
2282		 * Setting the first part of a previous oldext extent to newext.
2283		 * The left neighbor is contiguous.
2284		 */
2285		LEFT.br_blockcount += new->br_blockcount;
2286
2287		old = PREV;
2288		PREV.br_startoff += new->br_blockcount;
2289		PREV.br_startblock += new->br_blockcount;
2290		PREV.br_blockcount -= new->br_blockcount;
2291
2292		xfs_iext_update_extent(ip, state, icur, &PREV);
2293		xfs_iext_prev(ifp, icur);
2294		xfs_iext_update_extent(ip, state, icur, &LEFT);
2295
2296		if (cur == NULL)
2297			rval = XFS_ILOG_DEXT;
2298		else {
2299			rval = 0;
2300			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2301			if (error)
2302				goto done;
2303			if (XFS_IS_CORRUPT(mp, i != 1)) {
2304				xfs_btree_mark_sick(cur);
2305				error = -EFSCORRUPTED;
2306				goto done;
2307			}
2308			error = xfs_bmbt_update(cur, &PREV);
2309			if (error)
2310				goto done;
2311			error = xfs_btree_decrement(cur, 0, &i);
2312			if (error)
2313				goto done;
2314			error = xfs_bmbt_update(cur, &LEFT);
2315			if (error)
2316				goto done;
2317		}
2318		break;
2319
2320	case BMAP_LEFT_FILLING:
2321		/*
2322		 * Setting the first part of a previous oldext extent to newext.
2323		 * The left neighbor is not contiguous.
2324		 */
2325		old = PREV;
2326		PREV.br_startoff += new->br_blockcount;
2327		PREV.br_startblock += new->br_blockcount;
2328		PREV.br_blockcount -= new->br_blockcount;
2329
2330		xfs_iext_update_extent(ip, state, icur, &PREV);
2331		xfs_iext_insert(ip, icur, new, state);
2332		ifp->if_nextents++;
2333
2334		if (cur == NULL)
2335			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2336		else {
2337			rval = XFS_ILOG_CORE;
2338			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2339			if (error)
2340				goto done;
2341			if (XFS_IS_CORRUPT(mp, i != 1)) {
2342				xfs_btree_mark_sick(cur);
2343				error = -EFSCORRUPTED;
2344				goto done;
2345			}
2346			error = xfs_bmbt_update(cur, &PREV);
2347			if (error)
2348				goto done;
2349			cur->bc_rec.b = *new;
2350			if ((error = xfs_btree_insert(cur, &i)))
2351				goto done;
2352			if (XFS_IS_CORRUPT(mp, i != 1)) {
2353				xfs_btree_mark_sick(cur);
2354				error = -EFSCORRUPTED;
2355				goto done;
2356			}
2357		}
2358		break;
2359
2360	case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2361		/*
2362		 * Setting the last part of a previous oldext extent to newext.
2363		 * The right neighbor is contiguous with the new allocation.
2364		 */
2365		old = PREV;
2366		PREV.br_blockcount -= new->br_blockcount;
2367
2368		RIGHT.br_startoff = new->br_startoff;
2369		RIGHT.br_startblock = new->br_startblock;
2370		RIGHT.br_blockcount += new->br_blockcount;
2371
2372		xfs_iext_update_extent(ip, state, icur, &PREV);
2373		xfs_iext_next(ifp, icur);
2374		xfs_iext_update_extent(ip, state, icur, &RIGHT);
2375
2376		if (cur == NULL)
2377			rval = XFS_ILOG_DEXT;
2378		else {
2379			rval = 0;
2380			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2381			if (error)
2382				goto done;
2383			if (XFS_IS_CORRUPT(mp, i != 1)) {
2384				xfs_btree_mark_sick(cur);
2385				error = -EFSCORRUPTED;
2386				goto done;
2387			}
2388			error = xfs_bmbt_update(cur, &PREV);
2389			if (error)
2390				goto done;
2391			error = xfs_btree_increment(cur, 0, &i);
2392			if (error)
2393				goto done;
2394			error = xfs_bmbt_update(cur, &RIGHT);
2395			if (error)
2396				goto done;
2397		}
2398		break;
2399
2400	case BMAP_RIGHT_FILLING:
2401		/*
2402		 * Setting the last part of a previous oldext extent to newext.
2403		 * The right neighbor is not contiguous.
2404		 */
2405		old = PREV;
2406		PREV.br_blockcount -= new->br_blockcount;
2407
2408		xfs_iext_update_extent(ip, state, icur, &PREV);
2409		xfs_iext_next(ifp, icur);
2410		xfs_iext_insert(ip, icur, new, state);
2411		ifp->if_nextents++;
2412
2413		if (cur == NULL)
2414			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2415		else {
2416			rval = XFS_ILOG_CORE;
2417			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2418			if (error)
2419				goto done;
2420			if (XFS_IS_CORRUPT(mp, i != 1)) {
2421				xfs_btree_mark_sick(cur);
2422				error = -EFSCORRUPTED;
2423				goto done;
2424			}
2425			error = xfs_bmbt_update(cur, &PREV);
2426			if (error)
2427				goto done;
2428			error = xfs_bmbt_lookup_eq(cur, new, &i);
2429			if (error)
2430				goto done;
2431			if (XFS_IS_CORRUPT(mp, i != 0)) {
2432				xfs_btree_mark_sick(cur);
2433				error = -EFSCORRUPTED;
2434				goto done;
2435			}
2436			if ((error = xfs_btree_insert(cur, &i)))
2437				goto done;
2438			if (XFS_IS_CORRUPT(mp, i != 1)) {
2439				xfs_btree_mark_sick(cur);
2440				error = -EFSCORRUPTED;
2441				goto done;
2442			}
2443		}
2444		break;
2445
2446	case 0:
2447		/*
2448		 * Setting the middle part of a previous oldext extent to
2449		 * newext.  Contiguity is impossible here.
2450		 * One extent becomes three extents.
2451		 */
2452		old = PREV;
2453		PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
2454
2455		r[0] = *new;
2456		r[1].br_startoff = new_endoff;
2457		r[1].br_blockcount =
2458			old.br_startoff + old.br_blockcount - new_endoff;
2459		r[1].br_startblock = new->br_startblock + new->br_blockcount;
2460		r[1].br_state = PREV.br_state;
2461
2462		xfs_iext_update_extent(ip, state, icur, &PREV);
2463		xfs_iext_next(ifp, icur);
2464		xfs_iext_insert(ip, icur, &r[1], state);
2465		xfs_iext_insert(ip, icur, &r[0], state);
2466		ifp->if_nextents += 2;
2467
2468		if (cur == NULL)
2469			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2470		else {
2471			rval = XFS_ILOG_CORE;
2472			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2473			if (error)
2474				goto done;
2475			if (XFS_IS_CORRUPT(mp, i != 1)) {
2476				xfs_btree_mark_sick(cur);
2477				error = -EFSCORRUPTED;
2478				goto done;
2479			}
2480			/* new right extent - oldext */
2481			error = xfs_bmbt_update(cur, &r[1]);
2482			if (error)
2483				goto done;
2484			/* new left extent - oldext */
2485			cur->bc_rec.b = PREV;
2486			if ((error = xfs_btree_insert(cur, &i)))
2487				goto done;
2488			if (XFS_IS_CORRUPT(mp, i != 1)) {
2489				xfs_btree_mark_sick(cur);
2490				error = -EFSCORRUPTED;
2491				goto done;
2492			}
2493			/*
2494			 * Reset the cursor to the position of the new extent
2495			 * we are about to insert as we can't trust it after
2496			 * the previous insert.
2497			 */
2498			error = xfs_bmbt_lookup_eq(cur, new, &i);
2499			if (error)
2500				goto done;
2501			if (XFS_IS_CORRUPT(mp, i != 0)) {
2502				xfs_btree_mark_sick(cur);
2503				error = -EFSCORRUPTED;
2504				goto done;
2505			}
2506			/* new middle extent - newext */
2507			if ((error = xfs_btree_insert(cur, &i)))
2508				goto done;
2509			if (XFS_IS_CORRUPT(mp, i != 1)) {
2510				xfs_btree_mark_sick(cur);
2511				error = -EFSCORRUPTED;
2512				goto done;
2513			}
2514		}
2515		break;
2516
2517	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2518	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2519	case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
2520	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2521	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2522	case BMAP_LEFT_CONTIG:
2523	case BMAP_RIGHT_CONTIG:
2524		/*
2525		 * These cases are all impossible.
2526		 */
2527		ASSERT(0);
2528	}
2529
2530	/* update reverse mappings */
2531	xfs_rmap_convert_extent(mp, tp, ip, whichfork, new);
2532
2533	/* convert to a btree if necessary */
2534	if (xfs_bmap_needs_btree(ip, whichfork)) {
2535		int	tmp_logflags;	/* partial log flag return val */
2536
2537		ASSERT(cur == NULL);
2538		error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
2539				&tmp_logflags, whichfork);
2540		*logflagsp |= tmp_logflags;
2541		if (error)
2542			goto done;
2543	}
2544
2545	/* clear out the allocated field, done with it now in any case. */
2546	if (cur) {
2547		cur->bc_bmap.allocated = 0;
2548		*curp = cur;
2549	}
2550
2551	xfs_bmap_check_leaf_extents(*curp, ip, whichfork);
2552done:
2553	*logflagsp |= rval;
2554	return error;
2555#undef	LEFT
2556#undef	RIGHT
2557#undef	PREV
2558}
2559
2560/*
2561 * Convert a hole to a delayed allocation.
2562 */
2563STATIC void
2564xfs_bmap_add_extent_hole_delay(
2565	xfs_inode_t		*ip,	/* incore inode pointer */
2566	int			whichfork,
2567	struct xfs_iext_cursor	*icur,
2568	xfs_bmbt_irec_t		*new)	/* new data to add to file extents */
2569{
2570	struct xfs_ifork	*ifp;	/* inode fork pointer */
2571	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
2572	xfs_filblks_t		newlen=0;	/* new indirect size */
2573	xfs_filblks_t		oldlen=0;	/* old indirect size */
2574	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
2575	uint32_t		state = xfs_bmap_fork_to_state(whichfork);
2576	xfs_filblks_t		temp;	 /* temp for indirect calculations */
2577
2578	ifp = xfs_ifork_ptr(ip, whichfork);
2579	ASSERT(isnullstartblock(new->br_startblock));
2580
2581	/*
2582	 * Check and set flags if this segment has a left neighbor
2583	 */
2584	if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
2585		state |= BMAP_LEFT_VALID;
2586		if (isnullstartblock(left.br_startblock))
2587			state |= BMAP_LEFT_DELAY;
2588	}
2589
2590	/*
2591	 * Check and set flags if the current (right) segment exists.
2592	 * If it doesn't exist, we're converting the hole at end-of-file.
2593	 */
2594	if (xfs_iext_get_extent(ifp, icur, &right)) {
2595		state |= BMAP_RIGHT_VALID;
2596		if (isnullstartblock(right.br_startblock))
2597			state |= BMAP_RIGHT_DELAY;
2598	}
2599
2600	/*
2601	 * Set contiguity flags on the left and right neighbors.
2602	 * Don't let extents get too large, even if the pieces are contiguous.
2603	 */
2604	if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) &&
2605	    left.br_startoff + left.br_blockcount == new->br_startoff &&
2606	    left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
2607		state |= BMAP_LEFT_CONTIG;
2608
2609	if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) &&
2610	    new->br_startoff + new->br_blockcount == right.br_startoff &&
2611	    new->br_blockcount + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
2612	    (!(state & BMAP_LEFT_CONTIG) ||
2613	     (left.br_blockcount + new->br_blockcount +
2614	      right.br_blockcount <= XFS_MAX_BMBT_EXTLEN)))
2615		state |= BMAP_RIGHT_CONTIG;
2616
2617	/*
2618	 * Switch out based on the contiguity flags.
2619	 */
2620	switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
2621	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2622		/*
2623		 * New allocation is contiguous with delayed allocations
2624		 * on the left and on the right.
2625		 * Merge all three into a single extent record.
2626		 */
2627		temp = left.br_blockcount + new->br_blockcount +
2628			right.br_blockcount;
2629
2630		oldlen = startblockval(left.br_startblock) +
2631			startblockval(new->br_startblock) +
2632			startblockval(right.br_startblock);
2633		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2634					 oldlen);
2635		left.br_startblock = nullstartblock(newlen);
2636		left.br_blockcount = temp;
2637
2638		xfs_iext_remove(ip, icur, state);
2639		xfs_iext_prev(ifp, icur);
2640		xfs_iext_update_extent(ip, state, icur, &left);
2641		break;
2642
2643	case BMAP_LEFT_CONTIG:
2644		/*
2645		 * New allocation is contiguous with a delayed allocation
2646		 * on the left.
2647		 * Merge the new allocation with the left neighbor.
2648		 */
2649		temp = left.br_blockcount + new->br_blockcount;
2650
2651		oldlen = startblockval(left.br_startblock) +
2652			startblockval(new->br_startblock);
2653		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2654					 oldlen);
2655		left.br_blockcount = temp;
2656		left.br_startblock = nullstartblock(newlen);
2657
2658		xfs_iext_prev(ifp, icur);
2659		xfs_iext_update_extent(ip, state, icur, &left);
2660		break;
2661
2662	case BMAP_RIGHT_CONTIG:
2663		/*
2664		 * New allocation is contiguous with a delayed allocation
2665		 * on the right.
2666		 * Merge the new allocation with the right neighbor.
2667		 */
2668		temp = new->br_blockcount + right.br_blockcount;
2669		oldlen = startblockval(new->br_startblock) +
2670			startblockval(right.br_startblock);
2671		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2672					 oldlen);
2673		right.br_startoff = new->br_startoff;
2674		right.br_startblock = nullstartblock(newlen);
2675		right.br_blockcount = temp;
2676		xfs_iext_update_extent(ip, state, icur, &right);
2677		break;
2678
2679	case 0:
2680		/*
2681		 * New allocation is not contiguous with another
2682		 * delayed allocation.
2683		 * Insert a new entry.
2684		 */
2685		oldlen = newlen = 0;
2686		xfs_iext_insert(ip, icur, new, state);
2687		break;
2688	}
2689	if (oldlen != newlen) {
2690		ASSERT(oldlen > newlen);
2691		xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen),
2692				 false);
2693		/*
2694		 * Nothing to do for disk quota accounting here.
2695		 */
2696		xfs_mod_delalloc(ip->i_mount, (int64_t)newlen - oldlen);
2697	}
2698}
2699
2700/*
2701 * Convert a hole to a real allocation.
2702 */
2703STATIC int				/* error */
2704xfs_bmap_add_extent_hole_real(
2705	struct xfs_trans	*tp,
2706	struct xfs_inode	*ip,
2707	int			whichfork,
2708	struct xfs_iext_cursor	*icur,
2709	struct xfs_btree_cur	**curp,
2710	struct xfs_bmbt_irec	*new,
2711	int			*logflagsp,
2712	uint32_t		flags)
2713{
2714	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
2715	struct xfs_mount	*mp = ip->i_mount;
2716	struct xfs_btree_cur	*cur = *curp;
2717	int			error;	/* error return value */
2718	int			i;	/* temp state */
2719	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
2720	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
2721	int			rval=0;	/* return value (logging flags) */
2722	uint32_t		state = xfs_bmap_fork_to_state(whichfork);
2723	struct xfs_bmbt_irec	old;
2724
2725	ASSERT(!isnullstartblock(new->br_startblock));
2726	ASSERT(!cur || !(cur->bc_flags & XFS_BTREE_BMBT_WASDEL));
2727
2728	XFS_STATS_INC(mp, xs_add_exlist);
2729
2730	/*
2731	 * Check and set flags if this segment has a left neighbor.
2732	 */
2733	if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
2734		state |= BMAP_LEFT_VALID;
2735		if (isnullstartblock(left.br_startblock))
2736			state |= BMAP_LEFT_DELAY;
2737	}
2738
2739	/*
2740	 * Check and set flags if this segment has a current value.
2741	 * Not true if we're inserting into the "hole" at eof.
2742	 */
2743	if (xfs_iext_get_extent(ifp, icur, &right)) {
2744		state |= BMAP_RIGHT_VALID;
2745		if (isnullstartblock(right.br_startblock))
2746			state |= BMAP_RIGHT_DELAY;
2747	}
2748
2749	/*
2750	 * We're inserting a real allocation between "left" and "right".
2751	 * Set the contiguity flags.  Don't let extents get too large.
2752	 */
2753	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2754	    left.br_startoff + left.br_blockcount == new->br_startoff &&
2755	    left.br_startblock + left.br_blockcount == new->br_startblock &&
2756	    left.br_state == new->br_state &&
2757	    left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
2758		state |= BMAP_LEFT_CONTIG;
2759
2760	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2761	    new->br_startoff + new->br_blockcount == right.br_startoff &&
2762	    new->br_startblock + new->br_blockcount == right.br_startblock &&
2763	    new->br_state == right.br_state &&
2764	    new->br_blockcount + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
2765	    (!(state & BMAP_LEFT_CONTIG) ||
2766	     left.br_blockcount + new->br_blockcount +
2767	     right.br_blockcount <= XFS_MAX_BMBT_EXTLEN))
2768		state |= BMAP_RIGHT_CONTIG;
2769
2770	error = 0;
2771	/*
2772	 * Select which case we're in here, and implement it.
2773	 */
2774	switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
2775	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2776		/*
2777		 * New allocation is contiguous with real allocations on the
2778		 * left and on the right.
2779		 * Merge all three into a single extent record.
2780		 */
2781		left.br_blockcount += new->br_blockcount + right.br_blockcount;
2782
2783		xfs_iext_remove(ip, icur, state);
2784		xfs_iext_prev(ifp, icur);
2785		xfs_iext_update_extent(ip, state, icur, &left);
2786		ifp->if_nextents--;
2787
2788		if (cur == NULL) {
2789			rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
2790		} else {
2791			rval = XFS_ILOG_CORE;
2792			error = xfs_bmbt_lookup_eq(cur, &right, &i);
2793			if (error)
2794				goto done;
2795			if (XFS_IS_CORRUPT(mp, i != 1)) {
2796				xfs_btree_mark_sick(cur);
2797				error = -EFSCORRUPTED;
2798				goto done;
2799			}
2800			error = xfs_btree_delete(cur, &i);
2801			if (error)
2802				goto done;
2803			if (XFS_IS_CORRUPT(mp, i != 1)) {
2804				xfs_btree_mark_sick(cur);
2805				error = -EFSCORRUPTED;
2806				goto done;
2807			}
2808			error = xfs_btree_decrement(cur, 0, &i);
2809			if (error)
2810				goto done;
2811			if (XFS_IS_CORRUPT(mp, i != 1)) {
2812				xfs_btree_mark_sick(cur);
2813				error = -EFSCORRUPTED;
2814				goto done;
2815			}
2816			error = xfs_bmbt_update(cur, &left);
2817			if (error)
2818				goto done;
2819		}
2820		break;
2821
2822	case BMAP_LEFT_CONTIG:
2823		/*
2824		 * New allocation is contiguous with a real allocation
2825		 * on the left.
2826		 * Merge the new allocation with the left neighbor.
2827		 */
2828		old = left;
2829		left.br_blockcount += new->br_blockcount;
2830
2831		xfs_iext_prev(ifp, icur);
2832		xfs_iext_update_extent(ip, state, icur, &left);
2833
2834		if (cur == NULL) {
2835			rval = xfs_ilog_fext(whichfork);
2836		} else {
2837			rval = 0;
2838			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2839			if (error)
2840				goto done;
2841			if (XFS_IS_CORRUPT(mp, i != 1)) {
2842				xfs_btree_mark_sick(cur);
2843				error = -EFSCORRUPTED;
2844				goto done;
2845			}
2846			error = xfs_bmbt_update(cur, &left);
2847			if (error)
2848				goto done;
2849		}
2850		break;
2851
2852	case BMAP_RIGHT_CONTIG:
2853		/*
2854		 * New allocation is contiguous with a real allocation
2855		 * on the right.
2856		 * Merge the new allocation with the right neighbor.
2857		 */
2858		old = right;
2859
2860		right.br_startoff = new->br_startoff;
2861		right.br_startblock = new->br_startblock;
2862		right.br_blockcount += new->br_blockcount;
2863		xfs_iext_update_extent(ip, state, icur, &right);
2864
2865		if (cur == NULL) {
2866			rval = xfs_ilog_fext(whichfork);
2867		} else {
2868			rval = 0;
2869			error = xfs_bmbt_lookup_eq(cur, &old, &i);
2870			if (error)
2871				goto done;
2872			if (XFS_IS_CORRUPT(mp, i != 1)) {
2873				xfs_btree_mark_sick(cur);
2874				error = -EFSCORRUPTED;
2875				goto done;
2876			}
2877			error = xfs_bmbt_update(cur, &right);
2878			if (error)
2879				goto done;
2880		}
2881		break;
2882
2883	case 0:
2884		/*
2885		 * New allocation is not contiguous with another
2886		 * real allocation.
2887		 * Insert a new entry.
2888		 */
2889		xfs_iext_insert(ip, icur, new, state);
2890		ifp->if_nextents++;
2891
2892		if (cur == NULL) {
2893			rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
2894		} else {
2895			rval = XFS_ILOG_CORE;
2896			error = xfs_bmbt_lookup_eq(cur, new, &i);
2897			if (error)
2898				goto done;
2899			if (XFS_IS_CORRUPT(mp, i != 0)) {
2900				xfs_btree_mark_sick(cur);
2901				error = -EFSCORRUPTED;
2902				goto done;
2903			}
2904			error = xfs_btree_insert(cur, &i);
2905			if (error)
2906				goto done;
2907			if (XFS_IS_CORRUPT(mp, i != 1)) {
2908				xfs_btree_mark_sick(cur);
2909				error = -EFSCORRUPTED;
2910				goto done;
2911			}
2912		}
2913		break;
2914	}
2915
2916	/* add reverse mapping unless caller opted out */
2917	if (!(flags & XFS_BMAPI_NORMAP))
2918		xfs_rmap_map_extent(tp, ip, whichfork, new);
2919
2920	/* convert to a btree if necessary */
2921	if (xfs_bmap_needs_btree(ip, whichfork)) {
2922		int	tmp_logflags;	/* partial log flag return val */
2923
2924		ASSERT(cur == NULL);
2925		error = xfs_bmap_extents_to_btree(tp, ip, curp, 0,
2926				&tmp_logflags, whichfork);
2927		*logflagsp |= tmp_logflags;
2928		cur = *curp;
2929		if (error)
2930			goto done;
2931	}
2932
2933	/* clear out the allocated field, done with it now in any case. */
2934	if (cur)
2935		cur->bc_bmap.allocated = 0;
2936
2937	xfs_bmap_check_leaf_extents(cur, ip, whichfork);
2938done:
2939	*logflagsp |= rval;
2940	return error;
2941}
2942
2943/*
2944 * Functions used in the extent read, allocate and remove paths
2945 */
2946
2947/*
2948 * Adjust the size of the new extent based on i_extsize and rt extsize.
2949 */
2950int
2951xfs_bmap_extsize_align(
2952	xfs_mount_t	*mp,
2953	xfs_bmbt_irec_t	*gotp,		/* next extent pointer */
2954	xfs_bmbt_irec_t	*prevp,		/* previous extent pointer */
2955	xfs_extlen_t	extsz,		/* align to this extent size */
2956	int		rt,		/* is this a realtime inode? */
2957	int		eof,		/* is extent at end-of-file? */
2958	int		delay,		/* creating delalloc extent? */
2959	int		convert,	/* overwriting unwritten extent? */
2960	xfs_fileoff_t	*offp,		/* in/out: aligned offset */
2961	xfs_extlen_t	*lenp)		/* in/out: aligned length */
2962{
2963	xfs_fileoff_t	orig_off;	/* original offset */
2964	xfs_extlen_t	orig_alen;	/* original length */
2965	xfs_fileoff_t	orig_end;	/* original off+len */
2966	xfs_fileoff_t	nexto;		/* next file offset */
2967	xfs_fileoff_t	prevo;		/* previous file offset */
2968	xfs_fileoff_t	align_off;	/* temp for offset */
2969	xfs_extlen_t	align_alen;	/* temp for length */
2970	xfs_extlen_t	temp;		/* temp for calculations */
2971
2972	if (convert)
2973		return 0;
2974
2975	orig_off = align_off = *offp;
2976	orig_alen = align_alen = *lenp;
2977	orig_end = orig_off + orig_alen;
2978
2979	/*
2980	 * If this request overlaps an existing extent, then don't
2981	 * attempt to perform any additional alignment.
2982	 */
2983	if (!delay && !eof &&
2984	    (orig_off >= gotp->br_startoff) &&
2985	    (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
2986		return 0;
2987	}
2988
2989	/*
2990	 * If the file offset is unaligned vs. the extent size
2991	 * we need to align it.  This will be possible unless
2992	 * the file was previously written with a kernel that didn't
2993	 * perform this alignment, or if a truncate shot us in the
2994	 * foot.
2995	 */
2996	div_u64_rem(orig_off, extsz, &temp);
2997	if (temp) {
2998		align_alen += temp;
2999		align_off -= temp;
3000	}
3001
3002	/* Same adjustment for the end of the requested area. */
3003	temp = (align_alen % extsz);
3004	if (temp)
3005		align_alen += extsz - temp;
3006
3007	/*
3008	 * For large extent hint sizes, the aligned extent might be larger than
3009	 * XFS_BMBT_MAX_EXTLEN. In that case, reduce the size by an extsz so
3010	 * that it pulls the length back under XFS_BMBT_MAX_EXTLEN. The outer
3011	 * allocation loops handle short allocation just fine, so it is safe to
3012	 * do this. We only want to do it when we are forced to, though, because
3013	 * it means more allocation operations are required.
3014	 */
3015	while (align_alen > XFS_MAX_BMBT_EXTLEN)
3016		align_alen -= extsz;
3017	ASSERT(align_alen <= XFS_MAX_BMBT_EXTLEN);
3018
3019	/*
3020	 * If the previous block overlaps with this proposed allocation
3021	 * then move the start forward without adjusting the length.
3022	 */
3023	if (prevp->br_startoff != NULLFILEOFF) {
3024		if (prevp->br_startblock == HOLESTARTBLOCK)
3025			prevo = prevp->br_startoff;
3026		else
3027			prevo = prevp->br_startoff + prevp->br_blockcount;
3028	} else
3029		prevo = 0;
3030	if (align_off != orig_off && align_off < prevo)
3031		align_off = prevo;
3032	/*
3033	 * If the next block overlaps with this proposed allocation
3034	 * then move the start back without adjusting the length,
3035	 * but not before offset 0.
3036	 * This may of course make the start overlap previous block,
3037	 * and if we hit the offset 0 limit then the next block
3038	 * can still overlap too.
3039	 */
3040	if (!eof && gotp->br_startoff != NULLFILEOFF) {
3041		if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
3042		    (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
3043			nexto = gotp->br_startoff + gotp->br_blockcount;
3044		else
3045			nexto = gotp->br_startoff;
3046	} else
3047		nexto = NULLFILEOFF;
3048	if (!eof &&
3049	    align_off + align_alen != orig_end &&
3050	    align_off + align_alen > nexto)
3051		align_off = nexto > align_alen ? nexto - align_alen : 0;
3052	/*
3053	 * If we're now overlapping the next or previous extent that
3054	 * means we can't fit an extsz piece in this hole.  Just move
3055	 * the start forward to the first valid spot and set
3056	 * the length so we hit the end.
3057	 */
3058	if (align_off != orig_off && align_off < prevo)
3059		align_off = prevo;
3060	if (align_off + align_alen != orig_end &&
3061	    align_off + align_alen > nexto &&
3062	    nexto != NULLFILEOFF) {
3063		ASSERT(nexto > prevo);
3064		align_alen = nexto - align_off;
3065	}
3066
3067	/*
3068	 * If realtime, and the result isn't a multiple of the realtime
3069	 * extent size we need to remove blocks until it is.
3070	 */
3071	if (rt && (temp = xfs_extlen_to_rtxmod(mp, align_alen))) {
3072		/*
3073		 * We're not covering the original request, or
3074		 * we won't be able to once we fix the length.
3075		 */
3076		if (orig_off < align_off ||
3077		    orig_end > align_off + align_alen ||
3078		    align_alen - temp < orig_alen)
3079			return -EINVAL;
3080		/*
3081		 * Try to fix it by moving the start up.
3082		 */
3083		if (align_off + temp <= orig_off) {
3084			align_alen -= temp;
3085			align_off += temp;
3086		}
3087		/*
3088		 * Try to fix it by moving the end in.
3089		 */
3090		else if (align_off + align_alen - temp >= orig_end)
3091			align_alen -= temp;
3092		/*
3093		 * Set the start to the minimum then trim the length.
3094		 */
3095		else {
3096			align_alen -= orig_off - align_off;
3097			align_off = orig_off;
3098			align_alen -= xfs_extlen_to_rtxmod(mp, align_alen);
3099		}
3100		/*
3101		 * Result doesn't cover the request, fail it.
3102		 */
3103		if (orig_off < align_off || orig_end > align_off + align_alen)
3104			return -EINVAL;
3105	} else {
3106		ASSERT(orig_off >= align_off);
3107		/* see XFS_BMBT_MAX_EXTLEN handling above */
3108		ASSERT(orig_end <= align_off + align_alen ||
3109		       align_alen + extsz > XFS_MAX_BMBT_EXTLEN);
3110	}
3111
3112#ifdef DEBUG
3113	if (!eof && gotp->br_startoff != NULLFILEOFF)
3114		ASSERT(align_off + align_alen <= gotp->br_startoff);
3115	if (prevp->br_startoff != NULLFILEOFF)
3116		ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
3117#endif
3118
3119	*lenp = align_alen;
3120	*offp = align_off;
3121	return 0;
3122}
3123
3124#define XFS_ALLOC_GAP_UNITS	4
3125
3126/* returns true if ap->blkno was modified */
3127bool
3128xfs_bmap_adjacent(
3129	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
3130{
3131	xfs_fsblock_t	adjust;		/* adjustment to block numbers */
3132	xfs_mount_t	*mp;		/* mount point structure */
3133	int		rt;		/* true if inode is realtime */
3134
3135#define	ISVALID(x,y)	\
3136	(rt ? \
3137		(x) < mp->m_sb.sb_rblocks : \
3138		XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \
3139		XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \
3140		XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)
3141
3142	mp = ap->ip->i_mount;
3143	rt = XFS_IS_REALTIME_INODE(ap->ip) &&
3144		(ap->datatype & XFS_ALLOC_USERDATA);
3145	/*
3146	 * If allocating at eof, and there's a previous real block,
3147	 * try to use its last block as our starting point.
3148	 */
3149	if (ap->eof && ap->prev.br_startoff != NULLFILEOFF &&
3150	    !isnullstartblock(ap->prev.br_startblock) &&
3151	    ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount,
3152		    ap->prev.br_startblock)) {
3153		ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount;
3154		/*
3155		 * Adjust for the gap between prevp and us.
3156		 */
3157		adjust = ap->offset -
3158			(ap->prev.br_startoff + ap->prev.br_blockcount);
3159		if (adjust &&
3160		    ISVALID(ap->blkno + adjust, ap->prev.br_startblock))
3161			ap->blkno += adjust;
3162		return true;
3163	}
3164	/*
3165	 * If not at eof, then compare the two neighbor blocks.
3166	 * Figure out whether either one gives us a good starting point,
3167	 * and pick the better one.
3168	 */
3169	if (!ap->eof) {
3170		xfs_fsblock_t	gotbno;		/* right side block number */
3171		xfs_fsblock_t	gotdiff=0;	/* right side difference */
3172		xfs_fsblock_t	prevbno;	/* left side block number */
3173		xfs_fsblock_t	prevdiff=0;	/* left side difference */
3174
3175		/*
3176		 * If there's a previous (left) block, select a requested
3177		 * start block based on it.
3178		 */
3179		if (ap->prev.br_startoff != NULLFILEOFF &&
3180		    !isnullstartblock(ap->prev.br_startblock) &&
3181		    (prevbno = ap->prev.br_startblock +
3182			       ap->prev.br_blockcount) &&
3183		    ISVALID(prevbno, ap->prev.br_startblock)) {
3184			/*
3185			 * Calculate gap to end of previous block.
3186			 */
3187			adjust = prevdiff = ap->offset -
3188				(ap->prev.br_startoff +
3189				 ap->prev.br_blockcount);
3190			/*
3191			 * Figure the startblock based on the previous block's
3192			 * end and the gap size.
3193			 * Heuristic!
3194			 * If the gap is large relative to the piece we're
3195			 * allocating, or using it gives us an invalid block
3196			 * number, then just use the end of the previous block.
3197			 */
3198			if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
3199			    ISVALID(prevbno + prevdiff,
3200				    ap->prev.br_startblock))
3201				prevbno += adjust;
3202			else
3203				prevdiff += adjust;
3204		}
3205		/*
3206		 * No previous block or can't follow it, just default.
3207		 */
3208		else
3209			prevbno = NULLFSBLOCK;
3210		/*
3211		 * If there's a following (right) block, select a requested
3212		 * start block based on it.
3213		 */
3214		if (!isnullstartblock(ap->got.br_startblock)) {
3215			/*
3216			 * Calculate gap to start of next block.
3217			 */
3218			adjust = gotdiff = ap->got.br_startoff - ap->offset;
3219			/*
3220			 * Figure the startblock based on the next block's
3221			 * start and the gap size.
3222			 */
3223			gotbno = ap->got.br_startblock;
3224			/*
3225			 * Heuristic!
3226			 * If the gap is large relative to the piece we're
3227			 * allocating, or using it gives us an invalid block
3228			 * number, then just use the start of the next block
3229			 * offset by our length.
3230			 */
3231			if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
3232			    ISVALID(gotbno - gotdiff, gotbno))
3233				gotbno -= adjust;
3234			else if (ISVALID(gotbno - ap->length, gotbno)) {
3235				gotbno -= ap->length;
3236				gotdiff += adjust - ap->length;
3237			} else
3238				gotdiff += adjust;
3239		}
3240		/*
3241		 * No next block, just default.
3242		 */
3243		else
3244			gotbno = NULLFSBLOCK;
3245		/*
3246		 * If both valid, pick the better one, else the only good
3247		 * one, else ap->blkno is already set (to 0 or the inode block).
3248		 */
3249		if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK) {
3250			ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno;
3251			return true;
3252		}
3253		if (prevbno != NULLFSBLOCK) {
3254			ap->blkno = prevbno;
3255			return true;
3256		}
3257		if (gotbno != NULLFSBLOCK) {
3258			ap->blkno = gotbno;
3259			return true;
3260		}
3261	}
3262#undef ISVALID
3263	return false;
3264}
3265
3266int
3267xfs_bmap_longest_free_extent(
3268	struct xfs_perag	*pag,
3269	struct xfs_trans	*tp,
3270	xfs_extlen_t		*blen)
3271{
3272	xfs_extlen_t		longest;
3273	int			error = 0;
3274
3275	if (!xfs_perag_initialised_agf(pag)) {
3276		error = xfs_alloc_read_agf(pag, tp, XFS_ALLOC_FLAG_TRYLOCK,
3277				NULL);
3278		if (error)
3279			return error;
3280	}
3281
3282	longest = xfs_alloc_longest_free_extent(pag,
3283				xfs_alloc_min_freelist(pag->pag_mount, pag),
3284				xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
3285	if (*blen < longest)
3286		*blen = longest;
3287
3288	return 0;
3289}
3290
3291static xfs_extlen_t
3292xfs_bmap_select_minlen(
3293	struct xfs_bmalloca	*ap,
3294	struct xfs_alloc_arg	*args,
3295	xfs_extlen_t		blen)
3296{
3297
3298	/*
3299	 * Since we used XFS_ALLOC_FLAG_TRYLOCK in _longest_free_extent(), it is
3300	 * possible that there is enough contiguous free space for this request.
3301	 */
3302	if (blen < ap->minlen)
3303		return ap->minlen;
3304
3305	/*
3306	 * If the best seen length is less than the request length,
3307	 * use the best as the minimum, otherwise we've got the maxlen we
3308	 * were asked for.
3309	 */
3310	if (blen < args->maxlen)
3311		return blen;
3312	return args->maxlen;
3313}
3314
3315static int
3316xfs_bmap_btalloc_select_lengths(
3317	struct xfs_bmalloca	*ap,
3318	struct xfs_alloc_arg	*args,
3319	xfs_extlen_t		*blen)
3320{
3321	struct xfs_mount	*mp = args->mp;
3322	struct xfs_perag	*pag;
3323	xfs_agnumber_t		agno, startag;
3324	int			error = 0;
3325
3326	if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
3327		args->total = ap->minlen;
3328		args->minlen = ap->minlen;
3329		return 0;
3330	}
3331
3332	args->total = ap->total;
3333	startag = XFS_FSB_TO_AGNO(mp, ap->blkno);
3334	if (startag == NULLAGNUMBER)
3335		startag = 0;
3336
3337	*blen = 0;
3338	for_each_perag_wrap(mp, startag, agno, pag) {
3339		error = xfs_bmap_longest_free_extent(pag, args->tp, blen);
3340		if (error && error != -EAGAIN)
3341			break;
3342		error = 0;
3343		if (*blen >= args->maxlen)
3344			break;
3345	}
3346	if (pag)
3347		xfs_perag_rele(pag);
3348
3349	args->minlen = xfs_bmap_select_minlen(ap, args, *blen);
3350	return error;
3351}
3352
3353/* Update all inode and quota accounting for the allocation we just did. */
3354void
3355xfs_bmap_alloc_account(
3356	struct xfs_bmalloca	*ap)
3357{
3358	bool			isrt = XFS_IS_REALTIME_INODE(ap->ip) &&
3359					!(ap->flags & XFS_BMAPI_ATTRFORK);
3360	uint			fld;
3361
3362	if (ap->flags & XFS_BMAPI_COWFORK) {
3363		/*
3364		 * COW fork blocks are in-core only and thus are treated as
3365		 * in-core quota reservation (like delalloc blocks) even when
3366		 * converted to real blocks. The quota reservation is not
3367		 * accounted to disk until blocks are remapped to the data
3368		 * fork. So if these blocks were previously delalloc, we
3369		 * already have quota reservation and there's nothing to do
3370		 * yet.
3371		 */
3372		if (ap->wasdel) {
3373			xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)ap->length);
3374			return;
3375		}
3376
3377		/*
3378		 * Otherwise, we've allocated blocks in a hole. The transaction
3379		 * has acquired in-core quota reservation for this extent.
3380		 * Rather than account these as real blocks, however, we reduce
3381		 * the transaction quota reservation based on the allocation.
3382		 * This essentially transfers the transaction quota reservation
3383		 * to that of a delalloc extent.
3384		 */
3385		ap->ip->i_delayed_blks += ap->length;
3386		xfs_trans_mod_dquot_byino(ap->tp, ap->ip, isrt ?
3387				XFS_TRANS_DQ_RES_RTBLKS : XFS_TRANS_DQ_RES_BLKS,
3388				-(long)ap->length);
3389		return;
3390	}
3391
3392	/* data/attr fork only */
3393	ap->ip->i_nblocks += ap->length;
3394	xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
3395	if (ap->wasdel) {
3396		ap->ip->i_delayed_blks -= ap->length;
3397		xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)ap->length);
3398		fld = isrt ? XFS_TRANS_DQ_DELRTBCOUNT : XFS_TRANS_DQ_DELBCOUNT;
3399	} else {
3400		fld = isrt ? XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
3401	}
3402
3403	xfs_trans_mod_dquot_byino(ap->tp, ap->ip, fld, ap->length);
3404}
3405
3406static int
3407xfs_bmap_compute_alignments(
3408	struct xfs_bmalloca	*ap,
3409	struct xfs_alloc_arg	*args)
3410{
3411	struct xfs_mount	*mp = args->mp;
3412	xfs_extlen_t		align = 0; /* minimum allocation alignment */
3413	int			stripe_align = 0;
3414
3415	/* stripe alignment for allocation is determined by mount parameters */
3416	if (mp->m_swidth && xfs_has_swalloc(mp))
3417		stripe_align = mp->m_swidth;
3418	else if (mp->m_dalign)
3419		stripe_align = mp->m_dalign;
3420
3421	if (ap->flags & XFS_BMAPI_COWFORK)
3422		align = xfs_get_cowextsz_hint(ap->ip);
3423	else if (ap->datatype & XFS_ALLOC_USERDATA)
3424		align = xfs_get_extsz_hint(ap->ip);
3425	if (align) {
3426		if (xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0,
3427					ap->eof, 0, ap->conv, &ap->offset,
3428					&ap->length))
3429			ASSERT(0);
3430		ASSERT(ap->length);
3431	}
3432
3433	/* apply extent size hints if obtained earlier */
3434	if (align) {
3435		args->prod = align;
3436		div_u64_rem(ap->offset, args->prod, &args->mod);
3437		if (args->mod)
3438			args->mod = args->prod - args->mod;
3439	} else if (mp->m_sb.sb_blocksize >= PAGE_SIZE) {
3440		args->prod = 1;
3441		args->mod = 0;
3442	} else {
3443		args->prod = PAGE_SIZE >> mp->m_sb.sb_blocklog;
3444		div_u64_rem(ap->offset, args->prod, &args->mod);
3445		if (args->mod)
3446			args->mod = args->prod - args->mod;
3447	}
3448
3449	return stripe_align;
3450}
3451
3452static void
3453xfs_bmap_process_allocated_extent(
3454	struct xfs_bmalloca	*ap,
3455	struct xfs_alloc_arg	*args,
3456	xfs_fileoff_t		orig_offset,
3457	xfs_extlen_t		orig_length)
3458{
3459	ap->blkno = args->fsbno;
3460	ap->length = args->len;
3461	/*
3462	 * If the extent size hint is active, we tried to round the
3463	 * caller's allocation request offset down to extsz and the
3464	 * length up to another extsz boundary.  If we found a free
3465	 * extent we mapped it in starting at this new offset.  If the
3466	 * newly mapped space isn't long enough to cover any of the
3467	 * range of offsets that was originally requested, move the
3468	 * mapping up so that we can fill as much of the caller's
3469	 * original request as possible.  Free space is apparently
3470	 * very fragmented so we're unlikely to be able to satisfy the
3471	 * hints anyway.
3472	 */
3473	if (ap->length <= orig_length)
3474		ap->offset = orig_offset;
3475	else if (ap->offset + ap->length < orig_offset + orig_length)
3476		ap->offset = orig_offset + orig_length - ap->length;
3477	xfs_bmap_alloc_account(ap);
3478}
3479
3480#ifdef DEBUG
3481static int
3482xfs_bmap_exact_minlen_extent_alloc(
3483	struct xfs_bmalloca	*ap)
3484{
3485	struct xfs_mount	*mp = ap->ip->i_mount;
3486	struct xfs_alloc_arg	args = { .tp = ap->tp, .mp = mp };
3487	xfs_fileoff_t		orig_offset;
3488	xfs_extlen_t		orig_length;
3489	int			error;
3490
3491	ASSERT(ap->length);
3492
3493	if (ap->minlen != 1) {
3494		ap->blkno = NULLFSBLOCK;
3495		ap->length = 0;
3496		return 0;
3497	}
3498
3499	orig_offset = ap->offset;
3500	orig_length = ap->length;
3501
3502	args.alloc_minlen_only = 1;
3503
3504	xfs_bmap_compute_alignments(ap, &args);
3505
3506	/*
3507	 * Unlike the longest extent available in an AG, we don't track
3508	 * the length of an AG's shortest extent.
3509	 * XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT is a debug only knob and
3510	 * hence we can afford to start traversing from the 0th AG since
3511	 * we need not be concerned about a drop in performance in
3512	 * "debug only" code paths.
3513	 */
3514	ap->blkno = XFS_AGB_TO_FSB(mp, 0, 0);
3515
3516	args.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
3517	args.minlen = args.maxlen = ap->minlen;
3518	args.total = ap->total;
3519
3520	args.alignment = 1;
3521	args.minalignslop = 0;
3522
3523	args.minleft = ap->minleft;
3524	args.wasdel = ap->wasdel;
3525	args.resv = XFS_AG_RESV_NONE;
3526	args.datatype = ap->datatype;
3527
3528	error = xfs_alloc_vextent_first_ag(&args, ap->blkno);
3529	if (error)
3530		return error;
3531
3532	if (args.fsbno != NULLFSBLOCK) {
3533		xfs_bmap_process_allocated_extent(ap, &args, orig_offset,
3534			orig_length);
3535	} else {
3536		ap->blkno = NULLFSBLOCK;
3537		ap->length = 0;
3538	}
3539
3540	return 0;
3541}
3542#else
3543
3544#define xfs_bmap_exact_minlen_extent_alloc(bma) (-EFSCORRUPTED)
3545
3546#endif
3547
3548/*
3549 * If we are not low on available data blocks and we are allocating at
3550 * EOF, optimise allocation for contiguous file extension and/or stripe
3551 * alignment of the new extent.
3552 *
3553 * NOTE: ap->aeof is only set if the allocation length is >= the
3554 * stripe unit and the allocation offset is at the end of file.
3555 */
3556static int
3557xfs_bmap_btalloc_at_eof(
3558	struct xfs_bmalloca	*ap,
3559	struct xfs_alloc_arg	*args,
3560	xfs_extlen_t		blen,
3561	int			stripe_align,
3562	bool			ag_only)
3563{
3564	struct xfs_mount	*mp = args->mp;
3565	struct xfs_perag	*caller_pag = args->pag;
3566	int			error;
3567
3568	/*
3569	 * If there are already extents in the file, try an exact EOF block
3570	 * allocation to extend the file as a contiguous extent. If that fails,
3571	 * or it's the first allocation in a file, just try for a stripe aligned
3572	 * allocation.
3573	 */
3574	if (ap->offset) {
3575		xfs_extlen_t	nextminlen = 0;
3576
3577		/*
3578		 * Compute the minlen+alignment for the next case.  Set slop so
3579		 * that the value of minlen+alignment+slop doesn't go up between
3580		 * the calls.
3581		 */
3582		args->alignment = 1;
3583		if (blen > stripe_align && blen <= args->maxlen)
3584			nextminlen = blen - stripe_align;
3585		else
3586			nextminlen = args->minlen;
3587		if (nextminlen + stripe_align > args->minlen + 1)
3588			args->minalignslop = nextminlen + stripe_align -
3589					args->minlen - 1;
3590		else
3591			args->minalignslop = 0;
3592
3593		if (!caller_pag)
3594			args->pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, ap->blkno));
3595		error = xfs_alloc_vextent_exact_bno(args, ap->blkno);
3596		if (!caller_pag) {
3597			xfs_perag_put(args->pag);
3598			args->pag = NULL;
3599		}
3600		if (error)
3601			return error;
3602
3603		if (args->fsbno != NULLFSBLOCK)
3604			return 0;
3605		/*
3606		 * Exact allocation failed. Reset to try an aligned allocation
3607		 * according to the original allocation specification.
3608		 */
3609		args->alignment = stripe_align;
3610		args->minlen = nextminlen;
3611		args->minalignslop = 0;
3612	} else {
3613		/*
3614		 * Adjust minlen to try and preserve alignment if we
3615		 * can't guarantee an aligned maxlen extent.
3616		 */
3617		args->alignment = stripe_align;
3618		if (blen > args->alignment &&
3619		    blen <= args->maxlen + args->alignment)
3620			args->minlen = blen - args->alignment;
3621		args->minalignslop = 0;
3622	}
3623
3624	if (ag_only) {
3625		error = xfs_alloc_vextent_near_bno(args, ap->blkno);
3626	} else {
3627		args->pag = NULL;
3628		error = xfs_alloc_vextent_start_ag(args, ap->blkno);
3629		ASSERT(args->pag == NULL);
3630		args->pag = caller_pag;
3631	}
3632	if (error)
3633		return error;
3634
3635	if (args->fsbno != NULLFSBLOCK)
3636		return 0;
3637
3638	/*
3639	 * Allocation failed, so turn return the allocation args to their
3640	 * original non-aligned state so the caller can proceed on allocation
3641	 * failure as if this function was never called.
3642	 */
3643	args->alignment = 1;
3644	return 0;
3645}
3646
3647/*
3648 * We have failed multiple allocation attempts so now are in a low space
3649 * allocation situation. Try a locality first full filesystem minimum length
3650 * allocation whilst still maintaining necessary total block reservation
3651 * requirements.
3652 *
3653 * If that fails, we are now critically low on space, so perform a last resort
3654 * allocation attempt: no reserve, no locality, blocking, minimum length, full
3655 * filesystem free space scan. We also indicate to future allocations in this
3656 * transaction that we are critically low on space so they don't waste time on
3657 * allocation modes that are unlikely to succeed.
3658 */
3659int
3660xfs_bmap_btalloc_low_space(
3661	struct xfs_bmalloca	*ap,
3662	struct xfs_alloc_arg	*args)
3663{
3664	int			error;
3665
3666	if (args->minlen > ap->minlen) {
3667		args->minlen = ap->minlen;
3668		error = xfs_alloc_vextent_start_ag(args, ap->blkno);
3669		if (error || args->fsbno != NULLFSBLOCK)
3670			return error;
3671	}
3672
3673	/* Last ditch attempt before failure is declared. */
3674	args->total = ap->minlen;
3675	error = xfs_alloc_vextent_first_ag(args, 0);
3676	if (error)
3677		return error;
3678	ap->tp->t_flags |= XFS_TRANS_LOWMODE;
3679	return 0;
3680}
3681
3682static int
3683xfs_bmap_btalloc_filestreams(
3684	struct xfs_bmalloca	*ap,
3685	struct xfs_alloc_arg	*args,
3686	int			stripe_align)
3687{
3688	xfs_extlen_t		blen = 0;
3689	int			error = 0;
3690
3691
3692	error = xfs_filestream_select_ag(ap, args, &blen);
3693	if (error)
3694		return error;
3695	ASSERT(args->pag);
3696
3697	/*
3698	 * If we are in low space mode, then optimal allocation will fail so
3699	 * prepare for minimal allocation and jump to the low space algorithm
3700	 * immediately.
3701	 */
3702	if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
3703		args->minlen = ap->minlen;
3704		ASSERT(args->fsbno == NULLFSBLOCK);
3705		goto out_low_space;
3706	}
3707
3708	args->minlen = xfs_bmap_select_minlen(ap, args, blen);
3709	if (ap->aeof)
3710		error = xfs_bmap_btalloc_at_eof(ap, args, blen, stripe_align,
3711				true);
3712
3713	if (!error && args->fsbno == NULLFSBLOCK)
3714		error = xfs_alloc_vextent_near_bno(args, ap->blkno);
3715
3716out_low_space:
3717	/*
3718	 * We are now done with the perag reference for the filestreams
3719	 * association provided by xfs_filestream_select_ag(). Release it now as
3720	 * we've either succeeded, had a fatal error or we are out of space and
3721	 * need to do a full filesystem scan for free space which will take it's
3722	 * own references.
3723	 */
3724	xfs_perag_rele(args->pag);
3725	args->pag = NULL;
3726	if (error || args->fsbno != NULLFSBLOCK)
3727		return error;
3728
3729	return xfs_bmap_btalloc_low_space(ap, args);
3730}
3731
3732static int
3733xfs_bmap_btalloc_best_length(
3734	struct xfs_bmalloca	*ap,
3735	struct xfs_alloc_arg	*args,
3736	int			stripe_align)
3737{
3738	xfs_extlen_t		blen = 0;
3739	int			error;
3740
3741	ap->blkno = XFS_INO_TO_FSB(args->mp, ap->ip->i_ino);
3742	xfs_bmap_adjacent(ap);
3743
3744	/*
3745	 * Search for an allocation group with a single extent large enough for
3746	 * the request.  If one isn't found, then adjust the minimum allocation
3747	 * size to the largest space found.
3748	 */
3749	error = xfs_bmap_btalloc_select_lengths(ap, args, &blen);
3750	if (error)
3751		return error;
3752
3753	/*
3754	 * Don't attempt optimal EOF allocation if previous allocations barely
3755	 * succeeded due to being near ENOSPC. It is highly unlikely we'll get
3756	 * optimal or even aligned allocations in this case, so don't waste time
3757	 * trying.
3758	 */
3759	if (ap->aeof && !(ap->tp->t_flags & XFS_TRANS_LOWMODE)) {
3760		error = xfs_bmap_btalloc_at_eof(ap, args, blen, stripe_align,
3761				false);
3762		if (error || args->fsbno != NULLFSBLOCK)
3763			return error;
3764	}
3765
3766	error = xfs_alloc_vextent_start_ag(args, ap->blkno);
3767	if (error || args->fsbno != NULLFSBLOCK)
3768		return error;
3769
3770	return xfs_bmap_btalloc_low_space(ap, args);
3771}
3772
3773static int
3774xfs_bmap_btalloc(
3775	struct xfs_bmalloca	*ap)
3776{
3777	struct xfs_mount	*mp = ap->ip->i_mount;
3778	struct xfs_alloc_arg	args = {
3779		.tp		= ap->tp,
3780		.mp		= mp,
3781		.fsbno		= NULLFSBLOCK,
3782		.oinfo		= XFS_RMAP_OINFO_SKIP_UPDATE,
3783		.minleft	= ap->minleft,
3784		.wasdel		= ap->wasdel,
3785		.resv		= XFS_AG_RESV_NONE,
3786		.datatype	= ap->datatype,
3787		.alignment	= 1,
3788		.minalignslop	= 0,
3789	};
3790	xfs_fileoff_t		orig_offset;
3791	xfs_extlen_t		orig_length;
3792	int			error;
3793	int			stripe_align;
3794
3795	ASSERT(ap->length);
3796	orig_offset = ap->offset;
3797	orig_length = ap->length;
3798
3799	stripe_align = xfs_bmap_compute_alignments(ap, &args);
3800
3801	/* Trim the allocation back to the maximum an AG can fit. */
3802	args.maxlen = min(ap->length, mp->m_ag_max_usable);
3803
3804	if ((ap->datatype & XFS_ALLOC_USERDATA) &&
3805	    xfs_inode_is_filestream(ap->ip))
3806		error = xfs_bmap_btalloc_filestreams(ap, &args, stripe_align);
3807	else
3808		error = xfs_bmap_btalloc_best_length(ap, &args, stripe_align);
3809	if (error)
3810		return error;
3811
3812	if (args.fsbno != NULLFSBLOCK) {
3813		xfs_bmap_process_allocated_extent(ap, &args, orig_offset,
3814			orig_length);
3815	} else {
3816		ap->blkno = NULLFSBLOCK;
3817		ap->length = 0;
3818	}
3819	return 0;
3820}
3821
3822/* Trim extent to fit a logical block range. */
3823void
3824xfs_trim_extent(
3825	struct xfs_bmbt_irec	*irec,
3826	xfs_fileoff_t		bno,
3827	xfs_filblks_t		len)
3828{
3829	xfs_fileoff_t		distance;
3830	xfs_fileoff_t		end = bno + len;
3831
3832	if (irec->br_startoff + irec->br_blockcount <= bno ||
3833	    irec->br_startoff >= end) {
3834		irec->br_blockcount = 0;
3835		return;
3836	}
3837
3838	if (irec->br_startoff < bno) {
3839		distance = bno - irec->br_startoff;
3840		if (isnullstartblock(irec->br_startblock))
3841			irec->br_startblock = DELAYSTARTBLOCK;
3842		if (irec->br_startblock != DELAYSTARTBLOCK &&
3843		    irec->br_startblock != HOLESTARTBLOCK)
3844			irec->br_startblock += distance;
3845		irec->br_startoff += distance;
3846		irec->br_blockcount -= distance;
3847	}
3848
3849	if (end < irec->br_startoff + irec->br_blockcount) {
3850		distance = irec->br_startoff + irec->br_blockcount - end;
3851		irec->br_blockcount -= distance;
3852	}
3853}
3854
3855/*
3856 * Trim the returned map to the required bounds
3857 */
3858STATIC void
3859xfs_bmapi_trim_map(
3860	struct xfs_bmbt_irec	*mval,
3861	struct xfs_bmbt_irec	*got,
3862	xfs_fileoff_t		*bno,
3863	xfs_filblks_t		len,
3864	xfs_fileoff_t		obno,
3865	xfs_fileoff_t		end,
3866	int			n,
3867	uint32_t		flags)
3868{
3869	if ((flags & XFS_BMAPI_ENTIRE) ||
3870	    got->br_startoff + got->br_blockcount <= obno) {
3871		*mval = *got;
3872		if (isnullstartblock(got->br_startblock))
3873			mval->br_startblock = DELAYSTARTBLOCK;
3874		return;
3875	}
3876
3877	if (obno > *bno)
3878		*bno = obno;
3879	ASSERT((*bno >= obno) || (n == 0));
3880	ASSERT(*bno < end);
3881	mval->br_startoff = *bno;
3882	if (isnullstartblock(got->br_startblock))
3883		mval->br_startblock = DELAYSTARTBLOCK;
3884	else
3885		mval->br_startblock = got->br_startblock +
3886					(*bno - got->br_startoff);
3887	/*
3888	 * Return the minimum of what we got and what we asked for for
3889	 * the length.  We can use the len variable here because it is
3890	 * modified below and we could have been there before coming
3891	 * here if the first part of the allocation didn't overlap what
3892	 * was asked for.
3893	 */
3894	mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno,
3895			got->br_blockcount - (*bno - got->br_startoff));
3896	mval->br_state = got->br_state;
3897	ASSERT(mval->br_blockcount <= len);
3898	return;
3899}
3900
3901/*
3902 * Update and validate the extent map to return
3903 */
3904STATIC void
3905xfs_bmapi_update_map(
3906	struct xfs_bmbt_irec	**map,
3907	xfs_fileoff_t		*bno,
3908	xfs_filblks_t		*len,
3909	xfs_fileoff_t		obno,
3910	xfs_fileoff_t		end,
3911	int			*n,
3912	uint32_t		flags)
3913{
3914	xfs_bmbt_irec_t	*mval = *map;
3915
3916	ASSERT((flags & XFS_BMAPI_ENTIRE) ||
3917	       ((mval->br_startoff + mval->br_blockcount) <= end));
3918	ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) ||
3919	       (mval->br_startoff < obno));
3920
3921	*bno = mval->br_startoff + mval->br_blockcount;
3922	*len = end - *bno;
3923	if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) {
3924		/* update previous map with new information */
3925		ASSERT(mval->br_startblock == mval[-1].br_startblock);
3926		ASSERT(mval->br_blockcount > mval[-1].br_blockcount);
3927		ASSERT(mval->br_state == mval[-1].br_state);
3928		mval[-1].br_blockcount = mval->br_blockcount;
3929		mval[-1].br_state = mval->br_state;
3930	} else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK &&
3931		   mval[-1].br_startblock != DELAYSTARTBLOCK &&
3932		   mval[-1].br_startblock != HOLESTARTBLOCK &&
3933		   mval->br_startblock == mval[-1].br_startblock +
3934					  mval[-1].br_blockcount &&
3935		   mval[-1].br_state == mval->br_state) {
3936		ASSERT(mval->br_startoff ==
3937		       mval[-1].br_startoff + mval[-1].br_blockcount);
3938		mval[-1].br_blockcount += mval->br_blockcount;
3939	} else if (*n > 0 &&
3940		   mval->br_startblock == DELAYSTARTBLOCK &&
3941		   mval[-1].br_startblock == DELAYSTARTBLOCK &&
3942		   mval->br_startoff ==
3943		   mval[-1].br_startoff + mval[-1].br_blockcount) {
3944		mval[-1].br_blockcount += mval->br_blockcount;
3945		mval[-1].br_state = mval->br_state;
3946	} else if (!((*n == 0) &&
3947		     ((mval->br_startoff + mval->br_blockcount) <=
3948		      obno))) {
3949		mval++;
3950		(*n)++;
3951	}
3952	*map = mval;
3953}
3954
3955/*
3956 * Map file blocks to filesystem blocks without allocation.
3957 */
3958int
3959xfs_bmapi_read(
3960	struct xfs_inode	*ip,
3961	xfs_fileoff_t		bno,
3962	xfs_filblks_t		len,
3963	struct xfs_bmbt_irec	*mval,
3964	int			*nmap,
3965	uint32_t		flags)
3966{
3967	struct xfs_mount	*mp = ip->i_mount;
3968	int			whichfork = xfs_bmapi_whichfork(flags);
3969	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
3970	struct xfs_bmbt_irec	got;
3971	xfs_fileoff_t		obno;
3972	xfs_fileoff_t		end;
3973	struct xfs_iext_cursor	icur;
3974	int			error;
3975	bool			eof = false;
3976	int			n = 0;
3977
3978	ASSERT(*nmap >= 1);
3979	ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_ENTIRE)));
3980	xfs_assert_ilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);
3981
3982	if (WARN_ON_ONCE(!ifp)) {
3983		xfs_bmap_mark_sick(ip, whichfork);
3984		return -EFSCORRUPTED;
3985	}
3986
3987	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
3988	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
3989		xfs_bmap_mark_sick(ip, whichfork);
3990		return -EFSCORRUPTED;
3991	}
3992
3993	if (xfs_is_shutdown(mp))
3994		return -EIO;
3995
3996	XFS_STATS_INC(mp, xs_blk_mapr);
3997
3998	error = xfs_iread_extents(NULL, ip, whichfork);
3999	if (error)
4000		return error;
4001
4002	if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got))
4003		eof = true;
4004	end = bno + len;
4005	obno = bno;
4006
4007	while (bno < end && n < *nmap) {
4008		/* Reading past eof, act as though there's a hole up to end. */
4009		if (eof)
4010			got.br_startoff = end;
4011		if (got.br_startoff > bno) {
4012			/* Reading in a hole.  */
4013			mval->br_startoff = bno;
4014			mval->br_startblock = HOLESTARTBLOCK;
4015			mval->br_blockcount =
4016				XFS_FILBLKS_MIN(len, got.br_startoff - bno);
4017			mval->br_state = XFS_EXT_NORM;
4018			bno += mval->br_blockcount;
4019			len -= mval->br_blockcount;
4020			mval++;
4021			n++;
4022			continue;
4023		}
4024
4025		/* set up the extent map to return. */
4026		xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
4027		xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
4028
4029		/* If we're done, stop now. */
4030		if (bno >= end || n >= *nmap)
4031			break;
4032
4033		/* Else go on to the next record. */
4034		if (!xfs_iext_next_extent(ifp, &icur, &got))
4035			eof = true;
4036	}
4037	*nmap = n;
4038	return 0;
4039}
4040
4041/*
4042 * Add a delayed allocation extent to an inode. Blocks are reserved from the
4043 * global pool and the extent inserted into the inode in-core extent tree.
4044 *
4045 * On entry, got refers to the first extent beyond the offset of the extent to
4046 * allocate or eof is specified if no such extent exists. On return, got refers
4047 * to the extent record that was inserted to the inode fork.
4048 *
4049 * Note that the allocated extent may have been merged with contiguous extents
4050 * during insertion into the inode fork. Thus, got does not reflect the current
4051 * state of the inode fork on return. If necessary, the caller can use lastx to
4052 * look up the updated record in the inode fork.
4053 */
4054int
4055xfs_bmapi_reserve_delalloc(
4056	struct xfs_inode	*ip,
4057	int			whichfork,
4058	xfs_fileoff_t		off,
4059	xfs_filblks_t		len,
4060	xfs_filblks_t		prealloc,
4061	struct xfs_bmbt_irec	*got,
4062	struct xfs_iext_cursor	*icur,
4063	int			eof)
4064{
4065	struct xfs_mount	*mp = ip->i_mount;
4066	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
4067	xfs_extlen_t		alen;
4068	xfs_extlen_t		indlen;
4069	int			error;
4070	xfs_fileoff_t		aoff = off;
4071
4072	/*
4073	 * Cap the alloc length. Keep track of prealloc so we know whether to
4074	 * tag the inode before we return.
4075	 */
4076	alen = XFS_FILBLKS_MIN(len + prealloc, XFS_MAX_BMBT_EXTLEN);
4077	if (!eof)
4078		alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
4079	if (prealloc && alen >= len)
4080		prealloc = alen - len;
4081
4082	/* Figure out the extent size, adjust alen */
4083	if (whichfork == XFS_COW_FORK) {
4084		struct xfs_bmbt_irec	prev;
4085		xfs_extlen_t		extsz = xfs_get_cowextsz_hint(ip);
4086
4087		if (!xfs_iext_peek_prev_extent(ifp, icur, &prev))
4088			prev.br_startoff = NULLFILEOFF;
4089
4090		error = xfs_bmap_extsize_align(mp, got, &prev, extsz, 0, eof,
4091					       1, 0, &aoff, &alen);
4092		ASSERT(!error);
4093	}
4094
4095	/*
4096	 * Make a transaction-less quota reservation for delayed allocation
4097	 * blocks.  This number gets adjusted later.  We return if we haven't
4098	 * allocated blocks already inside this loop.
4099	 */
4100	error = xfs_quota_reserve_blkres(ip, alen);
4101	if (error)
4102		return error;
4103
4104	/*
4105	 * Split changing sb for alen and indlen since they could be coming
4106	 * from different places.
4107	 */
4108	indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
4109	ASSERT(indlen > 0);
4110
4111	error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
4112	if (error)
4113		goto out_unreserve_quota;
4114
4115	error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false);
4116	if (error)
4117		goto out_unreserve_blocks;
4118
4119
4120	ip->i_delayed_blks += alen;
4121	xfs_mod_delalloc(ip->i_mount, alen + indlen);
4122
4123	got->br_startoff = aoff;
4124	got->br_startblock = nullstartblock(indlen);
4125	got->br_blockcount = alen;
4126	got->br_state = XFS_EXT_NORM;
4127
4128	xfs_bmap_add_extent_hole_delay(ip, whichfork, icur, got);
4129
4130	/*
4131	 * Tag the inode if blocks were preallocated. Note that COW fork
4132	 * preallocation can occur at the start or end of the extent, even when
4133	 * prealloc == 0, so we must also check the aligned offset and length.
4134	 */
4135	if (whichfork == XFS_DATA_FORK && prealloc)
4136		xfs_inode_set_eofblocks_tag(ip);
4137	if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))
4138		xfs_inode_set_cowblocks_tag(ip);
4139
4140	return 0;
4141
4142out_unreserve_blocks:
4143	xfs_mod_fdblocks(mp, alen, false);
4144out_unreserve_quota:
4145	if (XFS_IS_QUOTA_ON(mp))
4146		xfs_quota_unreserve_blkres(ip, alen);
4147	return error;
4148}
4149
4150static int
4151xfs_bmap_alloc_userdata(
4152	struct xfs_bmalloca	*bma)
4153{
4154	struct xfs_mount	*mp = bma->ip->i_mount;
4155	int			whichfork = xfs_bmapi_whichfork(bma->flags);
4156	int			error;
4157
4158	/*
4159	 * Set the data type being allocated. For the data fork, the first data
4160	 * in the file is treated differently to all other allocations. For the
4161	 * attribute fork, we only need to ensure the allocated range is not on
4162	 * the busy list.
4163	 */
4164	bma->datatype = XFS_ALLOC_NOBUSY;
4165	if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) {
4166		bma->datatype |= XFS_ALLOC_USERDATA;
4167		if (bma->offset == 0)
4168			bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
4169
4170		if (mp->m_dalign && bma->length >= mp->m_dalign) {
4171			error = xfs_bmap_isaeof(bma, whichfork);
4172			if (error)
4173				return error;
4174		}
4175
4176		if (XFS_IS_REALTIME_INODE(bma->ip))
4177			return xfs_bmap_rtalloc(bma);
4178	}
4179
4180	if (unlikely(XFS_TEST_ERROR(false, mp,
4181			XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
4182		return xfs_bmap_exact_minlen_extent_alloc(bma);
4183
4184	return xfs_bmap_btalloc(bma);
4185}
4186
4187static int
4188xfs_bmapi_allocate(
4189	struct xfs_bmalloca	*bma)
4190{
4191	struct xfs_mount	*mp = bma->ip->i_mount;
4192	int			whichfork = xfs_bmapi_whichfork(bma->flags);
4193	struct xfs_ifork	*ifp = xfs_ifork_ptr(bma->ip, whichfork);
4194	int			tmp_logflags = 0;
4195	int			error;
4196
4197	ASSERT(bma->length > 0);
4198
4199	/*
4200	 * For the wasdelay case, we could also just allocate the stuff asked
4201	 * for in this bmap call but that wouldn't be as good.
4202	 */
4203	if (bma->wasdel) {
4204		bma->length = (xfs_extlen_t)bma->got.br_blockcount;
4205		bma->offset = bma->got.br_startoff;
4206		if (!xfs_iext_peek_prev_extent(ifp, &bma->icur, &bma->prev))
4207			bma->prev.br_startoff = NULLFILEOFF;
4208	} else {
4209		bma->length = XFS_FILBLKS_MIN(bma->length, XFS_MAX_BMBT_EXTLEN);
4210		if (!bma->eof)
4211			bma->length = XFS_FILBLKS_MIN(bma->length,
4212					bma->got.br_startoff - bma->offset);
4213	}
4214
4215	if (bma->flags & XFS_BMAPI_CONTIG)
4216		bma->minlen = bma->length;
4217	else
4218		bma->minlen = 1;
4219
4220	if (bma->flags & XFS_BMAPI_METADATA) {
4221		if (unlikely(XFS_TEST_ERROR(false, mp,
4222				XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
4223			error = xfs_bmap_exact_minlen_extent_alloc(bma);
4224		else
4225			error = xfs_bmap_btalloc(bma);
4226	} else {
4227		error = xfs_bmap_alloc_userdata(bma);
4228	}
4229	if (error || bma->blkno == NULLFSBLOCK)
4230		return error;
4231
4232	if (bma->flags & XFS_BMAPI_ZERO) {
4233		error = xfs_zero_extent(bma->ip, bma->blkno, bma->length);
4234		if (error)
4235			return error;
4236	}
4237
4238	if (ifp->if_format == XFS_DINODE_FMT_BTREE && !bma->cur)
4239		bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
4240	/*
4241	 * Bump the number of extents we've allocated
4242	 * in this call.
4243	 */
4244	bma->nallocs++;
4245
4246	if (bma->cur && bma->wasdel)
4247		bma->cur->bc_flags |= XFS_BTREE_BMBT_WASDEL;
4248
4249	bma->got.br_startoff = bma->offset;
4250	bma->got.br_startblock = bma->blkno;
4251	bma->got.br_blockcount = bma->length;
4252	bma->got.br_state = XFS_EXT_NORM;
4253
4254	if (bma->flags & XFS_BMAPI_PREALLOC)
4255		bma->got.br_state = XFS_EXT_UNWRITTEN;
4256
4257	if (bma->wasdel)
4258		error = xfs_bmap_add_extent_delay_real(bma, whichfork);
4259	else
4260		error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip,
4261				whichfork, &bma->icur, &bma->cur, &bma->got,
4262				&bma->logflags, bma->flags);
4263
4264	bma->logflags |= tmp_logflags;
4265	if (error)
4266		return error;
4267
4268	/*
4269	 * Update our extent pointer, given that xfs_bmap_add_extent_delay_real
4270	 * or xfs_bmap_add_extent_hole_real might have merged it into one of
4271	 * the neighbouring ones.
4272	 */
4273	xfs_iext_get_extent(ifp, &bma->icur, &bma->got);
4274
4275	ASSERT(bma->got.br_startoff <= bma->offset);
4276	ASSERT(bma->got.br_startoff + bma->got.br_blockcount >=
4277	       bma->offset + bma->length);
4278	ASSERT(bma->got.br_state == XFS_EXT_NORM ||
4279	       bma->got.br_state == XFS_EXT_UNWRITTEN);
4280	return 0;
4281}
4282
4283STATIC int
4284xfs_bmapi_convert_unwritten(
4285	struct xfs_bmalloca	*bma,
4286	struct xfs_bmbt_irec	*mval,
4287	xfs_filblks_t		len,
4288	uint32_t		flags)
4289{
4290	int			whichfork = xfs_bmapi_whichfork(flags);
4291	struct xfs_ifork	*ifp = xfs_ifork_ptr(bma->ip, whichfork);
4292	int			tmp_logflags = 0;
4293	int			error;
4294
4295	/* check if we need to do unwritten->real conversion */
4296	if (mval->br_state == XFS_EXT_UNWRITTEN &&
4297	    (flags & XFS_BMAPI_PREALLOC))
4298		return 0;
4299
4300	/* check if we need to do real->unwritten conversion */
4301	if (mval->br_state == XFS_EXT_NORM &&
4302	    (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) !=
4303			(XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
4304		return 0;
4305
4306	/*
4307	 * Modify (by adding) the state flag, if writing.
4308	 */
4309	ASSERT(mval->br_blockcount <= len);
4310	if (ifp->if_format == XFS_DINODE_FMT_BTREE && !bma->cur) {
4311		bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
4312					bma->ip, whichfork);
4313	}
4314	mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
4315				? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
4316
4317	/*
4318	 * Before insertion into the bmbt, zero the range being converted
4319	 * if required.
4320	 */
4321	if (flags & XFS_BMAPI_ZERO) {
4322		error = xfs_zero_extent(bma->ip, mval->br_startblock,
4323					mval->br_blockcount);
4324		if (error)
4325			return error;
4326	}
4327
4328	error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork,
4329			&bma->icur, &bma->cur, mval, &tmp_logflags);
4330	/*
4331	 * Log the inode core unconditionally in the unwritten extent conversion
4332	 * path because the conversion might not have done so (e.g., if the
4333	 * extent count hasn't changed). We need to make sure the inode is dirty
4334	 * in the transaction for the sake of fsync(), even if nothing has
4335	 * changed, because fsync() will not force the log for this transaction
4336	 * unless it sees the inode pinned.
4337	 *
4338	 * Note: If we're only converting cow fork extents, there aren't
4339	 * any on-disk updates to make, so we don't need to log anything.
4340	 */
4341	if (whichfork != XFS_COW_FORK)
4342		bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
4343	if (error)
4344		return error;
4345
4346	/*
4347	 * Update our extent pointer, given that
4348	 * xfs_bmap_add_extent_unwritten_real might have merged it into one
4349	 * of the neighbouring ones.
4350	 */
4351	xfs_iext_get_extent(ifp, &bma->icur, &bma->got);
4352
4353	/*
4354	 * We may have combined previously unwritten space with written space,
4355	 * so generate another request.
4356	 */
4357	if (mval->br_blockcount < len)
4358		return -EAGAIN;
4359	return 0;
4360}
4361
4362xfs_extlen_t
4363xfs_bmapi_minleft(
4364	struct xfs_trans	*tp,
4365	struct xfs_inode	*ip,
4366	int			fork)
4367{
4368	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, fork);
4369
4370	if (tp && tp->t_highest_agno != NULLAGNUMBER)
4371		return 0;
4372	if (ifp->if_format != XFS_DINODE_FMT_BTREE)
4373		return 1;
4374	return be16_to_cpu(ifp->if_broot->bb_level) + 1;
4375}
4376
4377/*
4378 * Log whatever the flags say, even if error.  Otherwise we might miss detecting
4379 * a case where the data is changed, there's an error, and it's not logged so we
4380 * don't shutdown when we should.  Don't bother logging extents/btree changes if
4381 * we converted to the other format.
4382 */
4383static void
4384xfs_bmapi_finish(
4385	struct xfs_bmalloca	*bma,
4386	int			whichfork,
4387	int			error)
4388{
4389	struct xfs_ifork	*ifp = xfs_ifork_ptr(bma->ip, whichfork);
4390
4391	if ((bma->logflags & xfs_ilog_fext(whichfork)) &&
4392	    ifp->if_format != XFS_DINODE_FMT_EXTENTS)
4393		bma->logflags &= ~xfs_ilog_fext(whichfork);
4394	else if ((bma->logflags & xfs_ilog_fbroot(whichfork)) &&
4395		 ifp->if_format != XFS_DINODE_FMT_BTREE)
4396		bma->logflags &= ~xfs_ilog_fbroot(whichfork);
4397
4398	if (bma->logflags)
4399		xfs_trans_log_inode(bma->tp, bma->ip, bma->logflags);
4400	if (bma->cur)
4401		xfs_btree_del_cursor(bma->cur, error);
4402}
4403
4404/*
4405 * Map file blocks to filesystem blocks, and allocate blocks or convert the
4406 * extent state if necessary.  Details behaviour is controlled by the flags
4407 * parameter.  Only allocates blocks from a single allocation group, to avoid
4408 * locking problems.
4409 */
4410int
4411xfs_bmapi_write(
4412	struct xfs_trans	*tp,		/* transaction pointer */
4413	struct xfs_inode	*ip,		/* incore inode */
4414	xfs_fileoff_t		bno,		/* starting file offs. mapped */
4415	xfs_filblks_t		len,		/* length to map in file */
4416	uint32_t		flags,		/* XFS_BMAPI_... */
4417	xfs_extlen_t		total,		/* total blocks needed */
4418	struct xfs_bmbt_irec	*mval,		/* output: map values */
4419	int			*nmap)		/* i/o: mval size/count */
4420{
4421	struct xfs_bmalloca	bma = {
4422		.tp		= tp,
4423		.ip		= ip,
4424		.total		= total,
4425	};
4426	struct xfs_mount	*mp = ip->i_mount;
4427	int			whichfork = xfs_bmapi_whichfork(flags);
4428	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
4429	xfs_fileoff_t		end;		/* end of mapped file region */
4430	bool			eof = false;	/* after the end of extents */
4431	int			error;		/* error return */
4432	int			n;		/* current extent index */
4433	xfs_fileoff_t		obno;		/* old block number (offset) */
4434
4435#ifdef DEBUG
4436	xfs_fileoff_t		orig_bno;	/* original block number value */
4437	int			orig_flags;	/* original flags arg value */
4438	xfs_filblks_t		orig_len;	/* original value of len arg */
4439	struct xfs_bmbt_irec	*orig_mval;	/* original value of mval */
4440	int			orig_nmap;	/* original value of *nmap */
4441
4442	orig_bno = bno;
4443	orig_len = len;
4444	orig_flags = flags;
4445	orig_mval = mval;
4446	orig_nmap = *nmap;
4447#endif
4448
4449	ASSERT(*nmap >= 1);
4450	ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
4451	ASSERT(tp != NULL);
4452	ASSERT(len > 0);
4453	ASSERT(ifp->if_format != XFS_DINODE_FMT_LOCAL);
4454	xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
4455	ASSERT(!(flags & XFS_BMAPI_REMAP));
4456
4457	/* zeroing is for currently only for data extents, not metadata */
4458	ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
4459			(XFS_BMAPI_METADATA | XFS_BMAPI_ZERO));
4460	/*
4461	 * we can allocate unwritten extents or pre-zero allocated blocks,
4462	 * but it makes no sense to do both at once. This would result in
4463	 * zeroing the unwritten extent twice, but it still being an
4464	 * unwritten extent....
4465	 */
4466	ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) !=
4467			(XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO));
4468
4469	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
4470	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
4471		xfs_bmap_mark_sick(ip, whichfork);
4472		return -EFSCORRUPTED;
4473	}
4474
4475	if (xfs_is_shutdown(mp))
4476		return -EIO;
4477
4478	XFS_STATS_INC(mp, xs_blk_mapw);
4479
4480	error = xfs_iread_extents(tp, ip, whichfork);
4481	if (error)
4482		goto error0;
4483
4484	if (!xfs_iext_lookup_extent(ip, ifp, bno, &bma.icur, &bma.got))
4485		eof = true;
4486	if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
4487		bma.prev.br_startoff = NULLFILEOFF;
4488	bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
4489
4490	n = 0;
4491	end = bno + len;
4492	obno = bno;
4493	while (bno < end && n < *nmap) {
4494		bool			need_alloc = false, wasdelay = false;
4495
4496		/* in hole or beyond EOF? */
4497		if (eof || bma.got.br_startoff > bno) {
4498			/*
4499			 * CoW fork conversions should /never/ hit EOF or
4500			 * holes.  There should always be something for us
4501			 * to work on.
4502			 */
4503			ASSERT(!((flags & XFS_BMAPI_CONVERT) &&
4504			         (flags & XFS_BMAPI_COWFORK)));
4505
4506			need_alloc = true;
4507		} else if (isnullstartblock(bma.got.br_startblock)) {
4508			wasdelay = true;
4509		}
4510
4511		/*
4512		 * First, deal with the hole before the allocated space
4513		 * that we found, if any.
4514		 */
4515		if (need_alloc || wasdelay) {
4516			bma.eof = eof;
4517			bma.conv = !!(flags & XFS_BMAPI_CONVERT);
4518			bma.wasdel = wasdelay;
4519			bma.offset = bno;
4520			bma.flags = flags;
4521
4522			/*
4523			 * There's a 32/64 bit type mismatch between the
4524			 * allocation length request (which can be 64 bits in
4525			 * length) and the bma length request, which is
4526			 * xfs_extlen_t and therefore 32 bits. Hence we have to
4527			 * check for 32-bit overflows and handle them here.
4528			 */
4529			if (len > (xfs_filblks_t)XFS_MAX_BMBT_EXTLEN)
4530				bma.length = XFS_MAX_BMBT_EXTLEN;
4531			else
4532				bma.length = len;
4533
4534			ASSERT(len > 0);
4535			ASSERT(bma.length > 0);
4536			error = xfs_bmapi_allocate(&bma);
4537			if (error)
4538				goto error0;
4539			if (bma.blkno == NULLFSBLOCK)
4540				break;
4541
4542			/*
4543			 * If this is a CoW allocation, record the data in
4544			 * the refcount btree for orphan recovery.
4545			 */
4546			if (whichfork == XFS_COW_FORK)
4547				xfs_refcount_alloc_cow_extent(tp, bma.blkno,
4548						bma.length);
4549		}
4550
4551		/* Deal with the allocated space we found.  */
4552		xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno,
4553							end, n, flags);
4554
4555		/* Execute unwritten extent conversion if necessary */
4556		error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags);
4557		if (error == -EAGAIN)
4558			continue;
4559		if (error)
4560			goto error0;
4561
4562		/* update the extent map to return */
4563		xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
4564
4565		/*
4566		 * If we're done, stop now.  Stop when we've allocated
4567		 * XFS_BMAP_MAX_NMAP extents no matter what.  Otherwise
4568		 * the transaction may get too big.
4569		 */
4570		if (bno >= end || n >= *nmap || bma.nallocs >= *nmap)
4571			break;
4572
4573		/* Else go on to the next record. */
4574		bma.prev = bma.got;
4575		if (!xfs_iext_next_extent(ifp, &bma.icur, &bma.got))
4576			eof = true;
4577	}
4578	*nmap = n;
4579
4580	error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
4581			whichfork);
4582	if (error)
4583		goto error0;
4584
4585	ASSERT(ifp->if_format != XFS_DINODE_FMT_BTREE ||
4586	       ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork));
4587	xfs_bmapi_finish(&bma, whichfork, 0);
4588	xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
4589		orig_nmap, *nmap);
4590	return 0;
4591error0:
4592	xfs_bmapi_finish(&bma, whichfork, error);
4593	return error;
4594}
4595
4596/*
4597 * Convert an existing delalloc extent to real blocks based on file offset. This
4598 * attempts to allocate the entire delalloc extent and may require multiple
4599 * invocations to allocate the target offset if a large enough physical extent
4600 * is not available.
4601 */
4602int
4603xfs_bmapi_convert_delalloc(
4604	struct xfs_inode	*ip,
4605	int			whichfork,
4606	xfs_off_t		offset,
4607	struct iomap		*iomap,
4608	unsigned int		*seq)
4609{
4610	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
4611	struct xfs_mount	*mp = ip->i_mount;
4612	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
4613	struct xfs_bmalloca	bma = { NULL };
4614	uint16_t		flags = 0;
4615	struct xfs_trans	*tp;
4616	int			error;
4617
4618	if (whichfork == XFS_COW_FORK)
4619		flags |= IOMAP_F_SHARED;
4620
4621	/*
4622	 * Space for the extent and indirect blocks was reserved when the
4623	 * delalloc extent was created so there's no need to do so here.
4624	 */
4625	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
4626				XFS_TRANS_RESERVE, &tp);
4627	if (error)
4628		return error;
4629
4630	xfs_ilock(ip, XFS_ILOCK_EXCL);
4631	xfs_trans_ijoin(tp, ip, 0);
4632
4633	error = xfs_iext_count_may_overflow(ip, whichfork,
4634			XFS_IEXT_ADD_NOSPLIT_CNT);
4635	if (error == -EFBIG)
4636		error = xfs_iext_count_upgrade(tp, ip,
4637				XFS_IEXT_ADD_NOSPLIT_CNT);
4638	if (error)
4639		goto out_trans_cancel;
4640
4641	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &bma.icur, &bma.got) ||
4642	    bma.got.br_startoff > offset_fsb) {
4643		/*
4644		 * No extent found in the range we are trying to convert.  This
4645		 * should only happen for the COW fork, where another thread
4646		 * might have moved the extent to the data fork in the meantime.
4647		 */
4648		WARN_ON_ONCE(whichfork != XFS_COW_FORK);
4649		error = -EAGAIN;
4650		goto out_trans_cancel;
4651	}
4652
4653	/*
4654	 * If we find a real extent here we raced with another thread converting
4655	 * the extent.  Just return the real extent at this offset.
4656	 */
4657	if (!isnullstartblock(bma.got.br_startblock)) {
4658		xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags,
4659				xfs_iomap_inode_sequence(ip, flags));
4660		*seq = READ_ONCE(ifp->if_seq);
4661		goto out_trans_cancel;
4662	}
4663
4664	bma.tp = tp;
4665	bma.ip = ip;
4666	bma.wasdel = true;
4667	bma.offset = bma.got.br_startoff;
4668	bma.length = max_t(xfs_filblks_t, bma.got.br_blockcount,
4669			XFS_MAX_BMBT_EXTLEN);
4670	bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
4671
4672	/*
4673	 * When we're converting the delalloc reservations backing dirty pages
4674	 * in the page cache, we must be careful about how we create the new
4675	 * extents:
4676	 *
4677	 * New CoW fork extents are created unwritten, turned into real extents
4678	 * when we're about to write the data to disk, and mapped into the data
4679	 * fork after the write finishes.  End of story.
4680	 *
4681	 * New data fork extents must be mapped in as unwritten and converted
4682	 * to real extents after the write succeeds to avoid exposing stale
4683	 * disk contents if we crash.
4684	 */
4685	bma.flags = XFS_BMAPI_PREALLOC;
4686	if (whichfork == XFS_COW_FORK)
4687		bma.flags |= XFS_BMAPI_COWFORK;
4688
4689	if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
4690		bma.prev.br_startoff = NULLFILEOFF;
4691
4692	error = xfs_bmapi_allocate(&bma);
4693	if (error)
4694		goto out_finish;
4695
4696	error = -ENOSPC;
4697	if (WARN_ON_ONCE(bma.blkno == NULLFSBLOCK))
4698		goto out_finish;
4699	if (WARN_ON_ONCE(!xfs_valid_startblock(ip, bma.got.br_startblock))) {
4700		xfs_bmap_mark_sick(ip, whichfork);
4701		error = -EFSCORRUPTED;
4702		goto out_finish;
4703	}
4704
4705	XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, bma.length));
4706	XFS_STATS_INC(mp, xs_xstrat_quick);
4707
4708	ASSERT(!isnullstartblock(bma.got.br_startblock));
4709	xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags,
4710				xfs_iomap_inode_sequence(ip, flags));
4711	*seq = READ_ONCE(ifp->if_seq);
4712
4713	if (whichfork == XFS_COW_FORK)
4714		xfs_refcount_alloc_cow_extent(tp, bma.blkno, bma.length);
4715
4716	error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
4717			whichfork);
4718	if (error)
4719		goto out_finish;
4720
4721	xfs_bmapi_finish(&bma, whichfork, 0);
4722	error = xfs_trans_commit(tp);
4723	xfs_iunlock(ip, XFS_ILOCK_EXCL);
4724	return error;
4725
4726out_finish:
4727	xfs_bmapi_finish(&bma, whichfork, error);
4728out_trans_cancel:
4729	xfs_trans_cancel(tp);
4730	xfs_iunlock(ip, XFS_ILOCK_EXCL);
4731	return error;
4732}
4733
4734int
4735xfs_bmapi_remap(
4736	struct xfs_trans	*tp,
4737	struct xfs_inode	*ip,
4738	xfs_fileoff_t		bno,
4739	xfs_filblks_t		len,
4740	xfs_fsblock_t		startblock,
4741	uint32_t		flags)
4742{
4743	struct xfs_mount	*mp = ip->i_mount;
4744	struct xfs_ifork	*ifp;
4745	struct xfs_btree_cur	*cur = NULL;
4746	struct xfs_bmbt_irec	got;
4747	struct xfs_iext_cursor	icur;
4748	int			whichfork = xfs_bmapi_whichfork(flags);
4749	int			logflags = 0, error;
4750
4751	ifp = xfs_ifork_ptr(ip, whichfork);
4752	ASSERT(len > 0);
4753	ASSERT(len <= (xfs_filblks_t)XFS_MAX_BMBT_EXTLEN);
4754	xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
4755	ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC |
4756			   XFS_BMAPI_NORMAP)));
4757	ASSERT((flags & (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC)) !=
4758			(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC));
4759
4760	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
4761	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
4762		xfs_bmap_mark_sick(ip, whichfork);
4763		return -EFSCORRUPTED;
4764	}
4765
4766	if (xfs_is_shutdown(mp))
4767		return -EIO;
4768
4769	error = xfs_iread_extents(tp, ip, whichfork);
4770	if (error)
4771		return error;
4772
4773	if (xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) {
4774		/* make sure we only reflink into a hole. */
4775		ASSERT(got.br_startoff > bno);
4776		ASSERT(got.br_startoff - bno >= len);
4777	}
4778
4779	ip->i_nblocks += len;
4780	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
4781
4782	if (ifp->if_format == XFS_DINODE_FMT_BTREE)
4783		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
4784
4785	got.br_startoff = bno;
4786	got.br_startblock = startblock;
4787	got.br_blockcount = len;
4788	if (flags & XFS_BMAPI_PREALLOC)
4789		got.br_state = XFS_EXT_UNWRITTEN;
4790	else
4791		got.br_state = XFS_EXT_NORM;
4792
4793	error = xfs_bmap_add_extent_hole_real(tp, ip, whichfork, &icur,
4794			&cur, &got, &logflags, flags);
4795	if (error)
4796		goto error0;
4797
4798	error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags, whichfork);
4799
4800error0:
4801	if (ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS)
4802		logflags &= ~XFS_ILOG_DEXT;
4803	else if (ip->i_df.if_format != XFS_DINODE_FMT_BTREE)
4804		logflags &= ~XFS_ILOG_DBROOT;
4805
4806	if (logflags)
4807		xfs_trans_log_inode(tp, ip, logflags);
4808	if (cur)
4809		xfs_btree_del_cursor(cur, error);
4810	return error;
4811}
4812
4813/*
4814 * When a delalloc extent is split (e.g., due to a hole punch), the original
4815 * indlen reservation must be shared across the two new extents that are left
4816 * behind.
4817 *
4818 * Given the original reservation and the worst case indlen for the two new
4819 * extents (as calculated by xfs_bmap_worst_indlen()), split the original
4820 * reservation fairly across the two new extents. If necessary, steal available
4821 * blocks from a deleted extent to make up a reservation deficiency (e.g., if
4822 * ores == 1). The number of stolen blocks is returned. The availability and
4823 * subsequent accounting of stolen blocks is the responsibility of the caller.
4824 */
4825static xfs_filblks_t
4826xfs_bmap_split_indlen(
4827	xfs_filblks_t			ores,		/* original res. */
4828	xfs_filblks_t			*indlen1,	/* ext1 worst indlen */
4829	xfs_filblks_t			*indlen2,	/* ext2 worst indlen */
4830	xfs_filblks_t			avail)		/* stealable blocks */
4831{
4832	xfs_filblks_t			len1 = *indlen1;
4833	xfs_filblks_t			len2 = *indlen2;
4834	xfs_filblks_t			nres = len1 + len2; /* new total res. */
4835	xfs_filblks_t			stolen = 0;
4836	xfs_filblks_t			resfactor;
4837
4838	/*
4839	 * Steal as many blocks as we can to try and satisfy the worst case
4840	 * indlen for both new extents.
4841	 */
4842	if (ores < nres && avail)
4843		stolen = XFS_FILBLKS_MIN(nres - ores, avail);
4844	ores += stolen;
4845
4846	 /* nothing else to do if we've satisfied the new reservation */
4847	if (ores >= nres)
4848		return stolen;
4849
4850	/*
4851	 * We can't meet the total required reservation for the two extents.
4852	 * Calculate the percent of the overall shortage between both extents
4853	 * and apply this percentage to each of the requested indlen values.
4854	 * This distributes the shortage fairly and reduces the chances that one
4855	 * of the two extents is left with nothing when extents are repeatedly
4856	 * split.
4857	 */
4858	resfactor = (ores * 100);
4859	do_div(resfactor, nres);
4860	len1 *= resfactor;
4861	do_div(len1, 100);
4862	len2 *= resfactor;
4863	do_div(len2, 100);
4864	ASSERT(len1 + len2 <= ores);
4865	ASSERT(len1 < *indlen1 && len2 < *indlen2);
4866
4867	/*
4868	 * Hand out the remainder to each extent. If one of the two reservations
4869	 * is zero, we want to make sure that one gets a block first. The loop
4870	 * below starts with len1, so hand len2 a block right off the bat if it
4871	 * is zero.
4872	 */
4873	ores -= (len1 + len2);
4874	ASSERT((*indlen1 - len1) + (*indlen2 - len2) >= ores);
4875	if (ores && !len2 && *indlen2) {
4876		len2++;
4877		ores--;
4878	}
4879	while (ores) {
4880		if (len1 < *indlen1) {
4881			len1++;
4882			ores--;
4883		}
4884		if (!ores)
4885			break;
4886		if (len2 < *indlen2) {
4887			len2++;
4888			ores--;
4889		}
4890	}
4891
4892	*indlen1 = len1;
4893	*indlen2 = len2;
4894
4895	return stolen;
4896}
4897
4898int
4899xfs_bmap_del_extent_delay(
4900	struct xfs_inode	*ip,
4901	int			whichfork,
4902	struct xfs_iext_cursor	*icur,
4903	struct xfs_bmbt_irec	*got,
4904	struct xfs_bmbt_irec	*del)
4905{
4906	struct xfs_mount	*mp = ip->i_mount;
4907	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
4908	struct xfs_bmbt_irec	new;
4909	int64_t			da_old, da_new, da_diff = 0;
4910	xfs_fileoff_t		del_endoff, got_endoff;
4911	xfs_filblks_t		got_indlen, new_indlen, stolen;
4912	uint32_t		state = xfs_bmap_fork_to_state(whichfork);
4913	int			error = 0;
4914	bool			isrt;
4915
4916	XFS_STATS_INC(mp, xs_del_exlist);
4917
4918	isrt = xfs_ifork_is_realtime(ip, whichfork);
4919	del_endoff = del->br_startoff + del->br_blockcount;
4920	got_endoff = got->br_startoff + got->br_blockcount;
4921	da_old = startblockval(got->br_startblock);
4922	da_new = 0;
4923
4924	ASSERT(del->br_blockcount > 0);
4925	ASSERT(got->br_startoff <= del->br_startoff);
4926	ASSERT(got_endoff >= del_endoff);
4927
4928	if (isrt)
4929		xfs_mod_frextents(mp, xfs_rtb_to_rtx(mp, del->br_blockcount));
4930
4931	/*
4932	 * Update the inode delalloc counter now and wait to update the
4933	 * sb counters as we might have to borrow some blocks for the
4934	 * indirect block accounting.
4935	 */
4936	ASSERT(!isrt);
4937	error = xfs_quota_unreserve_blkres(ip, del->br_blockcount);
4938	if (error)
4939		return error;
4940	ip->i_delayed_blks -= del->br_blockcount;
4941
4942	if (got->br_startoff == del->br_startoff)
4943		state |= BMAP_LEFT_FILLING;
4944	if (got_endoff == del_endoff)
4945		state |= BMAP_RIGHT_FILLING;
4946
4947	switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
4948	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
4949		/*
4950		 * Matches the whole extent.  Delete the entry.
4951		 */
4952		xfs_iext_remove(ip, icur, state);
4953		xfs_iext_prev(ifp, icur);
4954		break;
4955	case BMAP_LEFT_FILLING:
4956		/*
4957		 * Deleting the first part of the extent.
4958		 */
4959		got->br_startoff = del_endoff;
4960		got->br_blockcount -= del->br_blockcount;
4961		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
4962				got->br_blockcount), da_old);
4963		got->br_startblock = nullstartblock((int)da_new);
4964		xfs_iext_update_extent(ip, state, icur, got);
4965		break;
4966	case BMAP_RIGHT_FILLING:
4967		/*
4968		 * Deleting the last part of the extent.
4969		 */
4970		got->br_blockcount = got->br_blockcount - del->br_blockcount;
4971		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
4972				got->br_blockcount), da_old);
4973		got->br_startblock = nullstartblock((int)da_new);
4974		xfs_iext_update_extent(ip, state, icur, got);
4975		break;
4976	case 0:
4977		/*
4978		 * Deleting the middle of the extent.
4979		 *
4980		 * Distribute the original indlen reservation across the two new
4981		 * extents.  Steal blocks from the deleted extent if necessary.
4982		 * Stealing blocks simply fudges the fdblocks accounting below.
4983		 * Warn if either of the new indlen reservations is zero as this
4984		 * can lead to delalloc problems.
4985		 */
4986		got->br_blockcount = del->br_startoff - got->br_startoff;
4987		got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount);
4988
4989		new.br_blockcount = got_endoff - del_endoff;
4990		new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount);
4991
4992		WARN_ON_ONCE(!got_indlen || !new_indlen);
4993		stolen = xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen,
4994						       del->br_blockcount);
4995
4996		got->br_startblock = nullstartblock((int)got_indlen);
4997
4998		new.br_startoff = del_endoff;
4999		new.br_state = got->br_state;
5000		new.br_startblock = nullstartblock((int)new_indlen);
5001
5002		xfs_iext_update_extent(ip, state, icur, got);
5003		xfs_iext_next(ifp, icur);
5004		xfs_iext_insert(ip, icur, &new, state);
5005
5006		da_new = got_indlen + new_indlen - stolen;
5007		del->br_blockcount -= stolen;
5008		break;
5009	}
5010
5011	ASSERT(da_old >= da_new);
5012	da_diff = da_old - da_new;
5013	if (!isrt)
5014		da_diff += del->br_blockcount;
5015	if (da_diff) {
5016		xfs_mod_fdblocks(mp, da_diff, false);
5017		xfs_mod_delalloc(mp, -da_diff);
5018	}
5019	return error;
5020}
5021
5022void
5023xfs_bmap_del_extent_cow(
5024	struct xfs_inode	*ip,
5025	struct xfs_iext_cursor	*icur,
5026	struct xfs_bmbt_irec	*got,
5027	struct xfs_bmbt_irec	*del)
5028{
5029	struct xfs_mount	*mp = ip->i_mount;
5030	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_COW_FORK);
5031	struct xfs_bmbt_irec	new;
5032	xfs_fileoff_t		del_endoff, got_endoff;
5033	uint32_t		state = BMAP_COWFORK;
5034
5035	XFS_STATS_INC(mp, xs_del_exlist);
5036
5037	del_endoff = del->br_startoff + del->br_blockcount;
5038	got_endoff = got->br_startoff + got->br_blockcount;
5039
5040	ASSERT(del->br_blockcount > 0);
5041	ASSERT(got->br_startoff <= del->br_startoff);
5042	ASSERT(got_endoff >= del_endoff);
5043	ASSERT(!isnullstartblock(got->br_startblock));
5044
5045	if (got->br_startoff == del->br_startoff)
5046		state |= BMAP_LEFT_FILLING;
5047	if (got_endoff == del_endoff)
5048		state |= BMAP_RIGHT_FILLING;
5049
5050	switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
5051	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
5052		/*
5053		 * Matches the whole extent.  Delete the entry.
5054		 */
5055		xfs_iext_remove(ip, icur, state);
5056		xfs_iext_prev(ifp, icur);
5057		break;
5058	case BMAP_LEFT_FILLING:
5059		/*
5060		 * Deleting the first part of the extent.
5061		 */
5062		got->br_startoff = del_endoff;
5063		got->br_blockcount -= del->br_blockcount;
5064		got->br_startblock = del->br_startblock + del->br_blockcount;
5065		xfs_iext_update_extent(ip, state, icur, got);
5066		break;
5067	case BMAP_RIGHT_FILLING:
5068		/*
5069		 * Deleting the last part of the extent.
5070		 */
5071		got->br_blockcount -= del->br_blockcount;
5072		xfs_iext_update_extent(ip, state, icur, got);
5073		break;
5074	case 0:
5075		/*
5076		 * Deleting the middle of the extent.
5077		 */
5078		got->br_blockcount = del->br_startoff - got->br_startoff;
5079
5080		new.br_startoff = del_endoff;
5081		new.br_blockcount = got_endoff - del_endoff;
5082		new.br_state = got->br_state;
5083		new.br_startblock = del->br_startblock + del->br_blockcount;
5084
5085		xfs_iext_update_extent(ip, state, icur, got);
5086		xfs_iext_next(ifp, icur);
5087		xfs_iext_insert(ip, icur, &new, state);
5088		break;
5089	}
5090	ip->i_delayed_blks -= del->br_blockcount;
5091}
5092
5093/*
5094 * Called by xfs_bmapi to update file extent records and the btree
5095 * after removing space.
5096 */
5097STATIC int				/* error */
5098xfs_bmap_del_extent_real(
5099	xfs_inode_t		*ip,	/* incore inode pointer */
5100	xfs_trans_t		*tp,	/* current transaction pointer */
5101	struct xfs_iext_cursor	*icur,
5102	struct xfs_btree_cur	*cur,	/* if null, not a btree */
5103	xfs_bmbt_irec_t		*del,	/* data to remove from extents */
5104	int			*logflagsp, /* inode logging flags */
5105	int			whichfork, /* data or attr fork */
5106	uint32_t		bflags)	/* bmapi flags */
5107{
5108	xfs_fsblock_t		del_endblock=0;	/* first block past del */
5109	xfs_fileoff_t		del_endoff;	/* first offset past del */
5110	int			do_fx;	/* free extent at end of routine */
5111	int			error;	/* error return value */
5112	struct xfs_bmbt_irec	got;	/* current extent entry */
5113	xfs_fileoff_t		got_endoff;	/* first offset past got */
5114	int			i;	/* temp state */
5115	struct xfs_ifork	*ifp;	/* inode fork pointer */
5116	xfs_mount_t		*mp;	/* mount structure */
5117	xfs_filblks_t		nblks;	/* quota/sb block count */
5118	xfs_bmbt_irec_t		new;	/* new record to be inserted */
5119	/* REFERENCED */
5120	uint			qfield;	/* quota field to update */
5121	uint32_t		state = xfs_bmap_fork_to_state(whichfork);
5122	struct xfs_bmbt_irec	old;
5123
5124	*logflagsp = 0;
5125
5126	mp = ip->i_mount;
5127	XFS_STATS_INC(mp, xs_del_exlist);
5128
5129	ifp = xfs_ifork_ptr(ip, whichfork);
5130	ASSERT(del->br_blockcount > 0);
5131	xfs_iext_get_extent(ifp, icur, &got);
5132	ASSERT(got.br_startoff <= del->br_startoff);
5133	del_endoff = del->br_startoff + del->br_blockcount;
5134	got_endoff = got.br_startoff + got.br_blockcount;
5135	ASSERT(got_endoff >= del_endoff);
5136	ASSERT(!isnullstartblock(got.br_startblock));
5137	qfield = 0;
5138
5139	/*
5140	 * If it's the case where the directory code is running with no block
5141	 * reservation, and the deleted block is in the middle of its extent,
5142	 * and the resulting insert of an extent would cause transformation to
5143	 * btree format, then reject it.  The calling code will then swap blocks
5144	 * around instead.  We have to do this now, rather than waiting for the
5145	 * conversion to btree format, since the transaction will be dirty then.
5146	 */
5147	if (tp->t_blk_res == 0 &&
5148	    ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
5149	    ifp->if_nextents >= XFS_IFORK_MAXEXT(ip, whichfork) &&
5150	    del->br_startoff > got.br_startoff && del_endoff < got_endoff)
5151		return -ENOSPC;
5152
5153	*logflagsp = XFS_ILOG_CORE;
5154	if (xfs_ifork_is_realtime(ip, whichfork)) {
5155		if (!(bflags & XFS_BMAPI_REMAP)) {
5156			error = xfs_rtfree_blocks(tp, del->br_startblock,
5157					del->br_blockcount);
5158			if (error)
5159				return error;
5160		}
5161
5162		do_fx = 0;
5163		qfield = XFS_TRANS_DQ_RTBCOUNT;
5164	} else {
5165		do_fx = 1;
5166		qfield = XFS_TRANS_DQ_BCOUNT;
5167	}
5168	nblks = del->br_blockcount;
5169
5170	del_endblock = del->br_startblock + del->br_blockcount;
5171	if (cur) {
5172		error = xfs_bmbt_lookup_eq(cur, &got, &i);
5173		if (error)
5174			return error;
5175		if (XFS_IS_CORRUPT(mp, i != 1)) {
5176			xfs_btree_mark_sick(cur);
5177			return -EFSCORRUPTED;
5178		}
5179	}
5180
5181	if (got.br_startoff == del->br_startoff)
5182		state |= BMAP_LEFT_FILLING;
5183	if (got_endoff == del_endoff)
5184		state |= BMAP_RIGHT_FILLING;
5185
5186	switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
5187	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
5188		/*
5189		 * Matches the whole extent.  Delete the entry.
5190		 */
5191		xfs_iext_remove(ip, icur, state);
5192		xfs_iext_prev(ifp, icur);
5193		ifp->if_nextents--;
5194
5195		*logflagsp |= XFS_ILOG_CORE;
5196		if (!cur) {
5197			*logflagsp |= xfs_ilog_fext(whichfork);
5198			break;
5199		}
5200		if ((error = xfs_btree_delete(cur, &i)))
5201			return error;
5202		if (XFS_IS_CORRUPT(mp, i != 1)) {
5203			xfs_btree_mark_sick(cur);
5204			return -EFSCORRUPTED;
5205		}
5206		break;
5207	case BMAP_LEFT_FILLING:
5208		/*
5209		 * Deleting the first part of the extent.
5210		 */
5211		got.br_startoff = del_endoff;
5212		got.br_startblock = del_endblock;
5213		got.br_blockcount -= del->br_blockcount;
5214		xfs_iext_update_extent(ip, state, icur, &got);
5215		if (!cur) {
5216			*logflagsp |= xfs_ilog_fext(whichfork);
5217			break;
5218		}
5219		error = xfs_bmbt_update(cur, &got);
5220		if (error)
5221			return error;
5222		break;
5223	case BMAP_RIGHT_FILLING:
5224		/*
5225		 * Deleting the last part of the extent.
5226		 */
5227		got.br_blockcount -= del->br_blockcount;
5228		xfs_iext_update_extent(ip, state, icur, &got);
5229		if (!cur) {
5230			*logflagsp |= xfs_ilog_fext(whichfork);
5231			break;
5232		}
5233		error = xfs_bmbt_update(cur, &got);
5234		if (error)
5235			return error;
5236		break;
5237	case 0:
5238		/*
5239		 * Deleting the middle of the extent.
5240		 */
5241
5242		old = got;
5243
5244		got.br_blockcount = del->br_startoff - got.br_startoff;
5245		xfs_iext_update_extent(ip, state, icur, &got);
5246
5247		new.br_startoff = del_endoff;
5248		new.br_blockcount = got_endoff - del_endoff;
5249		new.br_state = got.br_state;
5250		new.br_startblock = del_endblock;
5251
5252		*logflagsp |= XFS_ILOG_CORE;
5253		if (cur) {
5254			error = xfs_bmbt_update(cur, &got);
5255			if (error)
5256				return error;
5257			error = xfs_btree_increment(cur, 0, &i);
5258			if (error)
5259				return error;
5260			cur->bc_rec.b = new;
5261			error = xfs_btree_insert(cur, &i);
5262			if (error && error != -ENOSPC)
5263				return error;
5264			/*
5265			 * If get no-space back from btree insert, it tried a
5266			 * split, and we have a zero block reservation.  Fix up
5267			 * our state and return the error.
5268			 */
5269			if (error == -ENOSPC) {
5270				/*
5271				 * Reset the cursor, don't trust it after any
5272				 * insert operation.
5273				 */
5274				error = xfs_bmbt_lookup_eq(cur, &got, &i);
5275				if (error)
5276					return error;
5277				if (XFS_IS_CORRUPT(mp, i != 1)) {
5278					xfs_btree_mark_sick(cur);
5279					return -EFSCORRUPTED;
5280				}
5281				/*
5282				 * Update the btree record back
5283				 * to the original value.
5284				 */
5285				error = xfs_bmbt_update(cur, &old);
5286				if (error)
5287					return error;
5288				/*
5289				 * Reset the extent record back
5290				 * to the original value.
5291				 */
5292				xfs_iext_update_extent(ip, state, icur, &old);
5293				*logflagsp = 0;
5294				return -ENOSPC;
5295			}
5296			if (XFS_IS_CORRUPT(mp, i != 1)) {
5297				xfs_btree_mark_sick(cur);
5298				return -EFSCORRUPTED;
5299			}
5300		} else
5301			*logflagsp |= xfs_ilog_fext(whichfork);
5302
5303		ifp->if_nextents++;
5304		xfs_iext_next(ifp, icur);
5305		xfs_iext_insert(ip, icur, &new, state);
5306		break;
5307	}
5308
5309	/* remove reverse mapping */
5310	xfs_rmap_unmap_extent(tp, ip, whichfork, del);
5311
5312	/*
5313	 * If we need to, add to list of extents to delete.
5314	 */
5315	if (do_fx && !(bflags & XFS_BMAPI_REMAP)) {
5316		if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) {
5317			xfs_refcount_decrease_extent(tp, del);
5318		} else {
5319			error = xfs_free_extent_later(tp, del->br_startblock,
5320					del->br_blockcount, NULL,
5321					XFS_AG_RESV_NONE,
5322					((bflags & XFS_BMAPI_NODISCARD) ||
5323					del->br_state == XFS_EXT_UNWRITTEN));
5324			if (error)
5325				return error;
5326		}
5327	}
5328
5329	/*
5330	 * Adjust inode # blocks in the file.
5331	 */
5332	if (nblks)
5333		ip->i_nblocks -= nblks;
5334	/*
5335	 * Adjust quota data.
5336	 */
5337	if (qfield && !(bflags & XFS_BMAPI_REMAP))
5338		xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
5339
5340	return 0;
5341}
5342
5343/*
5344 * Unmap (remove) blocks from a file.
5345 * If nexts is nonzero then the number of extents to remove is limited to
5346 * that value.  If not all extents in the block range can be removed then
5347 * *done is set.
5348 */
5349static int
5350__xfs_bunmapi(
5351	struct xfs_trans	*tp,		/* transaction pointer */
5352	struct xfs_inode	*ip,		/* incore inode */
5353	xfs_fileoff_t		start,		/* first file offset deleted */
5354	xfs_filblks_t		*rlen,		/* i/o: amount remaining */
5355	uint32_t		flags,		/* misc flags */
5356	xfs_extnum_t		nexts)		/* number of extents max */
5357{
5358	struct xfs_btree_cur	*cur;		/* bmap btree cursor */
5359	struct xfs_bmbt_irec	del;		/* extent being deleted */
5360	int			error;		/* error return value */
5361	xfs_extnum_t		extno;		/* extent number in list */
5362	struct xfs_bmbt_irec	got;		/* current extent record */
5363	struct xfs_ifork	*ifp;		/* inode fork pointer */
5364	int			isrt;		/* freeing in rt area */
5365	int			logflags;	/* transaction logging flags */
5366	xfs_extlen_t		mod;		/* rt extent offset */
5367	struct xfs_mount	*mp = ip->i_mount;
5368	int			tmp_logflags;	/* partial logging flags */
5369	int			wasdel;		/* was a delayed alloc extent */
5370	int			whichfork;	/* data or attribute fork */
5371	xfs_filblks_t		len = *rlen;	/* length to unmap in file */
5372	xfs_fileoff_t		end;
5373	struct xfs_iext_cursor	icur;
5374	bool			done = false;
5375
5376	trace_xfs_bunmap(ip, start, len, flags, _RET_IP_);
5377
5378	whichfork = xfs_bmapi_whichfork(flags);
5379	ASSERT(whichfork != XFS_COW_FORK);
5380	ifp = xfs_ifork_ptr(ip, whichfork);
5381	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp))) {
5382		xfs_bmap_mark_sick(ip, whichfork);
5383		return -EFSCORRUPTED;
5384	}
5385	if (xfs_is_shutdown(mp))
5386		return -EIO;
5387
5388	xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
5389	ASSERT(len > 0);
5390	ASSERT(nexts >= 0);
5391
5392	error = xfs_iread_extents(tp, ip, whichfork);
5393	if (error)
5394		return error;
5395
5396	if (xfs_iext_count(ifp) == 0) {
5397		*rlen = 0;
5398		return 0;
5399	}
5400	XFS_STATS_INC(mp, xs_blk_unmap);
5401	isrt = xfs_ifork_is_realtime(ip, whichfork);
5402	end = start + len;
5403
5404	if (!xfs_iext_lookup_extent_before(ip, ifp, &end, &icur, &got)) {
5405		*rlen = 0;
5406		return 0;
5407	}
5408	end--;
5409
5410	logflags = 0;
5411	if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
5412		ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
5413		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5414	} else
5415		cur = NULL;
5416
5417	if (isrt) {
5418		/*
5419		 * Synchronize by locking the bitmap inode.
5420		 */
5421		xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
5422		xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
5423		xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
5424		xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
5425	}
5426
5427	extno = 0;
5428	while (end != (xfs_fileoff_t)-1 && end >= start &&
5429	       (nexts == 0 || extno < nexts)) {
5430		/*
5431		 * Is the found extent after a hole in which end lives?
5432		 * Just back up to the previous extent, if so.
5433		 */
5434		if (got.br_startoff > end &&
5435		    !xfs_iext_prev_extent(ifp, &icur, &got)) {
5436			done = true;
5437			break;
5438		}
5439		/*
5440		 * Is the last block of this extent before the range
5441		 * we're supposed to delete?  If so, we're done.
5442		 */
5443		end = XFS_FILEOFF_MIN(end,
5444			got.br_startoff + got.br_blockcount - 1);
5445		if (end < start)
5446			break;
5447		/*
5448		 * Then deal with the (possibly delayed) allocated space
5449		 * we found.
5450		 */
5451		del = got;
5452		wasdel = isnullstartblock(del.br_startblock);
5453
5454		if (got.br_startoff < start) {
5455			del.br_startoff = start;
5456			del.br_blockcount -= start - got.br_startoff;
5457			if (!wasdel)
5458				del.br_startblock += start - got.br_startoff;
5459		}
5460		if (del.br_startoff + del.br_blockcount > end + 1)
5461			del.br_blockcount = end + 1 - del.br_startoff;
5462
5463		if (!isrt || (flags & XFS_BMAPI_REMAP))
5464			goto delete;
5465
5466		mod = xfs_rtb_to_rtxoff(mp,
5467				del.br_startblock + del.br_blockcount);
5468		if (mod) {
5469			/*
5470			 * Realtime extent not lined up at the end.
5471			 * The extent could have been split into written
5472			 * and unwritten pieces, or we could just be
5473			 * unmapping part of it.  But we can't really
5474			 * get rid of part of a realtime extent.
5475			 */
5476			if (del.br_state == XFS_EXT_UNWRITTEN) {
5477				/*
5478				 * This piece is unwritten, or we're not
5479				 * using unwritten extents.  Skip over it.
5480				 */
5481				ASSERT((flags & XFS_BMAPI_REMAP) || end >= mod);
5482				end -= mod > del.br_blockcount ?
5483					del.br_blockcount : mod;
5484				if (end < got.br_startoff &&
5485				    !xfs_iext_prev_extent(ifp, &icur, &got)) {
5486					done = true;
5487					break;
5488				}
5489				continue;
5490			}
5491			/*
5492			 * It's written, turn it unwritten.
5493			 * This is better than zeroing it.
5494			 */
5495			ASSERT(del.br_state == XFS_EXT_NORM);
5496			ASSERT(tp->t_blk_res > 0);
5497			/*
5498			 * If this spans a realtime extent boundary,
5499			 * chop it back to the start of the one we end at.
5500			 */
5501			if (del.br_blockcount > mod) {
5502				del.br_startoff += del.br_blockcount - mod;
5503				del.br_startblock += del.br_blockcount - mod;
5504				del.br_blockcount = mod;
5505			}
5506			del.br_state = XFS_EXT_UNWRITTEN;
5507			error = xfs_bmap_add_extent_unwritten_real(tp, ip,
5508					whichfork, &icur, &cur, &del,
5509					&logflags);
5510			if (error)
5511				goto error0;
5512			goto nodelete;
5513		}
5514
5515		mod = xfs_rtb_to_rtxoff(mp, del.br_startblock);
5516		if (mod) {
5517			xfs_extlen_t off = mp->m_sb.sb_rextsize - mod;
5518
5519			/*
5520			 * Realtime extent is lined up at the end but not
5521			 * at the front.  We'll get rid of full extents if
5522			 * we can.
5523			 */
5524			if (del.br_blockcount > off) {
5525				del.br_blockcount -= off;
5526				del.br_startoff += off;
5527				del.br_startblock += off;
5528			} else if (del.br_startoff == start &&
5529				   (del.br_state == XFS_EXT_UNWRITTEN ||
5530				    tp->t_blk_res == 0)) {
5531				/*
5532				 * Can't make it unwritten.  There isn't
5533				 * a full extent here so just skip it.
5534				 */
5535				ASSERT(end >= del.br_blockcount);
5536				end -= del.br_blockcount;
5537				if (got.br_startoff > end &&
5538				    !xfs_iext_prev_extent(ifp, &icur, &got)) {
5539					done = true;
5540					break;
5541				}
5542				continue;
5543			} else if (del.br_state == XFS_EXT_UNWRITTEN) {
5544				struct xfs_bmbt_irec	prev;
5545				xfs_fileoff_t		unwrite_start;
5546
5547				/*
5548				 * This one is already unwritten.
5549				 * It must have a written left neighbor.
5550				 * Unwrite the killed part of that one and
5551				 * try again.
5552				 */
5553				if (!xfs_iext_prev_extent(ifp, &icur, &prev))
5554					ASSERT(0);
5555				ASSERT(prev.br_state == XFS_EXT_NORM);
5556				ASSERT(!isnullstartblock(prev.br_startblock));
5557				ASSERT(del.br_startblock ==
5558				       prev.br_startblock + prev.br_blockcount);
5559				unwrite_start = max3(start,
5560						     del.br_startoff - mod,
5561						     prev.br_startoff);
5562				mod = unwrite_start - prev.br_startoff;
5563				prev.br_startoff = unwrite_start;
5564				prev.br_startblock += mod;
5565				prev.br_blockcount -= mod;
5566				prev.br_state = XFS_EXT_UNWRITTEN;
5567				error = xfs_bmap_add_extent_unwritten_real(tp,
5568						ip, whichfork, &icur, &cur,
5569						&prev, &logflags);
5570				if (error)
5571					goto error0;
5572				goto nodelete;
5573			} else {
5574				ASSERT(del.br_state == XFS_EXT_NORM);
5575				del.br_state = XFS_EXT_UNWRITTEN;
5576				error = xfs_bmap_add_extent_unwritten_real(tp,
5577						ip, whichfork, &icur, &cur,
5578						&del, &logflags);
5579				if (error)
5580					goto error0;
5581				goto nodelete;
5582			}
5583		}
5584
5585delete:
5586		if (wasdel) {
5587			error = xfs_bmap_del_extent_delay(ip, whichfork, &icur,
5588					&got, &del);
5589		} else {
5590			error = xfs_bmap_del_extent_real(ip, tp, &icur, cur,
5591					&del, &tmp_logflags, whichfork,
5592					flags);
5593			logflags |= tmp_logflags;
5594		}
5595
5596		if (error)
5597			goto error0;
5598
5599		end = del.br_startoff - 1;
5600nodelete:
5601		/*
5602		 * If not done go on to the next (previous) record.
5603		 */
5604		if (end != (xfs_fileoff_t)-1 && end >= start) {
5605			if (!xfs_iext_get_extent(ifp, &icur, &got) ||
5606			    (got.br_startoff > end &&
5607			     !xfs_iext_prev_extent(ifp, &icur, &got))) {
5608				done = true;
5609				break;
5610			}
5611			extno++;
5612		}
5613	}
5614	if (done || end == (xfs_fileoff_t)-1 || end < start)
5615		*rlen = 0;
5616	else
5617		*rlen = end - start + 1;
5618
5619	/*
5620	 * Convert to a btree if necessary.
5621	 */
5622	if (xfs_bmap_needs_btree(ip, whichfork)) {
5623		ASSERT(cur == NULL);
5624		error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
5625				&tmp_logflags, whichfork);
5626		logflags |= tmp_logflags;
5627	} else {
5628		error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags,
5629			whichfork);
5630	}
5631
5632error0:
5633	/*
5634	 * Log everything.  Do this after conversion, there's no point in
5635	 * logging the extent records if we've converted to btree format.
5636	 */
5637	if ((logflags & xfs_ilog_fext(whichfork)) &&
5638	    ifp->if_format != XFS_DINODE_FMT_EXTENTS)
5639		logflags &= ~xfs_ilog_fext(whichfork);
5640	else if ((logflags & xfs_ilog_fbroot(whichfork)) &&
5641		 ifp->if_format != XFS_DINODE_FMT_BTREE)
5642		logflags &= ~xfs_ilog_fbroot(whichfork);
5643	/*
5644	 * Log inode even in the error case, if the transaction
5645	 * is dirty we'll need to shut down the filesystem.
5646	 */
5647	if (logflags)
5648		xfs_trans_log_inode(tp, ip, logflags);
5649	if (cur) {
5650		if (!error)
5651			cur->bc_bmap.allocated = 0;
5652		xfs_btree_del_cursor(cur, error);
5653	}
5654	return error;
5655}
5656
5657/* Unmap a range of a file. */
5658int
5659xfs_bunmapi(
5660	xfs_trans_t		*tp,
5661	struct xfs_inode	*ip,
5662	xfs_fileoff_t		bno,
5663	xfs_filblks_t		len,
5664	uint32_t		flags,
5665	xfs_extnum_t		nexts,
5666	int			*done)
5667{
5668	int			error;
5669
5670	error = __xfs_bunmapi(tp, ip, bno, &len, flags, nexts);
5671	*done = (len == 0);
5672	return error;
5673}
5674
5675/*
5676 * Determine whether an extent shift can be accomplished by a merge with the
5677 * extent that precedes the target hole of the shift.
5678 */
5679STATIC bool
5680xfs_bmse_can_merge(
5681	struct xfs_bmbt_irec	*left,	/* preceding extent */
5682	struct xfs_bmbt_irec	*got,	/* current extent to shift */
5683	xfs_fileoff_t		shift)	/* shift fsb */
5684{
5685	xfs_fileoff_t		startoff;
5686
5687	startoff = got->br_startoff - shift;
5688
5689	/*
5690	 * The extent, once shifted, must be adjacent in-file and on-disk with
5691	 * the preceding extent.
5692	 */
5693	if ((left->br_startoff + left->br_blockcount != startoff) ||
5694	    (left->br_startblock + left->br_blockcount != got->br_startblock) ||
5695	    (left->br_state != got->br_state) ||
5696	    (left->br_blockcount + got->br_blockcount > XFS_MAX_BMBT_EXTLEN))
5697		return false;
5698
5699	return true;
5700}
5701
5702/*
5703 * A bmap extent shift adjusts the file offset of an extent to fill a preceding
5704 * hole in the file. If an extent shift would result in the extent being fully
5705 * adjacent to the extent that currently precedes the hole, we can merge with
5706 * the preceding extent rather than do the shift.
5707 *
5708 * This function assumes the caller has verified a shift-by-merge is possible
5709 * with the provided extents via xfs_bmse_can_merge().
5710 */
5711STATIC int
5712xfs_bmse_merge(
5713	struct xfs_trans		*tp,
5714	struct xfs_inode		*ip,
5715	int				whichfork,
5716	xfs_fileoff_t			shift,		/* shift fsb */
5717	struct xfs_iext_cursor		*icur,
5718	struct xfs_bmbt_irec		*got,		/* extent to shift */
5719	struct xfs_bmbt_irec		*left,		/* preceding extent */
5720	struct xfs_btree_cur		*cur,
5721	int				*logflags)	/* output */
5722{
5723	struct xfs_ifork		*ifp = xfs_ifork_ptr(ip, whichfork);
5724	struct xfs_bmbt_irec		new;
5725	xfs_filblks_t			blockcount;
5726	int				error, i;
5727	struct xfs_mount		*mp = ip->i_mount;
5728
5729	blockcount = left->br_blockcount + got->br_blockcount;
5730
5731	xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
5732	ASSERT(xfs_bmse_can_merge(left, got, shift));
5733
5734	new = *left;
5735	new.br_blockcount = blockcount;
5736
5737	/*
5738	 * Update the on-disk extent count, the btree if necessary and log the
5739	 * inode.
5740	 */
5741	ifp->if_nextents--;
5742	*logflags |= XFS_ILOG_CORE;
5743	if (!cur) {
5744		*logflags |= XFS_ILOG_DEXT;
5745		goto done;
5746	}
5747
5748	/* lookup and remove the extent to merge */
5749	error = xfs_bmbt_lookup_eq(cur, got, &i);
5750	if (error)
5751		return error;
5752	if (XFS_IS_CORRUPT(mp, i != 1)) {
5753		xfs_btree_mark_sick(cur);
5754		return -EFSCORRUPTED;
5755	}
5756
5757	error = xfs_btree_delete(cur, &i);
5758	if (error)
5759		return error;
5760	if (XFS_IS_CORRUPT(mp, i != 1)) {
5761		xfs_btree_mark_sick(cur);
5762		return -EFSCORRUPTED;
5763	}
5764
5765	/* lookup and update size of the previous extent */
5766	error = xfs_bmbt_lookup_eq(cur, left, &i);
5767	if (error)
5768		return error;
5769	if (XFS_IS_CORRUPT(mp, i != 1)) {
5770		xfs_btree_mark_sick(cur);
5771		return -EFSCORRUPTED;
5772	}
5773
5774	error = xfs_bmbt_update(cur, &new);
5775	if (error)
5776		return error;
5777
5778	/* change to extent format if required after extent removal */
5779	error = xfs_bmap_btree_to_extents(tp, ip, cur, logflags, whichfork);
5780	if (error)
5781		return error;
5782
5783done:
5784	xfs_iext_remove(ip, icur, 0);
5785	xfs_iext_prev(ifp, icur);
5786	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
5787			&new);
5788
5789	/* update reverse mapping. rmap functions merge the rmaps for us */
5790	xfs_rmap_unmap_extent(tp, ip, whichfork, got);
5791	memcpy(&new, got, sizeof(new));
5792	new.br_startoff = left->br_startoff + left->br_blockcount;
5793	xfs_rmap_map_extent(tp, ip, whichfork, &new);
5794	return 0;
5795}
5796
5797static int
5798xfs_bmap_shift_update_extent(
5799	struct xfs_trans	*tp,
5800	struct xfs_inode	*ip,
5801	int			whichfork,
5802	struct xfs_iext_cursor	*icur,
5803	struct xfs_bmbt_irec	*got,
5804	struct xfs_btree_cur	*cur,
5805	int			*logflags,
5806	xfs_fileoff_t		startoff)
5807{
5808	struct xfs_mount	*mp = ip->i_mount;
5809	struct xfs_bmbt_irec	prev = *got;
5810	int			error, i;
5811
5812	*logflags |= XFS_ILOG_CORE;
5813
5814	got->br_startoff = startoff;
5815
5816	if (cur) {
5817		error = xfs_bmbt_lookup_eq(cur, &prev, &i);
5818		if (error)
5819			return error;
5820		if (XFS_IS_CORRUPT(mp, i != 1)) {
5821			xfs_btree_mark_sick(cur);
5822			return -EFSCORRUPTED;
5823		}
5824
5825		error = xfs_bmbt_update(cur, got);
5826		if (error)
5827			return error;
5828	} else {
5829		*logflags |= XFS_ILOG_DEXT;
5830	}
5831
5832	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
5833			got);
5834
5835	/* update reverse mapping */
5836	xfs_rmap_unmap_extent(tp, ip, whichfork, &prev);
5837	xfs_rmap_map_extent(tp, ip, whichfork, got);
5838	return 0;
5839}
5840
5841int
5842xfs_bmap_collapse_extents(
5843	struct xfs_trans	*tp,
5844	struct xfs_inode	*ip,
5845	xfs_fileoff_t		*next_fsb,
5846	xfs_fileoff_t		offset_shift_fsb,
5847	bool			*done)
5848{
5849	int			whichfork = XFS_DATA_FORK;
5850	struct xfs_mount	*mp = ip->i_mount;
5851	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
5852	struct xfs_btree_cur	*cur = NULL;
5853	struct xfs_bmbt_irec	got, prev;
5854	struct xfs_iext_cursor	icur;
5855	xfs_fileoff_t		new_startoff;
5856	int			error = 0;
5857	int			logflags = 0;
5858
5859	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
5860	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
5861		xfs_bmap_mark_sick(ip, whichfork);
5862		return -EFSCORRUPTED;
5863	}
5864
5865	if (xfs_is_shutdown(mp))
5866		return -EIO;
5867
5868	xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
5869
5870	error = xfs_iread_extents(tp, ip, whichfork);
5871	if (error)
5872		return error;
5873
5874	if (ifp->if_format == XFS_DINODE_FMT_BTREE)
5875		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5876
5877	if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
5878		*done = true;
5879		goto del_cursor;
5880	}
5881	if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) {
5882		xfs_bmap_mark_sick(ip, whichfork);
5883		error = -EFSCORRUPTED;
5884		goto del_cursor;
5885	}
5886
5887	new_startoff = got.br_startoff - offset_shift_fsb;
5888	if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) {
5889		if (new_startoff < prev.br_startoff + prev.br_blockcount) {
5890			error = -EINVAL;
5891			goto del_cursor;
5892		}
5893
5894		if (xfs_bmse_can_merge(&prev, &got, offset_shift_fsb)) {
5895			error = xfs_bmse_merge(tp, ip, whichfork,
5896					offset_shift_fsb, &icur, &got, &prev,
5897					cur, &logflags);
5898			if (error)
5899				goto del_cursor;
5900			goto done;
5901		}
5902	} else {
5903		if (got.br_startoff < offset_shift_fsb) {
5904			error = -EINVAL;
5905			goto del_cursor;
5906		}
5907	}
5908
5909	error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
5910			cur, &logflags, new_startoff);
5911	if (error)
5912		goto del_cursor;
5913
5914done:
5915	if (!xfs_iext_next_extent(ifp, &icur, &got)) {
5916		*done = true;
5917		goto del_cursor;
5918	}
5919
5920	*next_fsb = got.br_startoff;
5921del_cursor:
5922	if (cur)
5923		xfs_btree_del_cursor(cur, error);
5924	if (logflags)
5925		xfs_trans_log_inode(tp, ip, logflags);
5926	return error;
5927}
5928
5929/* Make sure we won't be right-shifting an extent past the maximum bound. */
5930int
5931xfs_bmap_can_insert_extents(
5932	struct xfs_inode	*ip,
5933	xfs_fileoff_t		off,
5934	xfs_fileoff_t		shift)
5935{
5936	struct xfs_bmbt_irec	got;
5937	int			is_empty;
5938	int			error = 0;
5939
5940	xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL);
5941
5942	if (xfs_is_shutdown(ip->i_mount))
5943		return -EIO;
5944
5945	xfs_ilock(ip, XFS_ILOCK_EXCL);
5946	error = xfs_bmap_last_extent(NULL, ip, XFS_DATA_FORK, &got, &is_empty);
5947	if (!error && !is_empty && got.br_startoff >= off &&
5948	    ((got.br_startoff + shift) & BMBT_STARTOFF_MASK) < got.br_startoff)
5949		error = -EINVAL;
5950	xfs_iunlock(ip, XFS_ILOCK_EXCL);
5951
5952	return error;
5953}
5954
5955int
5956xfs_bmap_insert_extents(
5957	struct xfs_trans	*tp,
5958	struct xfs_inode	*ip,
5959	xfs_fileoff_t		*next_fsb,
5960	xfs_fileoff_t		offset_shift_fsb,
5961	bool			*done,
5962	xfs_fileoff_t		stop_fsb)
5963{
5964	int			whichfork = XFS_DATA_FORK;
5965	struct xfs_mount	*mp = ip->i_mount;
5966	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
5967	struct xfs_btree_cur	*cur = NULL;
5968	struct xfs_bmbt_irec	got, next;
5969	struct xfs_iext_cursor	icur;
5970	xfs_fileoff_t		new_startoff;
5971	int			error = 0;
5972	int			logflags = 0;
5973
5974	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
5975	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
5976		xfs_bmap_mark_sick(ip, whichfork);
5977		return -EFSCORRUPTED;
5978	}
5979
5980	if (xfs_is_shutdown(mp))
5981		return -EIO;
5982
5983	xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
5984
5985	error = xfs_iread_extents(tp, ip, whichfork);
5986	if (error)
5987		return error;
5988
5989	if (ifp->if_format == XFS_DINODE_FMT_BTREE)
5990		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5991
5992	if (*next_fsb == NULLFSBLOCK) {
5993		xfs_iext_last(ifp, &icur);
5994		if (!xfs_iext_get_extent(ifp, &icur, &got) ||
5995		    stop_fsb > got.br_startoff) {
5996			*done = true;
5997			goto del_cursor;
5998		}
5999	} else {
6000		if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
6001			*done = true;
6002			goto del_cursor;
6003		}
6004	}
6005	if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) {
6006		xfs_bmap_mark_sick(ip, whichfork);
6007		error = -EFSCORRUPTED;
6008		goto del_cursor;
6009	}
6010
6011	if (XFS_IS_CORRUPT(mp, stop_fsb > got.br_startoff)) {
6012		xfs_bmap_mark_sick(ip, whichfork);
6013		error = -EFSCORRUPTED;
6014		goto del_cursor;
6015	}
6016
6017	new_startoff = got.br_startoff + offset_shift_fsb;
6018	if (xfs_iext_peek_next_extent(ifp, &icur, &next)) {
6019		if (new_startoff + got.br_blockcount > next.br_startoff) {
6020			error = -EINVAL;
6021			goto del_cursor;
6022		}
6023
6024		/*
6025		 * Unlike a left shift (which involves a hole punch), a right
6026		 * shift does not modify extent neighbors in any way.  We should
6027		 * never find mergeable extents in this scenario.  Check anyways
6028		 * and warn if we encounter two extents that could be one.
6029		 */
6030		if (xfs_bmse_can_merge(&got, &next, offset_shift_fsb))
6031			WARN_ON_ONCE(1);
6032	}
6033
6034	error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
6035			cur, &logflags, new_startoff);
6036	if (error)
6037		goto del_cursor;
6038
6039	if (!xfs_iext_prev_extent(ifp, &icur, &got) ||
6040	    stop_fsb >= got.br_startoff + got.br_blockcount) {
6041		*done = true;
6042		goto del_cursor;
6043	}
6044
6045	*next_fsb = got.br_startoff;
6046del_cursor:
6047	if (cur)
6048		xfs_btree_del_cursor(cur, error);
6049	if (logflags)
6050		xfs_trans_log_inode(tp, ip, logflags);
6051	return error;
6052}
6053
6054/*
6055 * Splits an extent into two extents at split_fsb block such that it is the
6056 * first block of the current_ext. @ext is a target extent to be split.
6057 * @split_fsb is a block where the extents is split.  If split_fsb lies in a
6058 * hole or the first block of extents, just return 0.
6059 */
6060int
6061xfs_bmap_split_extent(
6062	struct xfs_trans	*tp,
6063	struct xfs_inode	*ip,
6064	xfs_fileoff_t		split_fsb)
6065{
6066	int				whichfork = XFS_DATA_FORK;
6067	struct xfs_ifork		*ifp = xfs_ifork_ptr(ip, whichfork);
6068	struct xfs_btree_cur		*cur = NULL;
6069	struct xfs_bmbt_irec		got;
6070	struct xfs_bmbt_irec		new; /* split extent */
6071	struct xfs_mount		*mp = ip->i_mount;
6072	xfs_fsblock_t			gotblkcnt; /* new block count for got */
6073	struct xfs_iext_cursor		icur;
6074	int				error = 0;
6075	int				logflags = 0;
6076	int				i = 0;
6077
6078	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
6079	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
6080		xfs_bmap_mark_sick(ip, whichfork);
6081		return -EFSCORRUPTED;
6082	}
6083
6084	if (xfs_is_shutdown(mp))
6085		return -EIO;
6086
6087	/* Read in all the extents */
6088	error = xfs_iread_extents(tp, ip, whichfork);
6089	if (error)
6090		return error;
6091
6092	/*
6093	 * If there are not extents, or split_fsb lies in a hole we are done.
6094	 */
6095	if (!xfs_iext_lookup_extent(ip, ifp, split_fsb, &icur, &got) ||
6096	    got.br_startoff >= split_fsb)
6097		return 0;
6098
6099	gotblkcnt = split_fsb - got.br_startoff;
6100	new.br_startoff = split_fsb;
6101	new.br_startblock = got.br_startblock + gotblkcnt;
6102	new.br_blockcount = got.br_blockcount - gotblkcnt;
6103	new.br_state = got.br_state;
6104
6105	if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
6106		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
6107		error = xfs_bmbt_lookup_eq(cur, &got, &i);
6108		if (error)
6109			goto del_cursor;
6110		if (XFS_IS_CORRUPT(mp, i != 1)) {
6111			xfs_btree_mark_sick(cur);
6112			error = -EFSCORRUPTED;
6113			goto del_cursor;
6114		}
6115	}
6116
6117	got.br_blockcount = gotblkcnt;
6118	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), &icur,
6119			&got);
6120
6121	logflags = XFS_ILOG_CORE;
6122	if (cur) {
6123		error = xfs_bmbt_update(cur, &got);
6124		if (error)
6125			goto del_cursor;
6126	} else
6127		logflags |= XFS_ILOG_DEXT;
6128
6129	/* Add new extent */
6130	xfs_iext_next(ifp, &icur);
6131	xfs_iext_insert(ip, &icur, &new, 0);
6132	ifp->if_nextents++;
6133
6134	if (cur) {
6135		error = xfs_bmbt_lookup_eq(cur, &new, &i);
6136		if (error)
6137			goto del_cursor;
6138		if (XFS_IS_CORRUPT(mp, i != 0)) {
6139			xfs_btree_mark_sick(cur);
6140			error = -EFSCORRUPTED;
6141			goto del_cursor;
6142		}
6143		error = xfs_btree_insert(cur, &i);
6144		if (error)
6145			goto del_cursor;
6146		if (XFS_IS_CORRUPT(mp, i != 1)) {
6147			xfs_btree_mark_sick(cur);
6148			error = -EFSCORRUPTED;
6149			goto del_cursor;
6150		}
6151	}
6152
6153	/*
6154	 * Convert to a btree if necessary.
6155	 */
6156	if (xfs_bmap_needs_btree(ip, whichfork)) {
6157		int tmp_logflags; /* partial log flag return val */
6158
6159		ASSERT(cur == NULL);
6160		error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
6161				&tmp_logflags, whichfork);
6162		logflags |= tmp_logflags;
6163	}
6164
6165del_cursor:
6166	if (cur) {
6167		cur->bc_bmap.allocated = 0;
6168		xfs_btree_del_cursor(cur, error);
6169	}
6170
6171	if (logflags)
6172		xfs_trans_log_inode(tp, ip, logflags);
6173	return error;
6174}
6175
6176/* Record a bmap intent. */
6177static inline void
6178__xfs_bmap_add(
6179	struct xfs_trans		*tp,
6180	enum xfs_bmap_intent_type	type,
6181	struct xfs_inode		*ip,
6182	int				whichfork,
6183	struct xfs_bmbt_irec		*bmap)
6184{
6185	struct xfs_bmap_intent		*bi;
6186
6187	if ((whichfork != XFS_DATA_FORK && whichfork != XFS_ATTR_FORK) ||
6188	    bmap->br_startblock == HOLESTARTBLOCK ||
6189	    bmap->br_startblock == DELAYSTARTBLOCK)
6190		return;
6191
6192	bi = kmem_cache_alloc(xfs_bmap_intent_cache, GFP_KERNEL | __GFP_NOFAIL);
6193	INIT_LIST_HEAD(&bi->bi_list);
6194	bi->bi_type = type;
6195	bi->bi_owner = ip;
6196	bi->bi_whichfork = whichfork;
6197	bi->bi_bmap = *bmap;
6198
6199	xfs_bmap_defer_add(tp, bi);
6200}
6201
6202/* Map an extent into a file. */
6203void
6204xfs_bmap_map_extent(
6205	struct xfs_trans	*tp,
6206	struct xfs_inode	*ip,
6207	int			whichfork,
6208	struct xfs_bmbt_irec	*PREV)
6209{
6210	__xfs_bmap_add(tp, XFS_BMAP_MAP, ip, whichfork, PREV);
6211}
6212
6213/* Unmap an extent out of a file. */
6214void
6215xfs_bmap_unmap_extent(
6216	struct xfs_trans	*tp,
6217	struct xfs_inode	*ip,
6218	int			whichfork,
6219	struct xfs_bmbt_irec	*PREV)
6220{
6221	__xfs_bmap_add(tp, XFS_BMAP_UNMAP, ip, whichfork, PREV);
6222}
6223
6224/*
6225 * Process one of the deferred bmap operations.  We pass back the
6226 * btree cursor to maintain our lock on the bmapbt between calls.
6227 */
6228int
6229xfs_bmap_finish_one(
6230	struct xfs_trans		*tp,
6231	struct xfs_bmap_intent		*bi)
6232{
6233	struct xfs_bmbt_irec		*bmap = &bi->bi_bmap;
6234	int				error = 0;
6235	int				flags = 0;
6236
6237	if (bi->bi_whichfork == XFS_ATTR_FORK)
6238		flags |= XFS_BMAPI_ATTRFORK;
6239
6240	ASSERT(tp->t_highest_agno == NULLAGNUMBER);
6241
6242	trace_xfs_bmap_deferred(bi);
6243
6244	if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_BMAP_FINISH_ONE))
6245		return -EIO;
6246
6247	switch (bi->bi_type) {
6248	case XFS_BMAP_MAP:
6249		if (bi->bi_bmap.br_state == XFS_EXT_UNWRITTEN)
6250			flags |= XFS_BMAPI_PREALLOC;
6251		error = xfs_bmapi_remap(tp, bi->bi_owner, bmap->br_startoff,
6252				bmap->br_blockcount, bmap->br_startblock,
6253				flags);
6254		bmap->br_blockcount = 0;
6255		break;
6256	case XFS_BMAP_UNMAP:
6257		error = __xfs_bunmapi(tp, bi->bi_owner, bmap->br_startoff,
6258				&bmap->br_blockcount, flags | XFS_BMAPI_REMAP,
6259				1);
6260		break;
6261	default:
6262		ASSERT(0);
6263		xfs_bmap_mark_sick(bi->bi_owner, bi->bi_whichfork);
6264		error = -EFSCORRUPTED;
6265	}
6266
6267	return error;
6268}
6269
6270/* Check that an extent does not have invalid flags or bad ranges. */
6271xfs_failaddr_t
6272xfs_bmap_validate_extent_raw(
6273	struct xfs_mount	*mp,
6274	bool			rtfile,
6275	int			whichfork,
6276	struct xfs_bmbt_irec	*irec)
6277{
6278	if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
6279		return __this_address;
6280
6281	if (rtfile && whichfork == XFS_DATA_FORK) {
6282		if (!xfs_verify_rtbext(mp, irec->br_startblock,
6283					   irec->br_blockcount))
6284			return __this_address;
6285	} else {
6286		if (!xfs_verify_fsbext(mp, irec->br_startblock,
6287					   irec->br_blockcount))
6288			return __this_address;
6289	}
6290	if (irec->br_state != XFS_EXT_NORM && whichfork != XFS_DATA_FORK)
6291		return __this_address;
6292	return NULL;
6293}
6294
6295int __init
6296xfs_bmap_intent_init_cache(void)
6297{
6298	xfs_bmap_intent_cache = kmem_cache_create("xfs_bmap_intent",
6299			sizeof(struct xfs_bmap_intent),
6300			0, 0, NULL);
6301
6302	return xfs_bmap_intent_cache != NULL ? 0 : -ENOMEM;
6303}
6304
6305void
6306xfs_bmap_intent_destroy_cache(void)
6307{
6308	kmem_cache_destroy(xfs_bmap_intent_cache);
6309	xfs_bmap_intent_cache = NULL;
6310}
6311
6312/* Check that an inode's extent does not have invalid flags or bad ranges. */
6313xfs_failaddr_t
6314xfs_bmap_validate_extent(
6315	struct xfs_inode	*ip,
6316	int			whichfork,
6317	struct xfs_bmbt_irec	*irec)
6318{
6319	return xfs_bmap_validate_extent_raw(ip->i_mount,
6320			XFS_IS_REALTIME_INODE(ip), whichfork, irec);
6321}
6322
6323/*
6324 * Used in xfs_itruncate_extents().  This is the maximum number of extents
6325 * freed from a file in a single transaction.
6326 */
6327#define	XFS_ITRUNC_MAX_EXTENTS	2
6328
6329/*
6330 * Unmap every extent in part of an inode's fork.  We don't do any higher level
6331 * invalidation work at all.
6332 */
6333int
6334xfs_bunmapi_range(
6335	struct xfs_trans	**tpp,
6336	struct xfs_inode	*ip,
6337	uint32_t		flags,
6338	xfs_fileoff_t		startoff,
6339	xfs_fileoff_t		endoff)
6340{
6341	xfs_filblks_t		unmap_len = endoff - startoff + 1;
6342	int			error = 0;
6343
6344	xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
6345
6346	while (unmap_len > 0) {
6347		ASSERT((*tpp)->t_highest_agno == NULLAGNUMBER);
6348		error = __xfs_bunmapi(*tpp, ip, startoff, &unmap_len, flags,
6349				XFS_ITRUNC_MAX_EXTENTS);
6350		if (error)
6351			goto out;
6352
6353		/* free the just unmapped extents */
6354		error = xfs_defer_finish(tpp);
6355		if (error)
6356			goto out;
6357	}
6358out:
6359	return error;
6360}
6361
6362struct xfs_bmap_query_range {
6363	xfs_bmap_query_range_fn	fn;
6364	void			*priv;
6365};
6366
6367/* Format btree record and pass to our callback. */
6368STATIC int
6369xfs_bmap_query_range_helper(
6370	struct xfs_btree_cur		*cur,
6371	const union xfs_btree_rec	*rec,
6372	void				*priv)
6373{
6374	struct xfs_bmap_query_range	*query = priv;
6375	struct xfs_bmbt_irec		irec;
6376	xfs_failaddr_t			fa;
6377
6378	xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
6379	fa = xfs_bmap_validate_extent(cur->bc_ino.ip, cur->bc_ino.whichfork,
6380			&irec);
6381	if (fa) {
6382		xfs_btree_mark_sick(cur);
6383		return xfs_bmap_complain_bad_rec(cur->bc_ino.ip,
6384				cur->bc_ino.whichfork, fa, &irec);
6385	}
6386
6387	return query->fn(cur, &irec, query->priv);
6388}
6389
6390/* Find all bmaps. */
6391int
6392xfs_bmap_query_all(
6393	struct xfs_btree_cur		*cur,
6394	xfs_bmap_query_range_fn		fn,
6395	void				*priv)
6396{
6397	struct xfs_bmap_query_range	query = {
6398		.priv			= priv,
6399		.fn			= fn,
6400	};
6401
6402	return xfs_btree_query_all(cur, xfs_bmap_query_range_helper, &query);
6403}
6404