xfs_ag.c revision 9343ee76
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
4 * Copyright (c) 2018 Red Hat, Inc.
5 * All rights reserved.
6 */
7
8#include "xfs.h"
9#include "xfs_fs.h"
10#include "xfs_shared.h"
11#include "xfs_format.h"
12#include "xfs_trans_resv.h"
13#include "xfs_bit.h"
14#include "xfs_sb.h"
15#include "xfs_mount.h"
16#include "xfs_btree.h"
17#include "xfs_alloc_btree.h"
18#include "xfs_rmap_btree.h"
19#include "xfs_alloc.h"
20#include "xfs_ialloc.h"
21#include "xfs_rmap.h"
22#include "xfs_ag.h"
23#include "xfs_ag_resv.h"
24#include "xfs_health.h"
25#include "xfs_error.h"
26#include "xfs_bmap.h"
27#include "xfs_defer.h"
28#include "xfs_log_format.h"
29#include "xfs_trans.h"
30#include "xfs_trace.h"
31#include "xfs_inode.h"
32#include "xfs_icache.h"
33
34
35/*
36 * Passive reference counting access wrappers to the perag structures.  If the
37 * per-ag structure is to be freed, the freeing code is responsible for cleaning
38 * up objects with passive references before freeing the structure. This is
39 * things like cached buffers.
40 */
41struct xfs_perag *
42xfs_perag_get(
43	struct xfs_mount	*mp,
44	xfs_agnumber_t		agno)
45{
46	struct xfs_perag	*pag;
47	int			ref = 0;
48
49	rcu_read_lock();
50	pag = radix_tree_lookup(&mp->m_perag_tree, agno);
51	if (pag) {
52		ASSERT(atomic_read(&pag->pag_ref) >= 0);
53		ref = atomic_inc_return(&pag->pag_ref);
54	}
55	rcu_read_unlock();
56	trace_xfs_perag_get(mp, agno, ref, _RET_IP_);
57	return pag;
58}
59
60/*
61 * search from @first to find the next perag with the given tag set.
62 */
63struct xfs_perag *
64xfs_perag_get_tag(
65	struct xfs_mount	*mp,
66	xfs_agnumber_t		first,
67	unsigned int		tag)
68{
69	struct xfs_perag	*pag;
70	int			found;
71	int			ref;
72
73	rcu_read_lock();
74	found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
75					(void **)&pag, first, 1, tag);
76	if (found <= 0) {
77		rcu_read_unlock();
78		return NULL;
79	}
80	ref = atomic_inc_return(&pag->pag_ref);
81	rcu_read_unlock();
82	trace_xfs_perag_get_tag(mp, pag->pag_agno, ref, _RET_IP_);
83	return pag;
84}
85
86void
87xfs_perag_put(
88	struct xfs_perag	*pag)
89{
90	int	ref;
91
92	ASSERT(atomic_read(&pag->pag_ref) > 0);
93	ref = atomic_dec_return(&pag->pag_ref);
94	trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_);
95}
96
97/*
98 * xfs_initialize_perag_data
99 *
100 * Read in each per-ag structure so we can count up the number of
101 * allocated inodes, free inodes and used filesystem blocks as this
102 * information is no longer persistent in the superblock. Once we have
103 * this information, write it into the in-core superblock structure.
104 */
105int
106xfs_initialize_perag_data(
107	struct xfs_mount	*mp,
108	xfs_agnumber_t		agcount)
109{
110	xfs_agnumber_t		index;
111	struct xfs_perag	*pag;
112	struct xfs_sb		*sbp = &mp->m_sb;
113	uint64_t		ifree = 0;
114	uint64_t		ialloc = 0;
115	uint64_t		bfree = 0;
116	uint64_t		bfreelst = 0;
117	uint64_t		btree = 0;
118	uint64_t		fdblocks;
119	int			error = 0;
120
121	for (index = 0; index < agcount; index++) {
122		/*
123		 * read the agf, then the agi. This gets us
124		 * all the information we need and populates the
125		 * per-ag structures for us.
126		 */
127		error = xfs_alloc_pagf_init(mp, NULL, index, 0);
128		if (error)
129			return error;
130
131		error = xfs_ialloc_pagi_init(mp, NULL, index);
132		if (error)
133			return error;
134		pag = xfs_perag_get(mp, index);
135		ifree += pag->pagi_freecount;
136		ialloc += pag->pagi_count;
137		bfree += pag->pagf_freeblks;
138		bfreelst += pag->pagf_flcount;
139		btree += pag->pagf_btreeblks;
140		xfs_perag_put(pag);
141	}
142	fdblocks = bfree + bfreelst + btree;
143
144	/*
145	 * If the new summary counts are obviously incorrect, fail the
146	 * mount operation because that implies the AGFs are also corrupt.
147	 * Clear FS_COUNTERS so that we don't unmount with a dirty log, which
148	 * will prevent xfs_repair from fixing anything.
149	 */
150	if (fdblocks > sbp->sb_dblocks || ifree > ialloc) {
151		xfs_alert(mp, "AGF corruption. Please run xfs_repair.");
152		error = -EFSCORRUPTED;
153		goto out;
154	}
155
156	/* Overwrite incore superblock counters with just-read data */
157	spin_lock(&mp->m_sb_lock);
158	sbp->sb_ifree = ifree;
159	sbp->sb_icount = ialloc;
160	sbp->sb_fdblocks = fdblocks;
161	spin_unlock(&mp->m_sb_lock);
162
163	xfs_reinit_percpu_counters(mp);
164out:
165	xfs_fs_mark_healthy(mp, XFS_SICK_FS_COUNTERS);
166	return error;
167}
168
169STATIC void
170__xfs_free_perag(
171	struct rcu_head	*head)
172{
173	struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head);
174
175	ASSERT(!delayed_work_pending(&pag->pag_blockgc_work));
176	ASSERT(atomic_read(&pag->pag_ref) == 0);
177	kmem_free(pag);
178}
179
180/*
181 * Free up the per-ag resources associated with the mount structure.
182 */
183void
184xfs_free_perag(
185	struct xfs_mount	*mp)
186{
187	struct xfs_perag	*pag;
188	xfs_agnumber_t		agno;
189
190	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
191		spin_lock(&mp->m_perag_lock);
192		pag = radix_tree_delete(&mp->m_perag_tree, agno);
193		spin_unlock(&mp->m_perag_lock);
194		ASSERT(pag);
195		ASSERT(atomic_read(&pag->pag_ref) == 0);
196
197		cancel_delayed_work_sync(&pag->pag_blockgc_work);
198		xfs_iunlink_destroy(pag);
199		xfs_buf_hash_destroy(pag);
200
201		call_rcu(&pag->rcu_head, __xfs_free_perag);
202	}
203}
204
205int
206xfs_initialize_perag(
207	struct xfs_mount	*mp,
208	xfs_agnumber_t		agcount,
209	xfs_agnumber_t		*maxagi)
210{
211	struct xfs_perag	*pag;
212	xfs_agnumber_t		index;
213	xfs_agnumber_t		first_initialised = NULLAGNUMBER;
214	int			error;
215
216	/*
217	 * Walk the current per-ag tree so we don't try to initialise AGs
218	 * that already exist (growfs case). Allocate and insert all the
219	 * AGs we don't find ready for initialisation.
220	 */
221	for (index = 0; index < agcount; index++) {
222		pag = xfs_perag_get(mp, index);
223		if (pag) {
224			xfs_perag_put(pag);
225			continue;
226		}
227
228		pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
229		if (!pag) {
230			error = -ENOMEM;
231			goto out_unwind_new_pags;
232		}
233		pag->pag_agno = index;
234		pag->pag_mount = mp;
235
236		error = radix_tree_preload(GFP_NOFS);
237		if (error)
238			goto out_free_pag;
239
240		spin_lock(&mp->m_perag_lock);
241		if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
242			WARN_ON_ONCE(1);
243			spin_unlock(&mp->m_perag_lock);
244			radix_tree_preload_end();
245			error = -EEXIST;
246			goto out_free_pag;
247		}
248		spin_unlock(&mp->m_perag_lock);
249		radix_tree_preload_end();
250
251		/* Place kernel structure only init below this point. */
252		spin_lock_init(&pag->pag_ici_lock);
253		spin_lock_init(&pag->pagb_lock);
254		spin_lock_init(&pag->pag_state_lock);
255		INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker);
256		INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
257		init_waitqueue_head(&pag->pagb_wait);
258		pag->pagb_count = 0;
259		pag->pagb_tree = RB_ROOT;
260
261		error = xfs_buf_hash_init(pag);
262		if (error)
263			goto out_remove_pag;
264
265		error = xfs_iunlink_init(pag);
266		if (error)
267			goto out_hash_destroy;
268
269		/* first new pag is fully initialized */
270		if (first_initialised == NULLAGNUMBER)
271			first_initialised = index;
272	}
273
274	index = xfs_set_inode_alloc(mp, agcount);
275
276	if (maxagi)
277		*maxagi = index;
278
279	mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp);
280	return 0;
281
282out_hash_destroy:
283	xfs_buf_hash_destroy(pag);
284out_remove_pag:
285	radix_tree_delete(&mp->m_perag_tree, index);
286out_free_pag:
287	kmem_free(pag);
288out_unwind_new_pags:
289	/* unwind any prior newly initialized pags */
290	for (index = first_initialised; index < agcount; index++) {
291		pag = radix_tree_delete(&mp->m_perag_tree, index);
292		if (!pag)
293			break;
294		xfs_buf_hash_destroy(pag);
295		xfs_iunlink_destroy(pag);
296		kmem_free(pag);
297	}
298	return error;
299}
300
301static int
302xfs_get_aghdr_buf(
303	struct xfs_mount	*mp,
304	xfs_daddr_t		blkno,
305	size_t			numblks,
306	struct xfs_buf		**bpp,
307	const struct xfs_buf_ops *ops)
308{
309	struct xfs_buf		*bp;
310	int			error;
311
312	error = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, 0, &bp);
313	if (error)
314		return error;
315
316	bp->b_maps[0].bm_bn = blkno;
317	bp->b_ops = ops;
318
319	*bpp = bp;
320	return 0;
321}
322
323static inline bool is_log_ag(struct xfs_mount *mp, struct aghdr_init_data *id)
324{
325	return mp->m_sb.sb_logstart > 0 &&
326	       id->agno == XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart);
327}
328
329/*
330 * Generic btree root block init function
331 */
332static void
333xfs_btroot_init(
334	struct xfs_mount	*mp,
335	struct xfs_buf		*bp,
336	struct aghdr_init_data	*id)
337{
338	xfs_btree_init_block(mp, bp, id->type, 0, 0, id->agno);
339}
340
341/* Finish initializing a free space btree. */
342static void
343xfs_freesp_init_recs(
344	struct xfs_mount	*mp,
345	struct xfs_buf		*bp,
346	struct aghdr_init_data	*id)
347{
348	struct xfs_alloc_rec	*arec;
349	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
350
351	arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
352	arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
353
354	if (is_log_ag(mp, id)) {
355		struct xfs_alloc_rec	*nrec;
356		xfs_agblock_t		start = XFS_FSB_TO_AGBNO(mp,
357							mp->m_sb.sb_logstart);
358
359		ASSERT(start >= mp->m_ag_prealloc_blocks);
360		if (start != mp->m_ag_prealloc_blocks) {
361			/*
362			 * Modify first record to pad stripe align of log
363			 */
364			arec->ar_blockcount = cpu_to_be32(start -
365						mp->m_ag_prealloc_blocks);
366			nrec = arec + 1;
367
368			/*
369			 * Insert second record at start of internal log
370			 * which then gets trimmed.
371			 */
372			nrec->ar_startblock = cpu_to_be32(
373					be32_to_cpu(arec->ar_startblock) +
374					be32_to_cpu(arec->ar_blockcount));
375			arec = nrec;
376			be16_add_cpu(&block->bb_numrecs, 1);
377		}
378		/*
379		 * Change record start to after the internal log
380		 */
381		be32_add_cpu(&arec->ar_startblock, mp->m_sb.sb_logblocks);
382	}
383
384	/*
385	 * Calculate the record block count and check for the case where
386	 * the log might have consumed all available space in the AG. If
387	 * so, reset the record count to 0 to avoid exposure of an invalid
388	 * record start block.
389	 */
390	arec->ar_blockcount = cpu_to_be32(id->agsize -
391					  be32_to_cpu(arec->ar_startblock));
392	if (!arec->ar_blockcount)
393		block->bb_numrecs = 0;
394}
395
396/*
397 * Alloc btree root block init functions
398 */
399static void
400xfs_bnoroot_init(
401	struct xfs_mount	*mp,
402	struct xfs_buf		*bp,
403	struct aghdr_init_data	*id)
404{
405	xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno);
406	xfs_freesp_init_recs(mp, bp, id);
407}
408
409static void
410xfs_cntroot_init(
411	struct xfs_mount	*mp,
412	struct xfs_buf		*bp,
413	struct aghdr_init_data	*id)
414{
415	xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno);
416	xfs_freesp_init_recs(mp, bp, id);
417}
418
419/*
420 * Reverse map root block init
421 */
422static void
423xfs_rmaproot_init(
424	struct xfs_mount	*mp,
425	struct xfs_buf		*bp,
426	struct aghdr_init_data	*id)
427{
428	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
429	struct xfs_rmap_rec	*rrec;
430
431	xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 4, id->agno);
432
433	/*
434	 * mark the AG header regions as static metadata The BNO
435	 * btree block is the first block after the headers, so
436	 * it's location defines the size of region the static
437	 * metadata consumes.
438	 *
439	 * Note: unlike mkfs, we never have to account for log
440	 * space when growing the data regions
441	 */
442	rrec = XFS_RMAP_REC_ADDR(block, 1);
443	rrec->rm_startblock = 0;
444	rrec->rm_blockcount = cpu_to_be32(XFS_BNO_BLOCK(mp));
445	rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_FS);
446	rrec->rm_offset = 0;
447
448	/* account freespace btree root blocks */
449	rrec = XFS_RMAP_REC_ADDR(block, 2);
450	rrec->rm_startblock = cpu_to_be32(XFS_BNO_BLOCK(mp));
451	rrec->rm_blockcount = cpu_to_be32(2);
452	rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
453	rrec->rm_offset = 0;
454
455	/* account inode btree root blocks */
456	rrec = XFS_RMAP_REC_ADDR(block, 3);
457	rrec->rm_startblock = cpu_to_be32(XFS_IBT_BLOCK(mp));
458	rrec->rm_blockcount = cpu_to_be32(XFS_RMAP_BLOCK(mp) -
459					  XFS_IBT_BLOCK(mp));
460	rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_INOBT);
461	rrec->rm_offset = 0;
462
463	/* account for rmap btree root */
464	rrec = XFS_RMAP_REC_ADDR(block, 4);
465	rrec->rm_startblock = cpu_to_be32(XFS_RMAP_BLOCK(mp));
466	rrec->rm_blockcount = cpu_to_be32(1);
467	rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
468	rrec->rm_offset = 0;
469
470	/* account for refc btree root */
471	if (xfs_has_reflink(mp)) {
472		rrec = XFS_RMAP_REC_ADDR(block, 5);
473		rrec->rm_startblock = cpu_to_be32(xfs_refc_block(mp));
474		rrec->rm_blockcount = cpu_to_be32(1);
475		rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_REFC);
476		rrec->rm_offset = 0;
477		be16_add_cpu(&block->bb_numrecs, 1);
478	}
479
480	/* account for the log space */
481	if (is_log_ag(mp, id)) {
482		rrec = XFS_RMAP_REC_ADDR(block,
483				be16_to_cpu(block->bb_numrecs) + 1);
484		rrec->rm_startblock = cpu_to_be32(
485				XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart));
486		rrec->rm_blockcount = cpu_to_be32(mp->m_sb.sb_logblocks);
487		rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_LOG);
488		rrec->rm_offset = 0;
489		be16_add_cpu(&block->bb_numrecs, 1);
490	}
491}
492
493/*
494 * Initialise new secondary superblocks with the pre-grow geometry, but mark
495 * them as "in progress" so we know they haven't yet been activated. This will
496 * get cleared when the update with the new geometry information is done after
497 * changes to the primary are committed. This isn't strictly necessary, but we
498 * get it for free with the delayed buffer write lists and it means we can tell
499 * if a grow operation didn't complete properly after the fact.
500 */
501static void
502xfs_sbblock_init(
503	struct xfs_mount	*mp,
504	struct xfs_buf		*bp,
505	struct aghdr_init_data	*id)
506{
507	struct xfs_dsb		*dsb = bp->b_addr;
508
509	xfs_sb_to_disk(dsb, &mp->m_sb);
510	dsb->sb_inprogress = 1;
511}
512
513static void
514xfs_agfblock_init(
515	struct xfs_mount	*mp,
516	struct xfs_buf		*bp,
517	struct aghdr_init_data	*id)
518{
519	struct xfs_agf		*agf = bp->b_addr;
520	xfs_extlen_t		tmpsize;
521
522	agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC);
523	agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION);
524	agf->agf_seqno = cpu_to_be32(id->agno);
525	agf->agf_length = cpu_to_be32(id->agsize);
526	agf->agf_roots[XFS_BTNUM_BNOi] = cpu_to_be32(XFS_BNO_BLOCK(mp));
527	agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp));
528	agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1);
529	agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1);
530	if (xfs_has_rmapbt(mp)) {
531		agf->agf_roots[XFS_BTNUM_RMAPi] =
532					cpu_to_be32(XFS_RMAP_BLOCK(mp));
533		agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1);
534		agf->agf_rmap_blocks = cpu_to_be32(1);
535	}
536
537	agf->agf_flfirst = cpu_to_be32(1);
538	agf->agf_fllast = 0;
539	agf->agf_flcount = 0;
540	tmpsize = id->agsize - mp->m_ag_prealloc_blocks;
541	agf->agf_freeblks = cpu_to_be32(tmpsize);
542	agf->agf_longest = cpu_to_be32(tmpsize);
543	if (xfs_has_crc(mp))
544		uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid);
545	if (xfs_has_reflink(mp)) {
546		agf->agf_refcount_root = cpu_to_be32(
547				xfs_refc_block(mp));
548		agf->agf_refcount_level = cpu_to_be32(1);
549		agf->agf_refcount_blocks = cpu_to_be32(1);
550	}
551
552	if (is_log_ag(mp, id)) {
553		int64_t	logblocks = mp->m_sb.sb_logblocks;
554
555		be32_add_cpu(&agf->agf_freeblks, -logblocks);
556		agf->agf_longest = cpu_to_be32(id->agsize -
557			XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart) - logblocks);
558	}
559}
560
561static void
562xfs_agflblock_init(
563	struct xfs_mount	*mp,
564	struct xfs_buf		*bp,
565	struct aghdr_init_data	*id)
566{
567	struct xfs_agfl		*agfl = XFS_BUF_TO_AGFL(bp);
568	__be32			*agfl_bno;
569	int			bucket;
570
571	if (xfs_has_crc(mp)) {
572		agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC);
573		agfl->agfl_seqno = cpu_to_be32(id->agno);
574		uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid);
575	}
576
577	agfl_bno = xfs_buf_to_agfl_bno(bp);
578	for (bucket = 0; bucket < xfs_agfl_size(mp); bucket++)
579		agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK);
580}
581
582static void
583xfs_agiblock_init(
584	struct xfs_mount	*mp,
585	struct xfs_buf		*bp,
586	struct aghdr_init_data	*id)
587{
588	struct xfs_agi		*agi = bp->b_addr;
589	int			bucket;
590
591	agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC);
592	agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION);
593	agi->agi_seqno = cpu_to_be32(id->agno);
594	agi->agi_length = cpu_to_be32(id->agsize);
595	agi->agi_count = 0;
596	agi->agi_root = cpu_to_be32(XFS_IBT_BLOCK(mp));
597	agi->agi_level = cpu_to_be32(1);
598	agi->agi_freecount = 0;
599	agi->agi_newino = cpu_to_be32(NULLAGINO);
600	agi->agi_dirino = cpu_to_be32(NULLAGINO);
601	if (xfs_has_crc(mp))
602		uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid);
603	if (xfs_has_finobt(mp)) {
604		agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp));
605		agi->agi_free_level = cpu_to_be32(1);
606	}
607	for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++)
608		agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
609	if (xfs_has_inobtcounts(mp)) {
610		agi->agi_iblocks = cpu_to_be32(1);
611		if (xfs_has_finobt(mp))
612			agi->agi_fblocks = cpu_to_be32(1);
613	}
614}
615
616typedef void (*aghdr_init_work_f)(struct xfs_mount *mp, struct xfs_buf *bp,
617				  struct aghdr_init_data *id);
618static int
619xfs_ag_init_hdr(
620	struct xfs_mount	*mp,
621	struct aghdr_init_data	*id,
622	aghdr_init_work_f	work,
623	const struct xfs_buf_ops *ops)
624{
625	struct xfs_buf		*bp;
626	int			error;
627
628	error = xfs_get_aghdr_buf(mp, id->daddr, id->numblks, &bp, ops);
629	if (error)
630		return error;
631
632	(*work)(mp, bp, id);
633
634	xfs_buf_delwri_queue(bp, &id->buffer_list);
635	xfs_buf_relse(bp);
636	return 0;
637}
638
639struct xfs_aghdr_grow_data {
640	xfs_daddr_t		daddr;
641	size_t			numblks;
642	const struct xfs_buf_ops *ops;
643	aghdr_init_work_f	work;
644	xfs_btnum_t		type;
645	bool			need_init;
646};
647
648/*
649 * Prepare new AG headers to be written to disk. We use uncached buffers here,
650 * as it is assumed these new AG headers are currently beyond the currently
651 * valid filesystem address space. Using cached buffers would trip over EOFS
652 * corruption detection alogrithms in the buffer cache lookup routines.
653 *
654 * This is a non-transactional function, but the prepared buffers are added to a
655 * delayed write buffer list supplied by the caller so they can submit them to
656 * disk and wait on them as required.
657 */
658int
659xfs_ag_init_headers(
660	struct xfs_mount	*mp,
661	struct aghdr_init_data	*id)
662
663{
664	struct xfs_aghdr_grow_data aghdr_data[] = {
665	{ /* SB */
666		.daddr = XFS_AG_DADDR(mp, id->agno, XFS_SB_DADDR),
667		.numblks = XFS_FSS_TO_BB(mp, 1),
668		.ops = &xfs_sb_buf_ops,
669		.work = &xfs_sbblock_init,
670		.need_init = true
671	},
672	{ /* AGF */
673		.daddr = XFS_AG_DADDR(mp, id->agno, XFS_AGF_DADDR(mp)),
674		.numblks = XFS_FSS_TO_BB(mp, 1),
675		.ops = &xfs_agf_buf_ops,
676		.work = &xfs_agfblock_init,
677		.need_init = true
678	},
679	{ /* AGFL */
680		.daddr = XFS_AG_DADDR(mp, id->agno, XFS_AGFL_DADDR(mp)),
681		.numblks = XFS_FSS_TO_BB(mp, 1),
682		.ops = &xfs_agfl_buf_ops,
683		.work = &xfs_agflblock_init,
684		.need_init = true
685	},
686	{ /* AGI */
687		.daddr = XFS_AG_DADDR(mp, id->agno, XFS_AGI_DADDR(mp)),
688		.numblks = XFS_FSS_TO_BB(mp, 1),
689		.ops = &xfs_agi_buf_ops,
690		.work = &xfs_agiblock_init,
691		.need_init = true
692	},
693	{ /* BNO root block */
694		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_BNO_BLOCK(mp)),
695		.numblks = BTOBB(mp->m_sb.sb_blocksize),
696		.ops = &xfs_bnobt_buf_ops,
697		.work = &xfs_bnoroot_init,
698		.need_init = true
699	},
700	{ /* CNT root block */
701		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_CNT_BLOCK(mp)),
702		.numblks = BTOBB(mp->m_sb.sb_blocksize),
703		.ops = &xfs_cntbt_buf_ops,
704		.work = &xfs_cntroot_init,
705		.need_init = true
706	},
707	{ /* INO root block */
708		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_IBT_BLOCK(mp)),
709		.numblks = BTOBB(mp->m_sb.sb_blocksize),
710		.ops = &xfs_inobt_buf_ops,
711		.work = &xfs_btroot_init,
712		.type = XFS_BTNUM_INO,
713		.need_init = true
714	},
715	{ /* FINO root block */
716		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_FIBT_BLOCK(mp)),
717		.numblks = BTOBB(mp->m_sb.sb_blocksize),
718		.ops = &xfs_finobt_buf_ops,
719		.work = &xfs_btroot_init,
720		.type = XFS_BTNUM_FINO,
721		.need_init =  xfs_has_finobt(mp)
722	},
723	{ /* RMAP root block */
724		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_RMAP_BLOCK(mp)),
725		.numblks = BTOBB(mp->m_sb.sb_blocksize),
726		.ops = &xfs_rmapbt_buf_ops,
727		.work = &xfs_rmaproot_init,
728		.need_init = xfs_has_rmapbt(mp)
729	},
730	{ /* REFC root block */
731		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, xfs_refc_block(mp)),
732		.numblks = BTOBB(mp->m_sb.sb_blocksize),
733		.ops = &xfs_refcountbt_buf_ops,
734		.work = &xfs_btroot_init,
735		.type = XFS_BTNUM_REFC,
736		.need_init = xfs_has_reflink(mp)
737	},
738	{ /* NULL terminating block */
739		.daddr = XFS_BUF_DADDR_NULL,
740	}
741	};
742	struct  xfs_aghdr_grow_data *dp;
743	int			error = 0;
744
745	/* Account for AG free space in new AG */
746	id->nfree += id->agsize - mp->m_ag_prealloc_blocks;
747	for (dp = &aghdr_data[0]; dp->daddr != XFS_BUF_DADDR_NULL; dp++) {
748		if (!dp->need_init)
749			continue;
750
751		id->daddr = dp->daddr;
752		id->numblks = dp->numblks;
753		id->type = dp->type;
754		error = xfs_ag_init_hdr(mp, id, dp->work, dp->ops);
755		if (error)
756			break;
757	}
758	return error;
759}
760
761int
762xfs_ag_shrink_space(
763	struct xfs_mount	*mp,
764	struct xfs_trans	**tpp,
765	xfs_agnumber_t		agno,
766	xfs_extlen_t		delta)
767{
768	struct xfs_alloc_arg	args = {
769		.tp	= *tpp,
770		.mp	= mp,
771		.type	= XFS_ALLOCTYPE_THIS_BNO,
772		.minlen = delta,
773		.maxlen = delta,
774		.oinfo	= XFS_RMAP_OINFO_SKIP_UPDATE,
775		.resv	= XFS_AG_RESV_NONE,
776		.prod	= 1
777	};
778	struct xfs_buf		*agibp, *agfbp;
779	struct xfs_agi		*agi;
780	struct xfs_agf		*agf;
781	xfs_agblock_t		aglen;
782	int			error, err2;
783
784	ASSERT(agno == mp->m_sb.sb_agcount - 1);
785	error = xfs_ialloc_read_agi(mp, *tpp, agno, &agibp);
786	if (error)
787		return error;
788
789	agi = agibp->b_addr;
790
791	error = xfs_alloc_read_agf(mp, *tpp, agno, 0, &agfbp);
792	if (error)
793		return error;
794
795	agf = agfbp->b_addr;
796	aglen = be32_to_cpu(agi->agi_length);
797	/* some extra paranoid checks before we shrink the ag */
798	if (XFS_IS_CORRUPT(mp, agf->agf_length != agi->agi_length))
799		return -EFSCORRUPTED;
800	if (delta >= aglen)
801		return -EINVAL;
802
803	args.fsbno = XFS_AGB_TO_FSB(mp, agno, aglen - delta);
804
805	/*
806	 * Make sure that the last inode cluster cannot overlap with the new
807	 * end of the AG, even if it's sparse.
808	 */
809	error = xfs_ialloc_check_shrink(*tpp, agno, agibp, aglen - delta);
810	if (error)
811		return error;
812
813	/*
814	 * Disable perag reservations so it doesn't cause the allocation request
815	 * to fail. We'll reestablish reservation before we return.
816	 */
817	error = xfs_ag_resv_free(agibp->b_pag);
818	if (error)
819		return error;
820
821	/* internal log shouldn't also show up in the free space btrees */
822	error = xfs_alloc_vextent(&args);
823	if (!error && args.agbno == NULLAGBLOCK)
824		error = -ENOSPC;
825
826	if (error) {
827		/*
828		 * if extent allocation fails, need to roll the transaction to
829		 * ensure that the AGFL fixup has been committed anyway.
830		 */
831		xfs_trans_bhold(*tpp, agfbp);
832		err2 = xfs_trans_roll(tpp);
833		if (err2)
834			return err2;
835		xfs_trans_bjoin(*tpp, agfbp);
836		goto resv_init_out;
837	}
838
839	/*
840	 * if successfully deleted from freespace btrees, need to confirm
841	 * per-AG reservation works as expected.
842	 */
843	be32_add_cpu(&agi->agi_length, -delta);
844	be32_add_cpu(&agf->agf_length, -delta);
845
846	err2 = xfs_ag_resv_init(agibp->b_pag, *tpp);
847	if (err2) {
848		be32_add_cpu(&agi->agi_length, delta);
849		be32_add_cpu(&agf->agf_length, delta);
850		if (err2 != -ENOSPC)
851			goto resv_err;
852
853		__xfs_bmap_add_free(*tpp, args.fsbno, delta, NULL, true);
854
855		/*
856		 * Roll the transaction before trying to re-init the per-ag
857		 * reservation. The new transaction is clean so it will cancel
858		 * without any side effects.
859		 */
860		error = xfs_defer_finish(tpp);
861		if (error)
862			return error;
863
864		error = -ENOSPC;
865		goto resv_init_out;
866	}
867	xfs_ialloc_log_agi(*tpp, agibp, XFS_AGI_LENGTH);
868	xfs_alloc_log_agf(*tpp, agfbp, XFS_AGF_LENGTH);
869	return 0;
870resv_init_out:
871	err2 = xfs_ag_resv_init(agibp->b_pag, *tpp);
872	if (!err2)
873		return error;
874resv_err:
875	xfs_warn(mp, "Error %d reserving per-AG metadata reserve pool.", err2);
876	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
877	return err2;
878}
879
880/*
881 * Extent the AG indicated by the @id by the length passed in
882 */
883int
884xfs_ag_extend_space(
885	struct xfs_mount	*mp,
886	struct xfs_trans	*tp,
887	struct aghdr_init_data	*id,
888	xfs_extlen_t		len)
889{
890	struct xfs_buf		*bp;
891	struct xfs_agi		*agi;
892	struct xfs_agf		*agf;
893	int			error;
894
895	/*
896	 * Change the agi length.
897	 */
898	error = xfs_ialloc_read_agi(mp, tp, id->agno, &bp);
899	if (error)
900		return error;
901
902	agi = bp->b_addr;
903	be32_add_cpu(&agi->agi_length, len);
904	ASSERT(id->agno == mp->m_sb.sb_agcount - 1 ||
905	       be32_to_cpu(agi->agi_length) == mp->m_sb.sb_agblocks);
906	xfs_ialloc_log_agi(tp, bp, XFS_AGI_LENGTH);
907
908	/*
909	 * Change agf length.
910	 */
911	error = xfs_alloc_read_agf(mp, tp, id->agno, 0, &bp);
912	if (error)
913		return error;
914
915	agf = bp->b_addr;
916	be32_add_cpu(&agf->agf_length, len);
917	ASSERT(agf->agf_length == agi->agi_length);
918	xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH);
919
920	/*
921	 * Free the new space.
922	 *
923	 * XFS_RMAP_OINFO_SKIP_UPDATE is used here to tell the rmap btree that
924	 * this doesn't actually exist in the rmap btree.
925	 */
926	error = xfs_rmap_free(tp, bp, bp->b_pag,
927				be32_to_cpu(agf->agf_length) - len,
928				len, &XFS_RMAP_OINFO_SKIP_UPDATE);
929	if (error)
930		return error;
931
932	return  xfs_free_extent(tp, XFS_AGB_TO_FSB(mp, id->agno,
933					be32_to_cpu(agf->agf_length) - len),
934				len, &XFS_RMAP_OINFO_SKIP_UPDATE,
935				XFS_AG_RESV_NONE);
936}
937
938/* Retrieve AG geometry. */
939int
940xfs_ag_get_geometry(
941	struct xfs_mount	*mp,
942	xfs_agnumber_t		agno,
943	struct xfs_ag_geometry	*ageo)
944{
945	struct xfs_buf		*agi_bp;
946	struct xfs_buf		*agf_bp;
947	struct xfs_agi		*agi;
948	struct xfs_agf		*agf;
949	struct xfs_perag	*pag;
950	unsigned int		freeblks;
951	int			error;
952
953	if (agno >= mp->m_sb.sb_agcount)
954		return -EINVAL;
955
956	/* Lock the AG headers. */
957	error = xfs_ialloc_read_agi(mp, NULL, agno, &agi_bp);
958	if (error)
959		return error;
960	error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agf_bp);
961	if (error)
962		goto out_agi;
963
964	pag = agi_bp->b_pag;
965
966	/* Fill out form. */
967	memset(ageo, 0, sizeof(*ageo));
968	ageo->ag_number = agno;
969
970	agi = agi_bp->b_addr;
971	ageo->ag_icount = be32_to_cpu(agi->agi_count);
972	ageo->ag_ifree = be32_to_cpu(agi->agi_freecount);
973
974	agf = agf_bp->b_addr;
975	ageo->ag_length = be32_to_cpu(agf->agf_length);
976	freeblks = pag->pagf_freeblks +
977		   pag->pagf_flcount +
978		   pag->pagf_btreeblks -
979		   xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE);
980	ageo->ag_freeblks = freeblks;
981	xfs_ag_geom_health(pag, ageo);
982
983	/* Release resources. */
984	xfs_buf_relse(agf_bp);
985out_agi:
986	xfs_buf_relse(agi_bp);
987	return error;
988}
989