1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (c) 2006-2007 Silicon Graphics, Inc.
4 * Copyright (c) 2014 Christoph Hellwig.
5 * All Rights Reserved.
6 */
7#include "xfs.h"
8#include "xfs_shared.h"
9#include "xfs_format.h"
10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h"
12#include "xfs_mount.h"
13#include "xfs_inode.h"
14#include "xfs_bmap.h"
15#include "xfs_bmap_util.h"
16#include "xfs_alloc.h"
17#include "xfs_mru_cache.h"
18#include "xfs_trace.h"
19#include "xfs_ag.h"
20#include "xfs_ag_resv.h"
21#include "xfs_trans.h"
22#include "xfs_filestream.h"
23
/*
 * One cached filestream association, stored in the per-mount MRU cache and
 * keyed by the parent directory inode number.
 *
 * While an item exists it accounts for one count in pag->pagf_fstrms and one
 * active perag reference; both are dropped by xfs_fstrm_free_func().
 */
struct xfs_fstrm_item {
	/* Embedded MRU cache element; recovered via container_of(). */
	struct xfs_mru_cache_elem	mru;
	struct xfs_perag		*pag; /* AG in use for this directory */
};
28
/*
 * Flag bits passed to xfs_filestream_pick_ag() to steer AG selection.
 */
enum xfs_fstrm_alloc {
	/*
	 * The allocation is for user data: AGs that prefer metadata are
	 * skipped unless XFS_PICK_LOWSPACE is also set.
	 */
	XFS_PICK_USERDATA = 1,
	/*
	 * Low space selection: set when the transaction is in low space mode
	 * or for the final scan pass; lifts the metadata-preferred AG
	 * restriction.
	 */
	XFS_PICK_LOWSPACE = 2,
};
33
34static void
35xfs_fstrm_free_func(
36	void			*data,
37	struct xfs_mru_cache_elem *mru)
38{
39	struct xfs_fstrm_item	*item =
40		container_of(mru, struct xfs_fstrm_item, mru);
41	struct xfs_perag	*pag = item->pag;
42
43	trace_xfs_filestream_free(pag, mru->key);
44	atomic_dec(&pag->pagf_fstrms);
45	xfs_perag_rele(pag);
46
47	kfree(item);
48}
49
50/*
51 * Scan the AGs starting at start_agno looking for an AG that isn't in use and
52 * has at least minlen blocks free. If no AG is found to match the allocation
53 * requirements, pick the AG with the most free space in it.
54 */
55static int
56xfs_filestream_pick_ag(
57	struct xfs_alloc_arg	*args,
58	xfs_ino_t		pino,
59	xfs_agnumber_t		start_agno,
60	int			flags,
61	xfs_extlen_t		*longest)
62{
63	struct xfs_mount	*mp = args->mp;
64	struct xfs_perag	*pag;
65	struct xfs_perag	*max_pag = NULL;
66	xfs_extlen_t		minlen = *longest;
67	xfs_extlen_t		free = 0, minfree, maxfree = 0;
68	xfs_agnumber_t		agno;
69	bool			first_pass = true;
70	int			err;
71
72	/* 2% of an AG's blocks must be free for it to be chosen. */
73	minfree = mp->m_sb.sb_agblocks / 50;
74
75restart:
76	for_each_perag_wrap(mp, start_agno, agno, pag) {
77		trace_xfs_filestream_scan(pag, pino);
78		*longest = 0;
79		err = xfs_bmap_longest_free_extent(pag, NULL, longest);
80		if (err) {
81			if (err != -EAGAIN)
82				break;
83			/* Couldn't lock the AGF, skip this AG. */
84			err = 0;
85			continue;
86		}
87
88		/* Keep track of the AG with the most free blocks. */
89		if (pag->pagf_freeblks > maxfree) {
90			maxfree = pag->pagf_freeblks;
91			if (max_pag)
92				xfs_perag_rele(max_pag);
93			atomic_inc(&pag->pag_active_ref);
94			max_pag = pag;
95		}
96
97		/*
98		 * The AG reference count does two things: it enforces mutual
99		 * exclusion when examining the suitability of an AG in this
100		 * loop, and it guards against two filestreams being established
101		 * in the same AG as each other.
102		 */
103		if (atomic_inc_return(&pag->pagf_fstrms) <= 1) {
104			if (((minlen && *longest >= minlen) ||
105			     (!minlen && pag->pagf_freeblks >= minfree)) &&
106			    (!xfs_perag_prefers_metadata(pag) ||
107			     !(flags & XFS_PICK_USERDATA) ||
108			     (flags & XFS_PICK_LOWSPACE))) {
109				/* Break out, retaining the reference on the AG. */
110				free = pag->pagf_freeblks;
111				break;
112			}
113		}
114
115		/* Drop the reference on this AG, it's not usable. */
116		atomic_dec(&pag->pagf_fstrms);
117	}
118
119	if (err) {
120		xfs_perag_rele(pag);
121		if (max_pag)
122			xfs_perag_rele(max_pag);
123		return err;
124	}
125
126	if (!pag) {
127		/*
128		 * Allow a second pass to give xfs_bmap_longest_free_extent()
129		 * another attempt at locking AGFs that it might have skipped
130		 * over before we fail.
131		 */
132		if (first_pass) {
133			first_pass = false;
134			goto restart;
135		}
136
137		/*
138		 * We must be low on data space, so run a final lowspace
139		 * optimised selection pass if we haven't already.
140		 */
141		if (!(flags & XFS_PICK_LOWSPACE)) {
142			flags |= XFS_PICK_LOWSPACE;
143			goto restart;
144		}
145
146		/*
147		 * No unassociated AGs are available, so select the AG with the
148		 * most free space, regardless of whether it's already in use by
149		 * another filestream. It none suit, just use whatever AG we can
150		 * grab.
151		 */
152		if (!max_pag) {
153			for_each_perag_wrap(args->mp, 0, start_agno, args->pag)
154				break;
155			atomic_inc(&args->pag->pagf_fstrms);
156			*longest = 0;
157		} else {
158			pag = max_pag;
159			free = maxfree;
160			atomic_inc(&pag->pagf_fstrms);
161		}
162	} else if (max_pag) {
163		xfs_perag_rele(max_pag);
164	}
165
166	trace_xfs_filestream_pick(pag, pino, free);
167	args->pag = pag;
168	return 0;
169
170}
171
172static struct xfs_inode *
173xfs_filestream_get_parent(
174	struct xfs_inode	*ip)
175{
176	struct inode		*inode = VFS_I(ip), *dir = NULL;
177	struct dentry		*dentry, *parent;
178
179	dentry = d_find_alias(inode);
180	if (!dentry)
181		goto out;
182
183	parent = dget_parent(dentry);
184	if (!parent)
185		goto out_dput;
186
187	dir = igrab(d_inode(parent));
188	dput(parent);
189
190out_dput:
191	dput(dentry);
192out:
193	return dir ? XFS_I(dir) : NULL;
194}
195
196/*
197 * Lookup the mru cache for an existing association. If one exists and we can
198 * use it, return with an active perag reference indicating that the allocation
199 * will proceed with that association.
200 *
201 * If we have no association, or we cannot use the current one and have to
202 * destroy it, return with longest = 0 to tell the caller to create a new
203 * association.
204 */
205static int
206xfs_filestream_lookup_association(
207	struct xfs_bmalloca	*ap,
208	struct xfs_alloc_arg	*args,
209	xfs_ino_t		pino,
210	xfs_extlen_t		*longest)
211{
212	struct xfs_mount	*mp = args->mp;
213	struct xfs_perag	*pag;
214	struct xfs_mru_cache_elem *mru;
215	int			error = 0;
216
217	*longest = 0;
218	mru = xfs_mru_cache_lookup(mp->m_filestream, pino);
219	if (!mru)
220		return 0;
221	/*
222	 * Grab the pag and take an extra active reference for the caller whilst
223	 * the mru item cannot go away. This means we'll pin the perag with
224	 * the reference we get here even if the filestreams association is torn
225	 * down immediately after we mark the lookup as done.
226	 */
227	pag = container_of(mru, struct xfs_fstrm_item, mru)->pag;
228	atomic_inc(&pag->pag_active_ref);
229	xfs_mru_cache_done(mp->m_filestream);
230
231	trace_xfs_filestream_lookup(pag, ap->ip->i_ino);
232
233	ap->blkno = XFS_AGB_TO_FSB(args->mp, pag->pag_agno, 0);
234	xfs_bmap_adjacent(ap);
235
236	/*
237	 * If there is very little free space before we start a filestreams
238	 * allocation, we're almost guaranteed to fail to find a large enough
239	 * free space available so just use the cached AG.
240	 */
241	if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
242		*longest = 1;
243		goto out_done;
244	}
245
246	error = xfs_bmap_longest_free_extent(pag, args->tp, longest);
247	if (error == -EAGAIN)
248		error = 0;
249	if (error || *longest < args->maxlen) {
250		/* We aren't going to use this perag */
251		*longest = 0;
252		xfs_perag_rele(pag);
253		return error;
254	}
255
256out_done:
257	args->pag = pag;
258	return 0;
259}
260
261static int
262xfs_filestream_create_association(
263	struct xfs_bmalloca	*ap,
264	struct xfs_alloc_arg	*args,
265	xfs_ino_t		pino,
266	xfs_extlen_t		*longest)
267{
268	struct xfs_mount	*mp = args->mp;
269	struct xfs_mru_cache_elem *mru;
270	struct xfs_fstrm_item	*item;
271	xfs_agnumber_t		agno = XFS_INO_TO_AGNO(mp, pino);
272	int			flags = 0;
273	int			error;
274
275	/* Changing parent AG association now, so remove the existing one. */
276	mru = xfs_mru_cache_remove(mp->m_filestream, pino);
277	if (mru) {
278		struct xfs_fstrm_item *item =
279			container_of(mru, struct xfs_fstrm_item, mru);
280
281		agno = (item->pag->pag_agno + 1) % mp->m_sb.sb_agcount;
282		xfs_fstrm_free_func(mp, mru);
283	} else if (xfs_is_inode32(mp)) {
284		xfs_agnumber_t	 rotorstep = xfs_rotorstep;
285
286		agno = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount;
287		mp->m_agfrotor = (mp->m_agfrotor + 1) %
288				 (mp->m_sb.sb_agcount * rotorstep);
289	}
290
291	ap->blkno = XFS_AGB_TO_FSB(args->mp, agno, 0);
292	xfs_bmap_adjacent(ap);
293
294	if (ap->datatype & XFS_ALLOC_USERDATA)
295		flags |= XFS_PICK_USERDATA;
296	if (ap->tp->t_flags & XFS_TRANS_LOWMODE)
297		flags |= XFS_PICK_LOWSPACE;
298
299	*longest = ap->length;
300	error = xfs_filestream_pick_ag(args, pino, agno, flags, longest);
301	if (error)
302		return error;
303
304	/*
305	 * We are going to use this perag now, so create an assoication for it.
306	 * xfs_filestream_pick_ag() has already bumped the perag fstrms counter
307	 * for us, so all we need to do here is take another active reference to
308	 * the perag for the cached association.
309	 *
310	 * If we fail to store the association, we need to drop the fstrms
311	 * counter as well as drop the perag reference we take here for the
312	 * item. We do not need to return an error for this failure - as long as
313	 * we return a referenced AG, the allocation can still go ahead just
314	 * fine.
315	 */
316	item = kmalloc(sizeof(*item), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
317	if (!item)
318		goto out_put_fstrms;
319
320	atomic_inc(&args->pag->pag_active_ref);
321	item->pag = args->pag;
322	error = xfs_mru_cache_insert(mp->m_filestream, pino, &item->mru);
323	if (error)
324		goto out_free_item;
325	return 0;
326
327out_free_item:
328	xfs_perag_rele(item->pag);
329	kfree(item);
330out_put_fstrms:
331	atomic_dec(&args->pag->pagf_fstrms);
332	return 0;
333}
334
335/*
336 * Search for an allocation group with a single extent large enough for
337 * the request. First we look for an existing association and use that if it
338 * is found. Otherwise, we create a new association by selecting an AG that fits
339 * the allocation criteria.
340 *
341 * We return with a referenced perag in args->pag to indicate which AG we are
342 * allocating into or an error with no references held.
343 */
344int
345xfs_filestream_select_ag(
346	struct xfs_bmalloca	*ap,
347	struct xfs_alloc_arg	*args,
348	xfs_extlen_t		*longest)
349{
350	struct xfs_mount	*mp = args->mp;
351	struct xfs_inode	*pip;
352	xfs_ino_t		ino = 0;
353	int			error = 0;
354
355	*longest = 0;
356	args->total = ap->total;
357	pip = xfs_filestream_get_parent(ap->ip);
358	if (pip) {
359		ino = pip->i_ino;
360		error = xfs_filestream_lookup_association(ap, args, ino,
361				longest);
362		xfs_irele(pip);
363		if (error)
364			return error;
365		if (*longest >= args->maxlen)
366			goto out_select;
367		if (ap->tp->t_flags & XFS_TRANS_LOWMODE)
368			goto out_select;
369	}
370
371	error = xfs_filestream_create_association(ap, args, ino, longest);
372	if (error)
373		return error;
374
375out_select:
376	ap->blkno = XFS_AGB_TO_FSB(mp, args->pag->pag_agno, 0);
377	return 0;
378}
379
380void
381xfs_filestream_deassociate(
382	struct xfs_inode	*ip)
383{
384	xfs_mru_cache_delete(ip->i_mount->m_filestream, ip->i_ino);
385}
386
387int
388xfs_filestream_mount(
389	xfs_mount_t	*mp)
390{
391	/*
392	 * The filestream timer tunable is currently fixed within the range of
393	 * one second to four minutes, with five seconds being the default.  The
394	 * group count is somewhat arbitrary, but it'd be nice to adhere to the
395	 * timer tunable to within about 10 percent.  This requires at least 10
396	 * groups.
397	 */
398	return xfs_mru_cache_create(&mp->m_filestream, mp,
399			xfs_fstrm_centisecs * 10, 10, xfs_fstrm_free_func);
400}
401
/*
 * Destroy the filestreams MRU cache that xfs_filestream_mount() created for
 * @mp.
 */
void
xfs_filestream_unmount(
	xfs_mount_t	*mp)
{
	xfs_mru_cache_destroy(mp->m_filestream);
}
408