xfs_qm.c revision 75c8c50f
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
4 * All Rights Reserved.
5 */
6#include "xfs.h"
7#include "xfs_fs.h"
8#include "xfs_shared.h"
9#include "xfs_format.h"
10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h"
12#include "xfs_bit.h"
13#include "xfs_sb.h"
14#include "xfs_mount.h"
15#include "xfs_inode.h"
16#include "xfs_iwalk.h"
17#include "xfs_quota.h"
18#include "xfs_bmap.h"
19#include "xfs_bmap_util.h"
20#include "xfs_trans.h"
21#include "xfs_trans_space.h"
22#include "xfs_qm.h"
23#include "xfs_trace.h"
24#include "xfs_icache.h"
25#include "xfs_error.h"
26#include "xfs_ag.h"
27#include "xfs_ialloc.h"
28
29/*
30 * The global quota manager. There is only one of these for the entire
31 * system, _not_ one per file system. XQM keeps track of the overall
32 * quota functionality, including maintaining the freelist and hash
33 * tables of dquots.
34 */
35STATIC int	xfs_qm_init_quotainos(struct xfs_mount *mp);
36STATIC int	xfs_qm_init_quotainfo(struct xfs_mount *mp);
37
38STATIC void	xfs_qm_destroy_quotainos(struct xfs_quotainfo *qi);
39STATIC void	xfs_qm_dqfree_one(struct xfs_dquot *dqp);
40/*
41 * We use the batch lookup interface to iterate over the dquots as it
42 * currently is the only interface into the radix tree code that allows
43 * fuzzy lookups instead of exact matches.  Holding the lock over multiple
44 * operations is fine as all callers run either during mount/umount or
45 * quotaoff.
46 */
47#define XFS_DQ_LOOKUP_BATCH	32
48
49STATIC int
50xfs_qm_dquot_walk(
51	struct xfs_mount	*mp,
52	xfs_dqtype_t		type,
53	int			(*execute)(struct xfs_dquot *dqp, void *data),
54	void			*data)
55{
56	struct xfs_quotainfo	*qi = mp->m_quotainfo;
57	struct radix_tree_root	*tree = xfs_dquot_tree(qi, type);
58	uint32_t		next_index;
59	int			last_error = 0;
60	int			skipped;
61	int			nr_found;
62
63restart:
64	skipped = 0;
65	next_index = 0;
66	nr_found = 0;
67
68	while (1) {
69		struct xfs_dquot *batch[XFS_DQ_LOOKUP_BATCH];
70		int		error = 0;
71		int		i;
72
73		mutex_lock(&qi->qi_tree_lock);
74		nr_found = radix_tree_gang_lookup(tree, (void **)batch,
75					next_index, XFS_DQ_LOOKUP_BATCH);
76		if (!nr_found) {
77			mutex_unlock(&qi->qi_tree_lock);
78			break;
79		}
80
81		for (i = 0; i < nr_found; i++) {
82			struct xfs_dquot *dqp = batch[i];
83
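			/*
			 * Start the next gang lookup just past this dquot's
			 * id; if the id wraps back to zero we have covered
			 * the whole tree (checked below).
			 */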
84			next_index = dqp->q_id + 1;
85
86			error = execute(batch[i], data);
87			if (error == -EAGAIN) {
88				skipped++;
89				continue;
90			}
91			if (error && last_error != -EFSCORRUPTED)
92				last_error = error;
93		}
94
95		mutex_unlock(&qi->qi_tree_lock);
96
97		/* bail out if the filesystem is corrupted.  */
98		if (last_error == -EFSCORRUPTED) {
99			skipped = 0;
100			break;
101		}
102		/* we're done if id overflows back to zero */
103		if (!next_index)
104			break;
105	}
106
107	if (skipped) {
108		delay(1);
109		goto restart;
110	}
111
112	return last_error;
113}
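
/*
 * Illustrative sketch, not part of the original file: a minimal execute()
 * callback for xfs_qm_dquot_walk() above.  Returning -EAGAIN asks the walker
 * to skip this dquot for now and restart the walk after a short delay; any
 * other error is remembered and returned once the walk finishes.
 * xfs_example_count_one() is a hypothetical helper named only for this sketch.
 */
static inline int
xfs_example_count_one(
	struct xfs_dquot	*dqp,
	void			*data)
{
	unsigned long		*count = data;

	if (!xfs_dqlock_nowait(dqp))
		return -EAGAIN;		/* busy; let the walker retry */
	(*count)++;
	xfs_dqunlock(dqp);
	return 0;
}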
114
115
116/*
117 * Purge a dquot from all tracking data structures and free it.
118 */
119STATIC int
120xfs_qm_dqpurge(
121	struct xfs_dquot	*dqp,
122	void			*data)
123{
124	struct xfs_mount	*mp = dqp->q_mount;
125	struct xfs_quotainfo	*qi = mp->m_quotainfo;
126	int			error = -EAGAIN;
127
128	xfs_dqlock(dqp);
129	if ((dqp->q_flags & XFS_DQFLAG_FREEING) || dqp->q_nrefs != 0)
130		goto out_unlock;
131
132	dqp->q_flags |= XFS_DQFLAG_FREEING;
133
134	xfs_dqflock(dqp);
135
136	/*
137	 * If we are turning this type of quota off, we don't care
138	 * about the dirty metadata sitting in this dquot. OTOH, if
139	 * we're unmounting, we do care, so we flush it and wait.
140	 */
141	if (XFS_DQ_IS_DIRTY(dqp)) {
142		struct xfs_buf	*bp = NULL;
143
144		/*
145		 * We don't care about getting disk errors here. We need
146		 * to purge this dquot anyway, so we go ahead regardless.
147		 */
148		error = xfs_qm_dqflush(dqp, &bp);
149		if (!error) {
150			error = xfs_bwrite(bp);
151			xfs_buf_relse(bp);
152		} else if (error == -EAGAIN) {
153			dqp->q_flags &= ~XFS_DQFLAG_FREEING;
154			goto out_unlock;
155		}
156		xfs_dqflock(dqp);
157	}
158
159	ASSERT(atomic_read(&dqp->q_pincount) == 0);
160	ASSERT(xfs_is_shutdown(mp) ||
161		!test_bit(XFS_LI_IN_AIL, &dqp->q_logitem.qli_item.li_flags));
162
163	xfs_dqfunlock(dqp);
164	xfs_dqunlock(dqp);
165
166	radix_tree_delete(xfs_dquot_tree(qi, xfs_dquot_type(dqp)), dqp->q_id);
167	qi->qi_dquots--;
168
169	/*
170	 * We move dquots to the freelist as soon as their reference count
171	 * hits zero, so it really should be on the freelist here.
172	 */
173	ASSERT(!list_empty(&dqp->q_lru));
174	list_lru_del(&qi->qi_lru, &dqp->q_lru);
175	XFS_STATS_DEC(mp, xs_qm_dquot_unused);
176
177	xfs_qm_dqdestroy(dqp);
178	return 0;
179
180out_unlock:
181	xfs_dqunlock(dqp);
182	return error;
183}
184
185/*
186 * Purge the dquot cache.
187 */
188static void
189xfs_qm_dqpurge_all(
190	struct xfs_mount	*mp)
191{
192	xfs_qm_dquot_walk(mp, XFS_DQTYPE_USER, xfs_qm_dqpurge, NULL);
193	xfs_qm_dquot_walk(mp, XFS_DQTYPE_GROUP, xfs_qm_dqpurge, NULL);
194	xfs_qm_dquot_walk(mp, XFS_DQTYPE_PROJ, xfs_qm_dqpurge, NULL);
195}
196
197/*
198 * Just destroy the quotainfo structure.
199 */
200void
201xfs_qm_unmount(
202	struct xfs_mount	*mp)
203{
204	if (mp->m_quotainfo) {
205		xfs_qm_dqpurge_all(mp);
206		xfs_qm_destroy_quotainfo(mp);
207	}
208}
209
210/*
211 * Called from the vfsops layer.
212 */
213void
214xfs_qm_unmount_quotas(
215	xfs_mount_t	*mp)
216{
217	/*
218	 * Release the dquots that root inode, et al might be holding,
219	 * before we flush quotas and blow away the quotainfo structure.
220	 */
221	ASSERT(mp->m_rootip);
222	xfs_qm_dqdetach(mp->m_rootip);
223	if (mp->m_rbmip)
224		xfs_qm_dqdetach(mp->m_rbmip);
225	if (mp->m_rsumip)
226		xfs_qm_dqdetach(mp->m_rsumip);
227
228	/*
229	 * Release the quota inodes.
230	 */
231	if (mp->m_quotainfo) {
232		if (mp->m_quotainfo->qi_uquotaip) {
233			xfs_irele(mp->m_quotainfo->qi_uquotaip);
234			mp->m_quotainfo->qi_uquotaip = NULL;
235		}
236		if (mp->m_quotainfo->qi_gquotaip) {
237			xfs_irele(mp->m_quotainfo->qi_gquotaip);
238			mp->m_quotainfo->qi_gquotaip = NULL;
239		}
240		if (mp->m_quotainfo->qi_pquotaip) {
241			xfs_irele(mp->m_quotainfo->qi_pquotaip);
242			mp->m_quotainfo->qi_pquotaip = NULL;
243		}
244	}
245}
246
247STATIC int
248xfs_qm_dqattach_one(
249	struct xfs_inode	*ip,
250	xfs_dqtype_t		type,
251	bool			doalloc,
252	struct xfs_dquot	**IO_idqpp)
253{
254	struct xfs_dquot	*dqp;
255	int			error;
256
257	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
258	error = 0;
259
260	/*
261	 * See if we already have it in the inode itself. IO_idqpp is &i_udquot
262	 * or &i_gdquot. This makes the code look a bit weird, but it keeps the
263	 * logic a lot simpler.
264	 */
265	dqp = *IO_idqpp;
266	if (dqp) {
267		trace_xfs_dqattach_found(dqp);
268		return 0;
269	}
270
271	/*
272	 * Find the dquot from somewhere. This bumps the reference count of the
273	 * dquot and returns it locked.  This can return ENOENT if the dquot
274	 * didn't exist on disk and we didn't ask it to allocate; ESRCH if
275	 * quotas got turned off suddenly.
276	 */
277	error = xfs_qm_dqget_inode(ip, type, doalloc, &dqp);
278	if (error)
279		return error;
280
281	trace_xfs_dqattach_get(dqp);
282
283	/*
284	 * dqget may have dropped and re-acquired the ilock, but it guarantees
285	 * that the dquot returned is the one that should go in the inode.
286	 */
287	*IO_idqpp = dqp;
288	xfs_dqunlock(dqp);
289	return 0;
290}
291
292static bool
293xfs_qm_need_dqattach(
294	struct xfs_inode	*ip)
295{
296	struct xfs_mount	*mp = ip->i_mount;
297
298	if (!XFS_IS_QUOTA_ON(mp))
299		return false;
300	if (!XFS_NOT_DQATTACHED(mp, ip))
301		return false;
302	if (xfs_is_quota_inode(&mp->m_sb, ip->i_ino))
303		return false;
304	return true;
305}
306
307/*
308 * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
309 * into account.
310 * If @doalloc is true, the dquot(s) will be allocated if needed.
311 * The inode may get unlocked and relocked in here, and the caller must
312 * deal with the consequences.
313 */
314int
315xfs_qm_dqattach_locked(
316	xfs_inode_t	*ip,
317	bool		doalloc)
318{
319	xfs_mount_t	*mp = ip->i_mount;
320	int		error = 0;
321
322	if (!xfs_qm_need_dqattach(ip))
323		return 0;
324
325	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
326
327	if (XFS_IS_UQUOTA_ON(mp) && !ip->i_udquot) {
328		error = xfs_qm_dqattach_one(ip, XFS_DQTYPE_USER,
329				doalloc, &ip->i_udquot);
330		if (error)
331			goto done;
332		ASSERT(ip->i_udquot);
333	}
334
335	if (XFS_IS_GQUOTA_ON(mp) && !ip->i_gdquot) {
336		error = xfs_qm_dqattach_one(ip, XFS_DQTYPE_GROUP,
337				doalloc, &ip->i_gdquot);
338		if (error)
339			goto done;
340		ASSERT(ip->i_gdquot);
341	}
342
343	if (XFS_IS_PQUOTA_ON(mp) && !ip->i_pdquot) {
344		error = xfs_qm_dqattach_one(ip, XFS_DQTYPE_PROJ,
345				doalloc, &ip->i_pdquot);
346		if (error)
347			goto done;
348		ASSERT(ip->i_pdquot);
349	}
350
351done:
352	/*
353	 * Don't worry about the dquots that we may have attached before any
354	 * error - they'll get detached later if it has not already been done.
355	 */
356	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
357	return error;
358}
359
360int
361xfs_qm_dqattach(
362	struct xfs_inode	*ip)
363{
364	int			error;
365
366	if (!xfs_qm_need_dqattach(ip))
367		return 0;
368
369	xfs_ilock(ip, XFS_ILOCK_EXCL);
370	error = xfs_qm_dqattach_locked(ip, false);
371	xfs_iunlock(ip, XFS_ILOCK_EXCL);
372
373	return error;
374}
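
/*
 * Illustrative sketch, not part of the original file: a caller that already
 * needs the ILOCK and may have to allocate dquots on disk would use the
 * _locked variant with @doalloc set.  xfs_example_attach_alloc() is a
 * hypothetical helper named only for this sketch.
 */
static inline int
xfs_example_attach_alloc(
	struct xfs_inode	*ip)
{
	int			error;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	/* may cycle the ILOCK; see the comment above dqattach_locked */
	error = xfs_qm_dqattach_locked(ip, true);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}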
375
376/*
377 * Release dquots (and their references) if any.
378 * The inode should be locked EXCL except when this is called by
379 * xfs_ireclaim.
380 */
381void
382xfs_qm_dqdetach(
383	xfs_inode_t	*ip)
384{
385	if (!(ip->i_udquot || ip->i_gdquot || ip->i_pdquot))
386		return;
387
388	trace_xfs_dquot_dqdetach(ip);
389
390	ASSERT(!xfs_is_quota_inode(&ip->i_mount->m_sb, ip->i_ino));
391	if (ip->i_udquot) {
392		xfs_qm_dqrele(ip->i_udquot);
393		ip->i_udquot = NULL;
394	}
395	if (ip->i_gdquot) {
396		xfs_qm_dqrele(ip->i_gdquot);
397		ip->i_gdquot = NULL;
398	}
399	if (ip->i_pdquot) {
400		xfs_qm_dqrele(ip->i_pdquot);
401		ip->i_pdquot = NULL;
402	}
403}
404
405struct xfs_qm_isolate {
406	struct list_head	buffers;
407	struct list_head	dispose;
408};
409
410static enum lru_status
411xfs_qm_dquot_isolate(
412	struct list_head	*item,
413	struct list_lru_one	*lru,
414	spinlock_t		*lru_lock,
415	void			*arg)
416		__releases(lru_lock) __acquires(lru_lock)
417{
418	struct xfs_dquot	*dqp = container_of(item,
419						struct xfs_dquot, q_lru);
420	struct xfs_qm_isolate	*isol = arg;
421
422	if (!xfs_dqlock_nowait(dqp))
423		goto out_miss_busy;
424
425	/*
426	 * This dquot has acquired a reference in the meantime; remove it from
427	 * the freelist and try again.
428	 */
429	if (dqp->q_nrefs) {
430		xfs_dqunlock(dqp);
431		XFS_STATS_INC(dqp->q_mount, xs_qm_dqwants);
432
433		trace_xfs_dqreclaim_want(dqp);
434		list_lru_isolate(lru, &dqp->q_lru);
435		XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot_unused);
436		return LRU_REMOVED;
437	}
438
439	/*
440	 * If the dquot is dirty, flush it. If it's already being flushed, just
441	 * skip it so there is time for the IO to complete before we try to
442	 * reclaim it again on the next LRU pass.
443	 */
444	if (!xfs_dqflock_nowait(dqp)) {
445		xfs_dqunlock(dqp);
446		goto out_miss_busy;
447	}
448
449	if (XFS_DQ_IS_DIRTY(dqp)) {
450		struct xfs_buf	*bp = NULL;
451		int		error;
452
453		trace_xfs_dqreclaim_dirty(dqp);
454
455		/* we have to drop the LRU lock to flush the dquot */
456		spin_unlock(lru_lock);
457
458		error = xfs_qm_dqflush(dqp, &bp);
459		if (error)
460			goto out_unlock_dirty;
461
462		xfs_buf_delwri_queue(bp, &isol->buffers);
463		xfs_buf_relse(bp);
464		goto out_unlock_dirty;
465	}
466	xfs_dqfunlock(dqp);
467
468	/*
469	 * Prevent lookups now that we are past the point of no return.
470	 */
471	dqp->q_flags |= XFS_DQFLAG_FREEING;
472	xfs_dqunlock(dqp);
473
474	ASSERT(dqp->q_nrefs == 0);
475	list_lru_isolate_move(lru, &dqp->q_lru, &isol->dispose);
476	XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot_unused);
477	trace_xfs_dqreclaim_done(dqp);
478	XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaims);
479	return LRU_REMOVED;
480
481out_miss_busy:
482	trace_xfs_dqreclaim_busy(dqp);
483	XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses);
484	return LRU_SKIP;
485
486out_unlock_dirty:
487	trace_xfs_dqreclaim_busy(dqp);
488	XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses);
489	xfs_dqunlock(dqp);
490	spin_lock(lru_lock);
491	return LRU_RETRY;
492}
493
494static unsigned long
495xfs_qm_shrink_scan(
496	struct shrinker		*shrink,
497	struct shrink_control	*sc)
498{
499	struct xfs_quotainfo	*qi = container_of(shrink,
500					struct xfs_quotainfo, qi_shrinker);
501	struct xfs_qm_isolate	isol;
502	unsigned long		freed;
503	int			error;
504
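	/*
	 * Reclaiming dquots may take locks and issue buffer I/O, so only run
	 * when the allocation context allows filesystem recursion and direct
	 * reclaim.
	 */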
505	if ((sc->gfp_mask & (__GFP_FS|__GFP_DIRECT_RECLAIM)) != (__GFP_FS|__GFP_DIRECT_RECLAIM))
506		return 0;
507
508	INIT_LIST_HEAD(&isol.buffers);
509	INIT_LIST_HEAD(&isol.dispose);
510
511	freed = list_lru_shrink_walk(&qi->qi_lru, sc,
512				     xfs_qm_dquot_isolate, &isol);
513
514	error = xfs_buf_delwri_submit(&isol.buffers);
515	if (error)
516		xfs_warn(NULL, "%s: dquot reclaim failed", __func__);
517
518	while (!list_empty(&isol.dispose)) {
519		struct xfs_dquot	*dqp;
520
521		dqp = list_first_entry(&isol.dispose, struct xfs_dquot, q_lru);
522		list_del_init(&dqp->q_lru);
523		xfs_qm_dqfree_one(dqp);
524	}
525
526	return freed;
527}
528
529static unsigned long
530xfs_qm_shrink_count(
531	struct shrinker		*shrink,
532	struct shrink_control	*sc)
533{
534	struct xfs_quotainfo	*qi = container_of(shrink,
535					struct xfs_quotainfo, qi_shrinker);
536
537	return list_lru_shrink_count(&qi->qi_lru, sc);
538}
539
540STATIC void
541xfs_qm_set_defquota(
542	struct xfs_mount	*mp,
543	xfs_dqtype_t		type,
544	struct xfs_quotainfo	*qinf)
545{
546	struct xfs_dquot	*dqp;
547	struct xfs_def_quota	*defq;
548	int			error;
549
550	error = xfs_qm_dqget_uncached(mp, 0, type, &dqp);
551	if (error)
552		return;
553
554	defq = xfs_get_defquota(qinf, xfs_dquot_type(dqp));
555
556	/*
557	 * Timers and warnings have already been set; let's just set the
558	 * default limits for this quota type.
559	 */
560	defq->blk.hard = dqp->q_blk.hardlimit;
561	defq->blk.soft = dqp->q_blk.softlimit;
562	defq->ino.hard = dqp->q_ino.hardlimit;
563	defq->ino.soft = dqp->q_ino.softlimit;
564	defq->rtb.hard = dqp->q_rtb.hardlimit;
565	defq->rtb.soft = dqp->q_rtb.softlimit;
566	xfs_qm_dqdestroy(dqp);
567}
568
569/* Initialize quota time limits from the root dquot. */
570static void
571xfs_qm_init_timelimits(
572	struct xfs_mount	*mp,
573	xfs_dqtype_t		type)
574{
575	struct xfs_quotainfo	*qinf = mp->m_quotainfo;
576	struct xfs_def_quota	*defq;
577	struct xfs_dquot	*dqp;
578	int			error;
579
580	defq = xfs_get_defquota(qinf, type);
581
582	defq->blk.time = XFS_QM_BTIMELIMIT;
583	defq->ino.time = XFS_QM_ITIMELIMIT;
584	defq->rtb.time = XFS_QM_RTBTIMELIMIT;
585	defq->blk.warn = XFS_QM_BWARNLIMIT;
586	defq->ino.warn = XFS_QM_IWARNLIMIT;
587	defq->rtb.warn = XFS_QM_RTBWARNLIMIT;
588
589	/*
590	 * We try to get the limits from the superuser's limits fields.
591	 * This is quite hacky, but it is standard quota practice.
592	 *
593	 * Since we may not have done a quotacheck by this point, just read
594	 * the dquot without attaching it to any hashtables or lists.
595	 */
596	error = xfs_qm_dqget_uncached(mp, 0, type, &dqp);
597	if (error)
598		return;
599
600	/*
601	 * The warnings and timers set the grace period given to
602	 * a user or group before further writes are denied. If it
603	 * is zero, a default is used.
604	 */
605	if (dqp->q_blk.timer)
606		defq->blk.time = dqp->q_blk.timer;
607	if (dqp->q_ino.timer)
608		defq->ino.time = dqp->q_ino.timer;
609	if (dqp->q_rtb.timer)
610		defq->rtb.time = dqp->q_rtb.timer;
611	if (dqp->q_blk.warnings)
612		defq->blk.warn = dqp->q_blk.warnings;
613	if (dqp->q_ino.warnings)
614		defq->ino.warn = dqp->q_ino.warnings;
615	if (dqp->q_rtb.warnings)
616		defq->rtb.warn = dqp->q_rtb.warnings;
617
618	xfs_qm_dqdestroy(dqp);
619}
620
621/*
622 * This initializes all the quota information that's kept in the
623 * mount structure
624 */
625STATIC int
626xfs_qm_init_quotainfo(
627	struct xfs_mount	*mp)
628{
629	struct xfs_quotainfo	*qinf;
630	int			error;
631
632	ASSERT(XFS_IS_QUOTA_ON(mp));
633
634	qinf = mp->m_quotainfo = kmem_zalloc(sizeof(struct xfs_quotainfo), 0);
635
636	error = list_lru_init(&qinf->qi_lru);
637	if (error)
638		goto out_free_qinf;
639
640	/*
641	 * See if quotainodes are setup, and if not, allocate them,
642	 * and change the superblock accordingly.
643	 */
644	error = xfs_qm_init_quotainos(mp);
645	if (error)
646		goto out_free_lru;
647
648	INIT_RADIX_TREE(&qinf->qi_uquota_tree, GFP_NOFS);
649	INIT_RADIX_TREE(&qinf->qi_gquota_tree, GFP_NOFS);
650	INIT_RADIX_TREE(&qinf->qi_pquota_tree, GFP_NOFS);
651	mutex_init(&qinf->qi_tree_lock);
652
653	/* mutex used to serialize quotaoffs */
654	mutex_init(&qinf->qi_quotaofflock);
655
656	/* Precalc some constants */
657	qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
658	qinf->qi_dqperchunk = xfs_calc_dquots_per_chunk(qinf->qi_dqchunklen);
659	if (xfs_has_bigtime(mp)) {
660		qinf->qi_expiry_min =
661			xfs_dq_bigtime_to_unix(XFS_DQ_BIGTIME_EXPIRY_MIN);
662		qinf->qi_expiry_max =
663			xfs_dq_bigtime_to_unix(XFS_DQ_BIGTIME_EXPIRY_MAX);
664	} else {
665		qinf->qi_expiry_min = XFS_DQ_LEGACY_EXPIRY_MIN;
666		qinf->qi_expiry_max = XFS_DQ_LEGACY_EXPIRY_MAX;
667	}
668	trace_xfs_quota_expiry_range(mp, qinf->qi_expiry_min,
669			qinf->qi_expiry_max);
670
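	/* Pick up the quotachecked state recorded in the on-disk superblock. */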
671	mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
672
673	xfs_qm_init_timelimits(mp, XFS_DQTYPE_USER);
674	xfs_qm_init_timelimits(mp, XFS_DQTYPE_GROUP);
675	xfs_qm_init_timelimits(mp, XFS_DQTYPE_PROJ);
676
677	if (XFS_IS_UQUOTA_ON(mp))
678		xfs_qm_set_defquota(mp, XFS_DQTYPE_USER, qinf);
679	if (XFS_IS_GQUOTA_ON(mp))
680		xfs_qm_set_defquota(mp, XFS_DQTYPE_GROUP, qinf);
681	if (XFS_IS_PQUOTA_ON(mp))
682		xfs_qm_set_defquota(mp, XFS_DQTYPE_PROJ, qinf);
683
684	qinf->qi_shrinker.count_objects = xfs_qm_shrink_count;
685	qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan;
686	qinf->qi_shrinker.seeks = DEFAULT_SEEKS;
687	qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE;
688
689	error = register_shrinker(&qinf->qi_shrinker);
690	if (error)
691		goto out_free_inos;
692
693	return 0;
694
695out_free_inos:
696	mutex_destroy(&qinf->qi_quotaofflock);
697	mutex_destroy(&qinf->qi_tree_lock);
698	xfs_qm_destroy_quotainos(qinf);
699out_free_lru:
700	list_lru_destroy(&qinf->qi_lru);
701out_free_qinf:
702	kmem_free(qinf);
703	mp->m_quotainfo = NULL;
704	return error;
705}
706
707/*
708 * Gets called when unmounting a filesystem or when all quotas get
709 * turned off.
710 * This purges the quota inodes, destroys locks and frees itself.
711 */
712void
713xfs_qm_destroy_quotainfo(
714	struct xfs_mount	*mp)
715{
716	struct xfs_quotainfo	*qi;
717
718	qi = mp->m_quotainfo;
719	ASSERT(qi != NULL);
720
721	unregister_shrinker(&qi->qi_shrinker);
722	list_lru_destroy(&qi->qi_lru);
723	xfs_qm_destroy_quotainos(qi);
724	mutex_destroy(&qi->qi_tree_lock);
725	mutex_destroy(&qi->qi_quotaofflock);
726	kmem_free(qi);
727	mp->m_quotainfo = NULL;
728}
729
730/*
731 * Create an inode and return with a reference already taken, but unlocked.
732 * This is how we create quota inodes.
733 */
734STATIC int
735xfs_qm_qino_alloc(
736	struct xfs_mount	*mp,
737	struct xfs_inode	**ipp,
738	unsigned int		flags)
739{
740	struct xfs_trans	*tp;
741	int			error;
742	bool			need_alloc = true;
743
744	*ipp = NULL;
745	/*
746	 * With a superblock that doesn't have a separate pquotino, we
747	 * share an inode between gquota and pquota. If the on-disk
748	 * superblock has GQUOTA and the filesystem is now mounted
749	 * with PQUOTA, just use sb_gquotino for sb_pquotino and
750	 * vice-versa.
751	 */
752	if (!xfs_has_pquotino(mp) &&
753			(flags & (XFS_QMOPT_PQUOTA|XFS_QMOPT_GQUOTA))) {
754		xfs_ino_t ino = NULLFSINO;
755
756		if ((flags & XFS_QMOPT_PQUOTA) &&
757			     (mp->m_sb.sb_gquotino != NULLFSINO)) {
758			ino = mp->m_sb.sb_gquotino;
759			if (XFS_IS_CORRUPT(mp,
760					   mp->m_sb.sb_pquotino != NULLFSINO))
761				return -EFSCORRUPTED;
762		} else if ((flags & XFS_QMOPT_GQUOTA) &&
763			     (mp->m_sb.sb_pquotino != NULLFSINO)) {
764			ino = mp->m_sb.sb_pquotino;
765			if (XFS_IS_CORRUPT(mp,
766					   mp->m_sb.sb_gquotino != NULLFSINO))
767				return -EFSCORRUPTED;
768		}
769		if (ino != NULLFSINO) {
770			error = xfs_iget(mp, NULL, ino, 0, 0, ipp);
771			if (error)
772				return error;
773			mp->m_sb.sb_gquotino = NULLFSINO;
774			mp->m_sb.sb_pquotino = NULLFSINO;
775			need_alloc = false;
776		}
777	}
778
779	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_create,
780			need_alloc ? XFS_QM_QINOCREATE_SPACE_RES(mp) : 0,
781			0, 0, &tp);
782	if (error)
783		return error;
784
785	if (need_alloc) {
786		xfs_ino_t	ino;
787
788		error = xfs_dialloc(&tp, 0, S_IFREG, &ino);
789		if (!error)
790			error = xfs_init_new_inode(&init_user_ns, tp, NULL, ino,
791					S_IFREG, 1, 0, 0, false, ipp);
792		if (error) {
793			xfs_trans_cancel(tp);
794			return error;
795		}
796	}
797
798	/*
799	 * Make the changes in the superblock, and log those too.
800	 * The flags arg may ask us to change fields other than *QUOTINO;
801	 * VERSIONNUM for example.
802	 */
803	spin_lock(&mp->m_sb_lock);
804	if (flags & XFS_QMOPT_SBVERSION) {
805		ASSERT(!xfs_has_quota(mp));
806
807		xfs_add_quota(mp);
808		mp->m_sb.sb_uquotino = NULLFSINO;
809		mp->m_sb.sb_gquotino = NULLFSINO;
810		mp->m_sb.sb_pquotino = NULLFSINO;
811
812		/* qflags will get updated fully _after_ quotacheck */
813		mp->m_sb.sb_qflags = mp->m_qflags & XFS_ALL_QUOTA_ACCT;
814	}
815	if (flags & XFS_QMOPT_UQUOTA)
816		mp->m_sb.sb_uquotino = (*ipp)->i_ino;
817	else if (flags & XFS_QMOPT_GQUOTA)
818		mp->m_sb.sb_gquotino = (*ipp)->i_ino;
819	else
820		mp->m_sb.sb_pquotino = (*ipp)->i_ino;
821	spin_unlock(&mp->m_sb_lock);
822	xfs_log_sb(tp);
823
824	error = xfs_trans_commit(tp);
825	if (error) {
826		ASSERT(xfs_is_shutdown(mp));
827		xfs_alert(mp, "%s failed (error %d)!", __func__, error);
828	}
829	if (need_alloc)
830		xfs_finish_inode_setup(*ipp);
831	return error;
832}
833
834
835STATIC void
836xfs_qm_reset_dqcounts(
837	struct xfs_mount	*mp,
838	struct xfs_buf		*bp,
839	xfs_dqid_t		id,
840	xfs_dqtype_t		type)
841{
842	struct xfs_dqblk	*dqb;
843	int			j;
844
845	trace_xfs_reset_dqcounts(bp, _RET_IP_);
846
847	/*
848	 * Reset all counters and timers. They'll be
849	 * started afresh by xfs_qm_quotacheck.
850	 */
851#ifdef DEBUG
852	j = (int)XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) /
853		sizeof(xfs_dqblk_t);
854	ASSERT(mp->m_quotainfo->qi_dqperchunk == j);
855#endif
856	dqb = bp->b_addr;
857	for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) {
858		struct xfs_disk_dquot	*ddq;
859
860		ddq = (struct xfs_disk_dquot *)&dqb[j];
861
862		/*
863		 * Do a sanity check, and if needed, repair the dqblk. Don't
864		 * output any warnings because it's perfectly possible to
865		 * find uninitialised dquot blks. See comment in
866		 * xfs_dquot_verify.
867		 */
868		if (xfs_dqblk_verify(mp, &dqb[j], id + j) ||
869		    (dqb[j].dd_diskdq.d_type & XFS_DQTYPE_REC_MASK) != type)
870			xfs_dqblk_repair(mp, &dqb[j], id + j, type);
871
872		/*
873		 * Reset type in case we are reusing group quota file for
874		 * project quotas or vice versa
875		 */
876		ddq->d_type = type;
877		ddq->d_bcount = 0;
878		ddq->d_icount = 0;
879		ddq->d_rtbcount = 0;
880
881		/*
882		 * dquot id 0 stores the default grace period and the maximum
883		 * warning limit that were set by the administrator, so we
884		 * should not reset them.
885		 */
886		if (ddq->d_id != 0) {
887			ddq->d_btimer = 0;
888			ddq->d_itimer = 0;
889			ddq->d_rtbtimer = 0;
890			ddq->d_bwarns = 0;
891			ddq->d_iwarns = 0;
892			ddq->d_rtbwarns = 0;
893			if (xfs_has_bigtime(mp))
894				ddq->d_type |= XFS_DQTYPE_BIGTIME;
895		}
896
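		/*
		 * We rewrote the dquot contents above, so the CRC has to be
		 * recomputed for each dqblk on CRC-enabled filesystems.
		 */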
897		if (xfs_has_crc(mp)) {
898			xfs_update_cksum((char *)&dqb[j],
899					 sizeof(struct xfs_dqblk),
900					 XFS_DQUOT_CRC_OFF);
901		}
902	}
903}
904
905STATIC int
906xfs_qm_reset_dqcounts_all(
907	struct xfs_mount	*mp,
908	xfs_dqid_t		firstid,
909	xfs_fsblock_t		bno,
910	xfs_filblks_t		blkcnt,
911	xfs_dqtype_t		type,
912	struct list_head	*buffer_list)
913{
914	struct xfs_buf		*bp;
915	int			error = 0;
916
917	ASSERT(blkcnt > 0);
918
919	/*
920	 * Blkcnt arg can be a very big number, and might even be
921	 * larger than the log itself. So, we have to break it up into
922	 * manageable-sized transactions.
923	 * Note that we don't start a permanent transaction here; we might
924	 * not be able to get a log reservation for the whole thing up front,
925	 * and we don't really care to either, because we just discard
926	 * everything if we were to crash in the middle of this loop.
927	 */
928	while (blkcnt--) {
929		error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
930			      XFS_FSB_TO_DADDR(mp, bno),
931			      mp->m_quotainfo->qi_dqchunklen, 0, &bp,
932			      &xfs_dquot_buf_ops);
933
934		/*
935		 * CRC and validation errors will return -EFSCORRUPTED here. If
936		 * this occurs, re-read without CRC validation so that we can
937		 * repair the damage via xfs_qm_reset_dqcounts(). This process
938		 * will leave a trace in the log indicating corruption has
939		 * been detected.
940		 */
941		if (error == -EFSCORRUPTED) {
942			error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
943				      XFS_FSB_TO_DADDR(mp, bno),
944				      mp->m_quotainfo->qi_dqchunklen, 0, &bp,
945				      NULL);
946		}
947
948		if (error)
949			break;
950
951		/*
952		 * A corrupt buffer might not have a verifier attached, so
953		 * make sure we have the correct one attached before writeback
954		 * occurs.
955		 */
956		bp->b_ops = &xfs_dquot_buf_ops;
957		xfs_qm_reset_dqcounts(mp, bp, firstid, type);
958		xfs_buf_delwri_queue(bp, buffer_list);
959		xfs_buf_relse(bp);
960
961		/* go to the next block. */
962		bno++;
963		firstid += mp->m_quotainfo->qi_dqperchunk;
964	}
965
966	return error;
967}
968
969/*
970 * Iterate over all allocated dquot blocks in this quota inode, zeroing all
971 * counters for every chunk of dquots that we find.
972 */
973STATIC int
974xfs_qm_reset_dqcounts_buf(
975	struct xfs_mount	*mp,
976	struct xfs_inode	*qip,
977	xfs_dqtype_t		type,
978	struct list_head	*buffer_list)
979{
980	struct xfs_bmbt_irec	*map;
981	int			i, nmaps;	/* number of map entries */
982	int			error;		/* return value */
983	xfs_fileoff_t		lblkno;
984	xfs_filblks_t		maxlblkcnt;
985	xfs_dqid_t		firstid;
986	xfs_fsblock_t		rablkno;
987	xfs_filblks_t		rablkcnt;
988
989	error = 0;
990	/*
991	 * This looks racy, but we can't keep an inode lock across a
992	 * trans_reserve. But this gets called during quotacheck, and that
993	 * happens only at mount time, which is single-threaded.
994	 */
995	if (qip->i_nblocks == 0)
996		return 0;
997
998	map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), 0);
999
1000	lblkno = 0;
1001	maxlblkcnt = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
1002	do {
1003		uint		lock_mode;
1004
1005		nmaps = XFS_DQITER_MAP_SIZE;
1006		/*
1007		 * We aren't changing the inode itself. Just changing
1008		 * some of its data. No new blocks are added here, and
1009		 * the inode is never added to the transaction.
1010		 */
1011		lock_mode = xfs_ilock_data_map_shared(qip);
1012		error = xfs_bmapi_read(qip, lblkno, maxlblkcnt - lblkno,
1013				       map, &nmaps, 0);
1014		xfs_iunlock(qip, lock_mode);
1015		if (error)
1016			break;
1017
1018		ASSERT(nmaps <= XFS_DQITER_MAP_SIZE);
1019		for (i = 0; i < nmaps; i++) {
1020			ASSERT(map[i].br_startblock != DELAYSTARTBLOCK);
1021			ASSERT(map[i].br_blockcount);
1022
1023
1024			lblkno += map[i].br_blockcount;
1025
1026			if (map[i].br_startblock == HOLESTARTBLOCK)
1027				continue;
1028
1029			firstid = (xfs_dqid_t) map[i].br_startoff *
1030				mp->m_quotainfo->qi_dqperchunk;
1031			/*
1032			 * Do a read-ahead on the next extent.
1033			 */
1034			if ((i+1 < nmaps) &&
1035			    (map[i+1].br_startblock != HOLESTARTBLOCK)) {
1036				rablkcnt =  map[i+1].br_blockcount;
1037				rablkno = map[i+1].br_startblock;
1038				while (rablkcnt--) {
1039					xfs_buf_readahead(mp->m_ddev_targp,
1040					       XFS_FSB_TO_DADDR(mp, rablkno),
1041					       mp->m_quotainfo->qi_dqchunklen,
1042					       &xfs_dquot_buf_ops);
1043					rablkno++;
1044				}
1045			}
1046			/*
1047			 * Iterate thru all the blks in the extent and
1048			 * reset the counters of all the dquots inside them.
1049			 */
1050			error = xfs_qm_reset_dqcounts_all(mp, firstid,
1051						   map[i].br_startblock,
1052						   map[i].br_blockcount,
1053						   type, buffer_list);
1054			if (error)
1055				goto out;
1056		}
1057	} while (nmaps > 0);
1058
1059out:
1060	kmem_free(map);
1061	return error;
1062}
1063
1064/*
1065 * Called by dqusage_adjust in doing a quotacheck.
1066 *
1067 * Given the inode and a dquot id, this updates both the incore dquot as
1068 * well as the buffer copy. This is so that once the quotacheck is done, we
1069 * can just log all the buffers, as opposed to logging numerous updates to
1070 * individual dquots.
1071 */
1072STATIC int
1073xfs_qm_quotacheck_dqadjust(
1074	struct xfs_inode	*ip,
1075	xfs_dqtype_t		type,
1076	xfs_qcnt_t		nblks,
1077	xfs_qcnt_t		rtblks)
1078{
1079	struct xfs_mount	*mp = ip->i_mount;
1080	struct xfs_dquot	*dqp;
1081	xfs_dqid_t		id;
1082	int			error;
1083
1084	id = xfs_qm_id_for_quotatype(ip, type);
1085	error = xfs_qm_dqget(mp, id, type, true, &dqp);
1086	if (error) {
1087		/*
1088		 * Shouldn't be able to turn off quotas here.
1089		 */
1090		ASSERT(error != -ESRCH);
1091		ASSERT(error != -ENOENT);
1092		return error;
1093	}
1094
1095	trace_xfs_dqadjust(dqp);
1096
1097	/*
1098	 * Adjust the inode count and the block count to reflect this inode's
1099	 * resource usage.
1100	 */
1101	dqp->q_ino.count++;
1102	dqp->q_ino.reserved++;
1103	if (nblks) {
1104		dqp->q_blk.count += nblks;
1105		dqp->q_blk.reserved += nblks;
1106	}
1107	if (rtblks) {
1108		dqp->q_rtb.count += rtblks;
1109		dqp->q_rtb.reserved += rtblks;
1110	}
1111
1112	/*
1113	 * Set default limits, adjust timers (since we changed usages)
1114	 *
1115	 * There are no timers for the default values set in the root dquot.
1116	 */
1117	if (dqp->q_id) {
1118		xfs_qm_adjust_dqlimits(dqp);
1119		xfs_qm_adjust_dqtimers(dqp);
1120	}
1121
1122	dqp->q_flags |= XFS_DQFLAG_DIRTY;
1123	xfs_qm_dqput(dqp);
1124	return 0;
1125}
1126
1127/*
1128 * Callback routine supplied to xfs_iwalk_threaded(). Given an inumber, find its
1129 * dquots and update them to account for resources taken by that inode.
1130 */
1131/* ARGSUSED */
1132STATIC int
1133xfs_qm_dqusage_adjust(
1134	struct xfs_mount	*mp,
1135	struct xfs_trans	*tp,
1136	xfs_ino_t		ino,
1137	void			*data)
1138{
1139	struct xfs_inode	*ip;
1140	xfs_qcnt_t		nblks;
1141	xfs_filblks_t		rtblks = 0;	/* total rt blks */
1142	int			error;
1143
1144	ASSERT(XFS_IS_QUOTA_ON(mp));
1145
1146	/*
1147	 * rootino must have its resources accounted for; not so with the quota
1148	 * inodes.
1149	 */
1150	if (xfs_is_quota_inode(&mp->m_sb, ino))
1151		return 0;
1152
1153	/*
1154	 * We don't _need_ to take the ilock EXCL here because quotacheck runs
1155	 * at mount time and therefore nobody will be racing chown/chproj.
1156	 */
1157	error = xfs_iget(mp, tp, ino, XFS_IGET_DONTCACHE, 0, &ip);
1158	if (error == -EINVAL || error == -ENOENT)
1159		return 0;
1160	if (error)
1161		return error;
1162
1163	ASSERT(ip->i_delayed_blks == 0);
1164
1165	if (XFS_IS_REALTIME_INODE(ip)) {
1166		struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1167
1168		error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
1169		if (error)
1170			goto error0;
1171
1172		xfs_bmap_count_leaves(ifp, &rtblks);
1173	}
1174
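	/*
	 * i_nblocks counts data and realtime blocks together; subtract the
	 * realtime blocks so they are charged to the rt counters instead.
	 */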
1175	nblks = (xfs_qcnt_t)ip->i_nblocks - rtblks;
1176
1177	/*
1178	 * Add the (disk blocks and inode) resources occupied by this
1179	 * inode to its dquots. We do this adjustment in the incore dquot,
1180	 * and also copy the changes to its buffer.
1181	 * We don't care about putting these changes in a transaction
1182	 * envelope because if we crash in the middle of a 'quotacheck'
1183	 * we have to start from the beginning anyway.
1184	 * Once we're done, we'll log all the dquot bufs.
1185	 *
1186	 * The *QUOTA_ON checks below may look pretty racy, but quotachecks
1187	 * and quotaoffs don't race. (Quotachecks happen at mount time only).
1188	 */
1189	if (XFS_IS_UQUOTA_ON(mp)) {
1190		error = xfs_qm_quotacheck_dqadjust(ip, XFS_DQTYPE_USER, nblks,
1191				rtblks);
1192		if (error)
1193			goto error0;
1194	}
1195
1196	if (XFS_IS_GQUOTA_ON(mp)) {
1197		error = xfs_qm_quotacheck_dqadjust(ip, XFS_DQTYPE_GROUP, nblks,
1198				rtblks);
1199		if (error)
1200			goto error0;
1201	}
1202
1203	if (XFS_IS_PQUOTA_ON(mp)) {
1204		error = xfs_qm_quotacheck_dqadjust(ip, XFS_DQTYPE_PROJ, nblks,
1205				rtblks);
1206		if (error)
1207			goto error0;
1208	}
1209
1210error0:
1211	xfs_irele(ip);
1212	return error;
1213}
1214
1215STATIC int
1216xfs_qm_flush_one(
1217	struct xfs_dquot	*dqp,
1218	void			*data)
1219{
1220	struct xfs_mount	*mp = dqp->q_mount;
1221	struct list_head	*buffer_list = data;
1222	struct xfs_buf		*bp = NULL;
1223	int			error = 0;
1224
1225	xfs_dqlock(dqp);
1226	if (dqp->q_flags & XFS_DQFLAG_FREEING)
1227		goto out_unlock;
1228	if (!XFS_DQ_IS_DIRTY(dqp))
1229		goto out_unlock;
1230
1231	/*
1232	 * The only way the dquot is already flush locked by the time quotacheck
1233	 * gets here is if reclaim flushed it before the dqadjust walk dirtied
1234	 * it for the final time. Quotacheck collects all dquot bufs in the
1235	 * local delwri queue before dquots are dirtied, so reclaim can't have
1236	 * possibly queued it for I/O. The only way out is to push the buffer to
1237	 * cycle the flush lock.
1238	 */
1239	if (!xfs_dqflock_nowait(dqp)) {
1240		/* buf is pinned in-core by delwri list */
1241		bp = xfs_buf_incore(mp->m_ddev_targp, dqp->q_blkno,
1242				mp->m_quotainfo->qi_dqchunklen, 0);
1243		if (!bp) {
1244			error = -EINVAL;
1245			goto out_unlock;
1246		}
1247		xfs_buf_unlock(bp);
1248
1249		xfs_buf_delwri_pushbuf(bp, buffer_list);
1250		xfs_buf_rele(bp);
1251
1252		error = -EAGAIN;
1253		goto out_unlock;
1254	}
1255
1256	error = xfs_qm_dqflush(dqp, &bp);
1257	if (error)
1258		goto out_unlock;
1259
1260	xfs_buf_delwri_queue(bp, buffer_list);
1261	xfs_buf_relse(bp);
1262out_unlock:
1263	xfs_dqunlock(dqp);
1264	return error;
1265}
1266
1267/*
1268 * Walk thru all the filesystem inodes and construct a consistent view
1269 * of the disk quota world. If the quotacheck fails, disable quotas.
1270 */
1271STATIC int
1272xfs_qm_quotacheck(
1273	xfs_mount_t	*mp)
1274{
1275	int			error, error2;
1276	uint			flags;
1277	LIST_HEAD		(buffer_list);
1278	struct xfs_inode	*uip = mp->m_quotainfo->qi_uquotaip;
1279	struct xfs_inode	*gip = mp->m_quotainfo->qi_gquotaip;
1280	struct xfs_inode	*pip = mp->m_quotainfo->qi_pquotaip;
1281
1282	flags = 0;
1283
1284	ASSERT(uip || gip || pip);
1285	ASSERT(XFS_IS_QUOTA_ON(mp));
1286
1287	xfs_notice(mp, "Quotacheck needed: Please wait.");
1288
1289	/*
1290	 * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset
1291	 * their counters to zero. We need a clean slate.
1292	 * We don't log our changes till later.
1293	 */
1294	if (uip) {
1295		error = xfs_qm_reset_dqcounts_buf(mp, uip, XFS_DQTYPE_USER,
1296					 &buffer_list);
1297		if (error)
1298			goto error_return;
1299		flags |= XFS_UQUOTA_CHKD;
1300	}
1301
1302	if (gip) {
1303		error = xfs_qm_reset_dqcounts_buf(mp, gip, XFS_DQTYPE_GROUP,
1304					 &buffer_list);
1305		if (error)
1306			goto error_return;
1307		flags |= XFS_GQUOTA_CHKD;
1308	}
1309
1310	if (pip) {
1311		error = xfs_qm_reset_dqcounts_buf(mp, pip, XFS_DQTYPE_PROJ,
1312					 &buffer_list);
1313		if (error)
1314			goto error_return;
1315		flags |= XFS_PQUOTA_CHKD;
1316	}
1317
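	/*
	 * Now walk every inode in the filesystem and add its block and inode
	 * usage to the incore dquots.
	 */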
1318	error = xfs_iwalk_threaded(mp, 0, 0, xfs_qm_dqusage_adjust, 0, true,
1319			NULL);
1320	if (error)
1321		goto error_return;
1322
1323	/*
1324	 * We've made all the changes that we need to make incore.  Flush them
1325	 * down to disk buffers if everything was updated successfully.
1326	 */
1327	if (XFS_IS_UQUOTA_ON(mp)) {
1328		error = xfs_qm_dquot_walk(mp, XFS_DQTYPE_USER, xfs_qm_flush_one,
1329					  &buffer_list);
1330	}
1331	if (XFS_IS_GQUOTA_ON(mp)) {
1332		error2 = xfs_qm_dquot_walk(mp, XFS_DQTYPE_GROUP, xfs_qm_flush_one,
1333					   &buffer_list);
1334		if (!error)
1335			error = error2;
1336	}
1337	if (XFS_IS_PQUOTA_ON(mp)) {
1338		error2 = xfs_qm_dquot_walk(mp, XFS_DQTYPE_PROJ, xfs_qm_flush_one,
1339					   &buffer_list);
1340		if (!error)
1341			error = error2;
1342	}
1343
1344	error2 = xfs_buf_delwri_submit(&buffer_list);
1345	if (!error)
1346		error = error2;
1347
1348	/*
1349	 * We can get this error if we couldn't do a dquot allocation inside
1350	 * xfs_qm_dqusage_adjust (via the inode walk). We don't care about the
1351	 * dirty dquots that might be cached; we just want to get rid of them
1352	 * and turn quotaoff. The dquots won't be attached to any of the inodes
1353	 * at this point (because we intentionally didn't in dqget_noattach).
1354	 */
1355	if (error) {
1356		xfs_qm_dqpurge_all(mp);
1357		goto error_return;
1358	}
1359
1360	/*
1361	 * If one type of quota is off, then it will lose its
1362	 * quotachecked status, since we won't be doing accounting for
1363	 * that type anymore.
1364	 */
1365	mp->m_qflags &= ~XFS_ALL_QUOTA_CHKD;
1366	mp->m_qflags |= flags;
1367
1368 error_return:
1369	xfs_buf_delwri_cancel(&buffer_list);
1370
1371	if (error) {
1372		xfs_warn(mp,
1373	"Quotacheck: Unsuccessful (Error %d): Disabling quotas.",
1374			error);
1375		/*
1376		 * We must turn off quotas.
1377		 */
1378		ASSERT(mp->m_quotainfo != NULL);
1379		xfs_qm_destroy_quotainfo(mp);
1380		if (xfs_mount_reset_sbqflags(mp)) {
1381			xfs_warn(mp,
1382				"Quotacheck: Failed to reset quota flags.");
1383		}
1384	} else
1385		xfs_notice(mp, "Quotacheck: Done.");
1386	return error;
1387}
1388
1389/*
1390 * This is called from xfs_mountfs to start quotas and initialize all
1391 * necessary data structures like quotainfo.  This is also responsible for
1392 * running a quotacheck as necessary.  We are guaranteed that the superblock
1393 * is consistently read in at this point.
1394 *
1395 * If we fail here, the mount will continue with quota turned off. We don't
1396 * need to indicate success or failure at all.
1397 */
1398void
1399xfs_qm_mount_quotas(
1400	struct xfs_mount	*mp)
1401{
1402	int			error = 0;
1403	uint			sbf;
1404
1405	/*
1406	 * If quotas on realtime volumes are not supported, we disable
1407	 * quotas immediately.
1408	 */
1409	if (mp->m_sb.sb_rextents) {
1410		xfs_notice(mp, "Cannot turn on quotas for realtime filesystem");
1411		mp->m_qflags = 0;
1412		goto write_changes;
1413	}
1414
1415	ASSERT(XFS_IS_QUOTA_ON(mp));
1416
1417	/*
1418	 * Allocate the quotainfo structure inside the mount struct, and
1419	 * create quotainode(s), and change/rev superblock if necessary.
1420	 */
1421	error = xfs_qm_init_quotainfo(mp);
1422	if (error) {
1423		/*
1424		 * We must turn off quotas.
1425		 */
1426		ASSERT(mp->m_quotainfo == NULL);
1427		mp->m_qflags = 0;
1428		goto write_changes;
1429	}
1430	/*
1431	 * If any of the quotas are not consistent, do a quotacheck.
1432	 */
1433	if (XFS_QM_NEED_QUOTACHECK(mp)) {
1434		error = xfs_qm_quotacheck(mp);
1435		if (error) {
1436			/* Quotacheck failed and disabled quotas. */
1437			return;
1438		}
1439	}
1440	/*
1441	 * If one type of quota is off, then it will lose its
1442	 * quotachecked status, since we won't be doing accounting for
1443	 * that type anymore.
1444	 */
1445	if (!XFS_IS_UQUOTA_ON(mp))
1446		mp->m_qflags &= ~XFS_UQUOTA_CHKD;
1447	if (!XFS_IS_GQUOTA_ON(mp))
1448		mp->m_qflags &= ~XFS_GQUOTA_CHKD;
1449	if (!XFS_IS_PQUOTA_ON(mp))
1450		mp->m_qflags &= ~XFS_PQUOTA_CHKD;
1451
1452 write_changes:
1453	/*
1454	 * We actually don't have to acquire the m_sb_lock at all.
1455	 * This can only be called from mount, and that's single threaded. XXX
1456	 */
1457	spin_lock(&mp->m_sb_lock);
1458	sbf = mp->m_sb.sb_qflags;
1459	mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
1460	spin_unlock(&mp->m_sb_lock);
1461
1462	if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
1463		if (xfs_sync_sb(mp, false)) {
1464			/*
1465			 * We could only have been turning quotas off.
1466			 * We aren't in very good shape actually because
1467			 * the incore structures are convinced that quotas are
1468			 * off, but the on-disk superblock doesn't know that!
1469			 */
1470			ASSERT(!(XFS_IS_QUOTA_ON(mp)));
1471			xfs_alert(mp, "%s: Superblock update failed!",
1472				__func__);
1473		}
1474	}
1475
1476	if (error) {
1477		xfs_warn(mp, "Failed to initialize disk quotas.");
1478		return;
1479	}
1480}
1481
1482/*
1483 * This is called after the superblock has been read in and we're ready to
1484 * iget the quota inodes.
1485 */
1486STATIC int
1487xfs_qm_init_quotainos(
1488	xfs_mount_t	*mp)
1489{
1490	struct xfs_inode	*uip = NULL;
1491	struct xfs_inode	*gip = NULL;
1492	struct xfs_inode	*pip = NULL;
1493	int			error;
1494	uint			flags = 0;
1495
1496	ASSERT(mp->m_quotainfo);
1497
1498	/*
1499	 * Get the uquota and gquota inodes
1500	 */
1501	if (xfs_has_quota(mp)) {
1502		if (XFS_IS_UQUOTA_ON(mp) &&
1503		    mp->m_sb.sb_uquotino != NULLFSINO) {
1504			ASSERT(mp->m_sb.sb_uquotino > 0);
1505			error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
1506					     0, 0, &uip);
1507			if (error)
1508				return error;
1509		}
1510		if (XFS_IS_GQUOTA_ON(mp) &&
1511		    mp->m_sb.sb_gquotino != NULLFSINO) {
1512			ASSERT(mp->m_sb.sb_gquotino > 0);
1513			error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
1514					     0, 0, &gip);
1515			if (error)
1516				goto error_rele;
1517		}
1518		if (XFS_IS_PQUOTA_ON(mp) &&
1519		    mp->m_sb.sb_pquotino != NULLFSINO) {
1520			ASSERT(mp->m_sb.sb_pquotino > 0);
1521			error = xfs_iget(mp, NULL, mp->m_sb.sb_pquotino,
1522					     0, 0, &pip);
1523			if (error)
1524				goto error_rele;
1525		}
1526	} else {
1527		flags |= XFS_QMOPT_SBVERSION;
1528	}
1529
1530	/*
1531	 * Create the three inodes, if they don't exist already. The changes
1532	 * made above will get added to a transaction and logged in one of
1533	 * the qino_alloc calls below.  If the device is readonly,
1534	 * temporarily switch to read-write to do this.
1535	 */
1536	if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) {
1537		error = xfs_qm_qino_alloc(mp, &uip,
1538					      flags | XFS_QMOPT_UQUOTA);
1539		if (error)
1540			goto error_rele;
1541
1542		flags &= ~XFS_QMOPT_SBVERSION;
1543	}
1544	if (XFS_IS_GQUOTA_ON(mp) && gip == NULL) {
1545		error = xfs_qm_qino_alloc(mp, &gip,
1546					  flags | XFS_QMOPT_GQUOTA);
1547		if (error)
1548			goto error_rele;
1549
1550		flags &= ~XFS_QMOPT_SBVERSION;
1551	}
1552	if (XFS_IS_PQUOTA_ON(mp) && pip == NULL) {
1553		error = xfs_qm_qino_alloc(mp, &pip,
1554					  flags | XFS_QMOPT_PQUOTA);
1555		if (error)
1556			goto error_rele;
1557	}
1558
1559	mp->m_quotainfo->qi_uquotaip = uip;
1560	mp->m_quotainfo->qi_gquotaip = gip;
1561	mp->m_quotainfo->qi_pquotaip = pip;
1562
1563	return 0;
1564
1565error_rele:
1566	if (uip)
1567		xfs_irele(uip);
1568	if (gip)
1569		xfs_irele(gip);
1570	if (pip)
1571		xfs_irele(pip);
1572	return error;
1573}
1574
1575STATIC void
1576xfs_qm_destroy_quotainos(
1577	struct xfs_quotainfo	*qi)
1578{
1579	if (qi->qi_uquotaip) {
1580		xfs_irele(qi->qi_uquotaip);
1581		qi->qi_uquotaip = NULL; /* paranoia */
1582	}
1583	if (qi->qi_gquotaip) {
1584		xfs_irele(qi->qi_gquotaip);
1585		qi->qi_gquotaip = NULL;
1586	}
1587	if (qi->qi_pquotaip) {
1588		xfs_irele(qi->qi_pquotaip);
1589		qi->qi_pquotaip = NULL;
1590	}
1591}
1592
1593STATIC void
1594xfs_qm_dqfree_one(
1595	struct xfs_dquot	*dqp)
1596{
1597	struct xfs_mount	*mp = dqp->q_mount;
1598	struct xfs_quotainfo	*qi = mp->m_quotainfo;
1599
1600	mutex_lock(&qi->qi_tree_lock);
1601	radix_tree_delete(xfs_dquot_tree(qi, xfs_dquot_type(dqp)), dqp->q_id);
1602
1603	qi->qi_dquots--;
1604	mutex_unlock(&qi->qi_tree_lock);
1605
1606	xfs_qm_dqdestroy(dqp);
1607}
1608
1609/* --------------- utility functions for vnodeops ---------------- */
1610
1611
1612/*
1613 * Given an inode, a uid, gid and prid, make sure that we have
1614 * allocated relevant dquot(s) on disk, and that we won't exceed inode
1615 * quotas by creating this file.
1616 * This also attaches dquot(s) to the given inode after locking it,
1617 * and returns the dquots corresponding to the uid and/or gid.
1618 *
1619 * in	: inode (unlocked)
1620 * out	: udquot, gdquot with references taken and unlocked
1621 */
1622int
1623xfs_qm_vop_dqalloc(
1624	struct xfs_inode	*ip,
1625	kuid_t			uid,
1626	kgid_t			gid,
1627	prid_t			prid,
1628	uint			flags,
1629	struct xfs_dquot	**O_udqpp,
1630	struct xfs_dquot	**O_gdqpp,
1631	struct xfs_dquot	**O_pdqpp)
1632{
1633	struct xfs_mount	*mp = ip->i_mount;
1634	struct inode		*inode = VFS_I(ip);
1635	struct user_namespace	*user_ns = inode->i_sb->s_user_ns;
1636	struct xfs_dquot	*uq = NULL;
1637	struct xfs_dquot	*gq = NULL;
1638	struct xfs_dquot	*pq = NULL;
1639	int			error;
1640	uint			lockflags;
1641
1642	if (!XFS_IS_QUOTA_ON(mp))
1643		return 0;
1644
1645	lockflags = XFS_ILOCK_EXCL;
1646	xfs_ilock(ip, lockflags);
1647
1648	if ((flags & XFS_QMOPT_INHERIT) && XFS_INHERIT_GID(ip))
1649		gid = inode->i_gid;
1650
1651	/*
1652	 * Attach the dquot(s) to this inode, doing a dquot allocation
1653	 * if necessary. The dquot(s) will not be locked.
1654	 */
1655	if (XFS_NOT_DQATTACHED(mp, ip)) {
1656		error = xfs_qm_dqattach_locked(ip, true);
1657		if (error) {
1658			xfs_iunlock(ip, lockflags);
1659			return error;
1660		}
1661	}
1662
1663	if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) {
1664		ASSERT(O_udqpp);
1665		if (!uid_eq(inode->i_uid, uid)) {
1666			/*
1667			 * What we need is the dquot that has this uid, and
1668			 * if we send the inode to dqget, the uid of the inode
1669			 * takes priority over what's sent in the uid argument.
1670			 * We must unlock the inode here before calling dqget if
1671			 * we're not sending the inode, because otherwise
1672			 * we'll deadlock by doing trans_reserve while
1673			 * holding the ilock.
1674			 */
1675			xfs_iunlock(ip, lockflags);
1676			error = xfs_qm_dqget(mp, from_kuid(user_ns, uid),
1677					XFS_DQTYPE_USER, true, &uq);
1678			if (error) {
1679				ASSERT(error != -ENOENT);
1680				return error;
1681			}
1682			/*
1683			 * Get the ilock in the right order.
1684			 */
1685			xfs_dqunlock(uq);
1686			lockflags = XFS_ILOCK_SHARED;
1687			xfs_ilock(ip, lockflags);
1688		} else {
1689			/*
1690			 * Take an extra reference, because we'll return
1691			 * this to the caller.
1692			 */
1693			ASSERT(ip->i_udquot);
1694			uq = xfs_qm_dqhold(ip->i_udquot);
1695		}
1696	}
1697	if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
1698		ASSERT(O_gdqpp);
1699		if (!gid_eq(inode->i_gid, gid)) {
1700			xfs_iunlock(ip, lockflags);
1701			error = xfs_qm_dqget(mp, from_kgid(user_ns, gid),
1702					XFS_DQTYPE_GROUP, true, &gq);
1703			if (error) {
1704				ASSERT(error != -ENOENT);
1705				goto error_rele;
1706			}
1707			xfs_dqunlock(gq);
1708			lockflags = XFS_ILOCK_SHARED;
1709			xfs_ilock(ip, lockflags);
1710		} else {
1711			ASSERT(ip->i_gdquot);
1712			gq = xfs_qm_dqhold(ip->i_gdquot);
1713		}
1714	}
1715	if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
1716		ASSERT(O_pdqpp);
1717		if (ip->i_projid != prid) {
1718			xfs_iunlock(ip, lockflags);
1719			error = xfs_qm_dqget(mp, prid,
1720					XFS_DQTYPE_PROJ, true, &pq);
1721			if (error) {
1722				ASSERT(error != -ENOENT);
1723				goto error_rele;
1724			}
1725			xfs_dqunlock(pq);
1726			lockflags = XFS_ILOCK_SHARED;
1727			xfs_ilock(ip, lockflags);
1728		} else {
1729			ASSERT(ip->i_pdquot);
1730			pq = xfs_qm_dqhold(ip->i_pdquot);
1731		}
1732	}
1733	trace_xfs_dquot_dqalloc(ip);
1734
1735	xfs_iunlock(ip, lockflags);
1736	if (O_udqpp)
1737		*O_udqpp = uq;
1738	else
1739		xfs_qm_dqrele(uq);
1740	if (O_gdqpp)
1741		*O_gdqpp = gq;
1742	else
1743		xfs_qm_dqrele(gq);
1744	if (O_pdqpp)
1745		*O_pdqpp = pq;
1746	else
1747		xfs_qm_dqrele(pq);
1748	return 0;
1749
1750error_rele:
1751	xfs_qm_dqrele(gq);
1752	xfs_qm_dqrele(uq);
1753	return error;
1754}
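
/*
 * Illustrative sketch, not part of the original file: a caller typically
 * requests the dquot types it cares about for a particular owner and releases
 * whatever it was handed once its transaction completes.
 * xfs_example_get_owner_dquots() is a hypothetical helper named only for this
 * sketch; it asks for all three types matching the inode's current ownership.
 */
static inline int
xfs_example_get_owner_dquots(
	struct xfs_inode	*ip,
	struct xfs_dquot	**udqpp,
	struct xfs_dquot	**gdqpp,
	struct xfs_dquot	**pdqpp)
{
	struct inode		*inode = VFS_I(ip);

	return xfs_qm_vop_dqalloc(ip, inode->i_uid, inode->i_gid,
			ip->i_projid, XFS_QMOPT_UQUOTA | XFS_QMOPT_GQUOTA |
			XFS_QMOPT_PQUOTA, udqpp, gdqpp, pdqpp);
}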
1755
1756/*
1757 * Actually transfer ownership, and do dquot modifications.
1758 * These were already reserved.
1759 */
1760struct xfs_dquot *
1761xfs_qm_vop_chown(
1762	struct xfs_trans	*tp,
1763	struct xfs_inode	*ip,
1764	struct xfs_dquot	**IO_olddq,
1765	struct xfs_dquot	*newdq)
1766{
1767	struct xfs_dquot	*prevdq;
1768	uint		bfield = XFS_IS_REALTIME_INODE(ip) ?
1769				 XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
1770
1771
1772	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1773	ASSERT(XFS_IS_QUOTA_ON(ip->i_mount));
1774
1775	/* old dquot */
1776	prevdq = *IO_olddq;
1777	ASSERT(prevdq);
1778	ASSERT(prevdq != newdq);
1779
1780	xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_nblocks));
1781	xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1);
1782
1783	/* the sparkling new dquot */
1784	xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_nblocks);
1785	xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
1786
1787	/*
1788	 * Back when we made quota reservations for the chown, we reserved the
1789	 * ondisk blocks + delalloc blocks with the new dquot.  Now that we've
1790	 * switched the dquots, decrease the new dquot's block reservation
1791	 * (having already bumped up the real counter) so that we don't have
1792	 * any reservation to give back when we commit.
1793	 */
1794	xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_RES_BLKS,
1795			-ip->i_delayed_blks);
1796
1797	/*
1798	 * Give the incore reservation for delalloc blocks back to the old
1799	 * dquot.  We don't normally handle delalloc quota reservations
1800	 * transactionally, so just lock the dquot and subtract from the
1801	 * reservation.  Dirty the transaction because it's too late to turn
1802	 * back now.
1803	 */
1804	tp->t_flags |= XFS_TRANS_DIRTY;
1805	xfs_dqlock(prevdq);
1806	ASSERT(prevdq->q_blk.reserved >= ip->i_delayed_blks);
1807	prevdq->q_blk.reserved -= ip->i_delayed_blks;
1808	xfs_dqunlock(prevdq);
1809
1810	/*
1811	 * Take an extra reference, because the inode is going to keep
1812	 * this dquot pointer even after the trans_commit.
1813	 */
1814	*IO_olddq = xfs_qm_dqhold(newdq);
1815
1816	return prevdq;
1817}
1818
1819int
1820xfs_qm_vop_rename_dqattach(
1821	struct xfs_inode	**i_tab)
1822{
1823	struct xfs_mount	*mp = i_tab[0]->i_mount;
1824	int			i;
1825
1826	if (!XFS_IS_QUOTA_ON(mp))
1827		return 0;
1828
1829	for (i = 0; (i < 4 && i_tab[i]); i++) {
1830		struct xfs_inode	*ip = i_tab[i];
1831		int			error;
1832
1833		/*
1834		 * Watch out for duplicate entries in the table.
1835		 */
1836		if (i == 0 || ip != i_tab[i-1]) {
1837			if (XFS_NOT_DQATTACHED(mp, ip)) {
1838				error = xfs_qm_dqattach(ip);
1839				if (error)
1840					return error;
1841			}
1842		}
1843	}
1844	return 0;
1845}
1846
1847void
1848xfs_qm_vop_create_dqattach(
1849	struct xfs_trans	*tp,
1850	struct xfs_inode	*ip,
1851	struct xfs_dquot	*udqp,
1852	struct xfs_dquot	*gdqp,
1853	struct xfs_dquot	*pdqp)
1854{
1855	struct xfs_mount	*mp = tp->t_mountp;
1856
1857	if (!XFS_IS_QUOTA_ON(mp))
1858		return;
1859
1860	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1861
1862	if (udqp && XFS_IS_UQUOTA_ON(mp)) {
1863		ASSERT(ip->i_udquot == NULL);
1864		ASSERT(i_uid_read(VFS_I(ip)) == udqp->q_id);
1865
1866		ip->i_udquot = xfs_qm_dqhold(udqp);
1867		xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
1868	}
1869	if (gdqp && XFS_IS_GQUOTA_ON(mp)) {
1870		ASSERT(ip->i_gdquot == NULL);
1871		ASSERT(i_gid_read(VFS_I(ip)) == gdqp->q_id);
1872
1873		ip->i_gdquot = xfs_qm_dqhold(gdqp);
1874		xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
1875	}
1876	if (pdqp && XFS_IS_PQUOTA_ON(mp)) {
1877		ASSERT(ip->i_pdquot == NULL);
1878		ASSERT(ip->i_projid == pdqp->q_id);
1879
1880		ip->i_pdquot = xfs_qm_dqhold(pdqp);
1881		xfs_trans_mod_dquot(tp, pdqp, XFS_TRANS_DQ_ICOUNT, 1);
1882	}
1883}
1884
1885/* Decide if this inode's dquot is near an enforcement boundary. */
1886bool
1887xfs_inode_near_dquot_enforcement(
1888	struct xfs_inode	*ip,
1889	xfs_dqtype_t		type)
1890{
1891	struct xfs_dquot	*dqp;
1892	int64_t			freesp;
1893
1894	/* We only care for quotas that are enabled and enforced. */
1895	dqp = xfs_inode_dquot(ip, type);
1896	if (!dqp || !xfs_dquot_is_enforced(dqp))
1897		return false;
1898
1899	if (xfs_dquot_res_over_limits(&dqp->q_ino) ||
1900	    xfs_dquot_res_over_limits(&dqp->q_rtb))
1901		return true;
1902
1903	/* For space on the data device, check the various thresholds. */
1904	if (!dqp->q_prealloc_hi_wmark)
1905		return false;
1906
1907	if (dqp->q_blk.reserved < dqp->q_prealloc_lo_wmark)
1908		return false;
1909
1910	if (dqp->q_blk.reserved >= dqp->q_prealloc_hi_wmark)
1911		return true;
1912
1913	freesp = dqp->q_prealloc_hi_wmark - dqp->q_blk.reserved;
1914	if (freesp < dqp->q_low_space[XFS_QLOWSP_5_PCNT])
1915		return true;
1916
1917	return false;
1918}
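
/*
 * Illustrative sketch, not part of the original file: a write path could
 * consult all three quota types to decide whether to scale back speculative
 * preallocation.  xfs_example_quota_constrained() is a hypothetical helper
 * named only for this sketch.
 */
static inline bool
xfs_example_quota_constrained(
	struct xfs_inode	*ip)
{
	return xfs_inode_near_dquot_enforcement(ip, XFS_DQTYPE_USER) ||
	       xfs_inode_near_dquot_enforcement(ip, XFS_DQTYPE_GROUP) ||
	       xfs_inode_near_dquot_enforcement(ip, XFS_DQTYPE_PROJ);
}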
1919