/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
#include "xfs_clnt.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir2.h"
#include "xfs_alloc.h"
#include "xfs_dmapi.h"
#include "xfs_quota.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_dir2_sf.h"
#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_ialloc.h"
#include "xfs_itable.h"
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_bmap.h"
#include "xfs_rw.h"
#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_trans_space.h"
#include "xfs_utils.h"
#include "xfs_qm.h"

/*
 * The global quota manager. There is only one of these for the entire
 * system, _not_ one per file system. XQM keeps track of the overall
 * quota functionality, including maintaining the freelist and hash
 * tables of dquots.
 */
mutex_t		xfs_Gqm_lock;
struct xfs_qm	*xfs_Gqm;
uint		ndquot;

kmem_zone_t	*qm_dqzone;
kmem_zone_t	*qm_dqtrxzone;
static kmem_shaker_t	xfs_qm_shaker;

static cred_t	xfs_zerocr;
static xfs_inode_t	xfs_zeroino;

STATIC void	xfs_qm_list_init(xfs_dqlist_t *, char *, int);
STATIC void	xfs_qm_list_destroy(xfs_dqlist_t *);

STATIC void	xfs_qm_freelist_init(xfs_frlist_t *);
STATIC void	xfs_qm_freelist_destroy(xfs_frlist_t *);
STATIC int	xfs_qm_mplist_nowait(xfs_mount_t *);
STATIC int	xfs_qm_dqhashlock_nowait(xfs_dquot_t *);

STATIC int	xfs_qm_init_quotainos(xfs_mount_t *);
STATIC int	xfs_qm_init_quotainfo(xfs_mount_t *);
STATIC int	xfs_qm_shake(int, gfp_t);

#ifdef DEBUG
extern mutex_t	qcheck_lock;
#endif

#ifdef QUOTADEBUG
#define XQM_LIST_PRINT(l, NXT, title) \
{ \
	xfs_dquot_t	*dqp; int i = 0; \
	cmn_err(CE_DEBUG, "%s (#%d)", title, (int) (l)->qh_nelems); \
	for (dqp = (l)->qh_next; dqp != NULL; dqp = dqp->NXT) { \
		cmn_err(CE_DEBUG, "   %d.  \"%d (%s)\"   " \
				  "bcnt = %d, icnt = %d, refs = %d", \
			++i, (int) be32_to_cpu(dqp->q_core.d_id), \
			DQFLAGTO_TYPESTR(dqp),	     \
			(int) be64_to_cpu(dqp->q_core.d_bcount), \
			(int) be64_to_cpu(dqp->q_core.d_icount), \
			(int) dqp->q_nrefs);  } \
}
#else
#define XQM_LIST_PRINT(l, NXT, title) do { } while (0)
#endif
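
/*
 * Example invocation (this exact call appears at the end of
 * xfs_qm_quotacheck below):
 *
 *	XQM_LIST_PRINT(&(XFS_QI_MPL_LIST(mp)), MPL_NEXT, "++++ Mp list +++");
 */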

/*
 * Initialize the XQM structure.
 * Note that there is not one quota manager per file system.
 */
STATIC struct xfs_qm *
xfs_Gqm_init(void)
{
	xfs_dqhash_t	*udqhash, *gdqhash;
	xfs_qm_t	*xqm;
	size_t		hsize;
	uint		i;

	/*
	 * Initialize the dquot hash tables.
	 */
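	/*
	 * kmem_zalloc_greedy() sizes the user hash table somewhere between
	 * the LOW and HIGH bounds, depending on how much memory the system
	 * can spare; the group table simply reuses whatever size the user
	 * table allocation settled on, and ndquot is then scaled to roughly
	 * 256 dquots per hash bucket.
	 */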
	udqhash = kmem_zalloc_greedy(&hsize,
				     XFS_QM_HASHSIZE_LOW, XFS_QM_HASHSIZE_HIGH,
				     KM_SLEEP | KM_MAYFAIL | KM_LARGE);
	gdqhash = kmem_zalloc(hsize, KM_SLEEP | KM_LARGE);
	hsize /= sizeof(xfs_dqhash_t);
	ndquot = hsize << 8;

	xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
	xqm->qm_dqhashmask = hsize - 1;
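	/*
	 * Note: hash buckets are selected by masking with qm_dqhashmask,
	 * which assumes the greedy allocation above returned a power-of-two
	 * number of buckets.
	 */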
	xqm->qm_usr_dqhtable = udqhash;
	xqm->qm_grp_dqhtable = gdqhash;
	ASSERT(xqm->qm_usr_dqhtable != NULL);
	ASSERT(xqm->qm_grp_dqhtable != NULL);

	for (i = 0; i < hsize; i++) {
		xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i);
		xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i);
	}

	/*
	 * Freelist of all dquots of all file systems
	 */
	xfs_qm_freelist_init(&(xqm->qm_dqfreelist));

	/*
	 * dquot zone. We register our own low-memory callback.
	 */
	if (!qm_dqzone) {
		xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t),
						"xfs_dquots");
		qm_dqzone = xqm->qm_dqzone;
	} else
		xqm->qm_dqzone = qm_dqzone;

	xfs_qm_shaker = kmem_shake_register(xfs_qm_shake);

	/*
	 * The t_dqinfo portion of transactions.
	 */
	if (!qm_dqtrxzone) {
		xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t),
						   "xfs_dqtrx");
		qm_dqtrxzone = xqm->qm_dqtrxzone;
	} else
		xqm->qm_dqtrxzone = qm_dqtrxzone;

	atomic_set(&xqm->qm_totaldquots, 0);
	xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
	xqm->qm_nrefs = 0;
#ifdef DEBUG
	mutex_init(&qcheck_lock);
#endif
	return xqm;
}

/*
 * Destroy the global quota manager when its reference count goes to zero.
 */
STATIC void
xfs_qm_destroy(
	struct xfs_qm	*xqm)
{
	int		hsize, i;

	ASSERT(xqm != NULL);
	ASSERT(xqm->qm_nrefs == 0);
	kmem_shake_deregister(xfs_qm_shaker);
	hsize = xqm->qm_dqhashmask + 1;
	for (i = 0; i < hsize; i++) {
		xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
		xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
	}
	kmem_free(xqm->qm_usr_dqhtable, hsize * sizeof(xfs_dqhash_t));
	kmem_free(xqm->qm_grp_dqhtable, hsize * sizeof(xfs_dqhash_t));
	xqm->qm_usr_dqhtable = NULL;
	xqm->qm_grp_dqhtable = NULL;
	xqm->qm_dqhashmask = 0;
	xfs_qm_freelist_destroy(&(xqm->qm_dqfreelist));
#ifdef DEBUG
	mutex_destroy(&qcheck_lock);
#endif
	kmem_free(xqm, sizeof(xfs_qm_t));
}

/*
 * Called at mount time to let XQM know that another file system is
 * starting quotas. This isn't crucial information as the individual mount
 * structures are pretty independent, but it helps the XQM keep a
 * global view of what's going on.
 */
/* ARGSUSED */
STATIC int
xfs_qm_hold_quotafs_ref(
	struct xfs_mount *mp)
{
	/*
	 * Need to lock the xfs_Gqm structure for things like this. For example,
	 * the structure could disappear between the entry to this routine and
	 * a HOLD operation if not locked. Note that XFS_QM_LOCK operates on
	 * the global xfs_Gqm_lock mutex, so it is safe to take even while
	 * xfs_Gqm itself is still NULL.
	 */
	XFS_QM_LOCK(xfs_Gqm);

	if (xfs_Gqm == NULL)
		xfs_Gqm = xfs_Gqm_init();
	/*
	 * We can keep a list of all filesystems with quotas mounted for
	 * debugging and statistical purposes, but ...
	 * Just take a reference and get out.
	 */
	XFS_QM_HOLD(xfs_Gqm);
	XFS_QM_UNLOCK(xfs_Gqm);

	return 0;
}


/*
 * Release the reference that a filesystem took at mount time,
 * so that we know when we need to destroy the entire quota manager.
 */
/* ARGSUSED */
STATIC void
xfs_qm_rele_quotafs_ref(
	struct xfs_mount *mp)
{
	xfs_dquot_t	*dqp, *nextdqp;

	ASSERT(xfs_Gqm);
	ASSERT(xfs_Gqm->qm_nrefs > 0);

	/*
	 * Go thru the freelist and destroy all inactive dquots.
	 */
	xfs_qm_freelist_lock(xfs_Gqm);

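	/*
	 * The freelist is circular and anchored at qm_dqfreelist itself, so
	 * the walk terminates when it comes back around to the head.  The
	 * next pointer is sampled before any XQM_FREELIST_REMOVE so that the
	 * iteration survives removal of the current dquot.
	 */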
	for (dqp = xfs_Gqm->qm_dqfreelist.qh_next;
	     dqp != (xfs_dquot_t *)&(xfs_Gqm->qm_dqfreelist); ) {
		xfs_dqlock(dqp);
		nextdqp = dqp->dq_flnext;
		if (dqp->dq_flags & XFS_DQ_INACTIVE) {
			ASSERT(dqp->q_mount == NULL);
			ASSERT(! XFS_DQ_IS_DIRTY(dqp));
			ASSERT(dqp->HL_PREVP == NULL);
			ASSERT(dqp->MPL_PREVP == NULL);
			XQM_FREELIST_REMOVE(dqp);
			xfs_dqunlock(dqp);
			xfs_qm_dqdestroy(dqp);
		} else {
			xfs_dqunlock(dqp);
		}
		dqp = nextdqp;
	}
	xfs_qm_freelist_unlock(xfs_Gqm);

	/*
	 * Destroy the entire XQM. If somebody mounts with quotaon, this'll
	 * be restarted.
	 */
	XFS_QM_LOCK(xfs_Gqm);
	XFS_QM_RELE(xfs_Gqm);
	if (xfs_Gqm->qm_nrefs == 0) {
		xfs_qm_destroy(xfs_Gqm);
		xfs_Gqm = NULL;
	}
	XFS_QM_UNLOCK(xfs_Gqm);
}

/*
 * This is called at mount time from xfs_mountfs to initialize the quotainfo
 * structure and start the global quota manager (xfs_Gqm) if it hasn't done
 * so already.  Note that the superblock has not been read in yet.
 */
void
xfs_qm_mount_quotainit(
	xfs_mount_t	*mp,
	uint		flags)
{
	/*
	 * User, project or group quotas have to be on.
	 */
	ASSERT(flags & (XFSMNT_UQUOTA | XFSMNT_PQUOTA | XFSMNT_GQUOTA));

	/*
	 * Initialize the flags in the mount structure. From this point
	 * onwards we look at m_qflags to figure out if quotas are ON/OFF, etc.
	 * Note that we enforce nothing if accounting is off.
	 * i.e. XFSMNT_*QUOTA must be ON for XFSMNT_*QUOTAENF.
	 * It isn't necessary to take the quotaoff lock to do this; this is
	 * called from mount.
	 */
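	/*
	 * Note that group and project quota enforcement share the single
	 * XFS_OQUOTA_ENFD flag, which is one reason XFSMNT_GQUOTA and
	 * XFSMNT_PQUOTA are treated as mutually exclusive below.
	 */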
	if (flags & XFSMNT_UQUOTA) {
		mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
		if (flags & XFSMNT_UQUOTAENF)
			mp->m_qflags |= XFS_UQUOTA_ENFD;
	}
	if (flags & XFSMNT_GQUOTA) {
		mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
		if (flags & XFSMNT_GQUOTAENF)
			mp->m_qflags |= XFS_OQUOTA_ENFD;
	} else if (flags & XFSMNT_PQUOTA) {
		mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
		if (flags & XFSMNT_PQUOTAENF)
			mp->m_qflags |= XFS_OQUOTA_ENFD;
	}
}

/*
 * Just destroy the quotainfo structure.
 */
void
xfs_qm_unmount_quotadestroy(
	xfs_mount_t	*mp)
{
	if (mp->m_quotainfo)
		xfs_qm_destroy_quotainfo(mp);
}


/*
 * This is called from xfs_mountfs to start quotas and initialize all
 * necessary data structures like quotainfo.  This is also responsible for
 * running a quotacheck as necessary.  We are guaranteed that the superblock
 * is consistently read in at this point.
 */
int
xfs_qm_mount_quotas(
	xfs_mount_t	*mp,
	int		mfsi_flags)
{
	unsigned long	s;
	int		error = 0;
	uint		sbf;


	/*
	 * Quotas on realtime volumes are not supported, so if this
	 * filesystem has any realtime extents we disable quotas
	 * immediately.
	 */
	if (mp->m_sb.sb_rextents) {
		cmn_err(CE_NOTE,
			"Cannot turn on quotas for realtime filesystem %s",
			mp->m_fsname);
		mp->m_qflags = 0;
		goto write_changes;
	}

	ASSERT(XFS_IS_QUOTA_RUNNING(mp));

	/*
	 * Allocate the quotainfo structure inside the mount struct, and
	 * create quotainode(s), and change/rev superblock if necessary.
	 */
	if ((error = xfs_qm_init_quotainfo(mp))) {
		/*
		 * We must turn off quotas.
		 */
		ASSERT(mp->m_quotainfo == NULL);
		mp->m_qflags = 0;
		goto write_changes;
	}
	/*
	 * If any of the quotas are not consistent, do a quotacheck.
	 */
	if (XFS_QM_NEED_QUOTACHECK(mp) &&
		!(mfsi_flags & XFS_MFSI_NO_QUOTACHECK)) {
		if ((error = xfs_qm_quotacheck(mp))) {
			/* Quotacheck has failed and quotas have
			 * been disabled.
			 */
			return XFS_ERROR(error);
		}
	}
	/*
	 * If one type of quotas is off, then it will lose its
	 * quotachecked status, since we won't be doing accounting for
	 * that type anymore.
	 */
	if (!XFS_IS_UQUOTA_ON(mp)) {
		mp->m_qflags &= ~XFS_UQUOTA_CHKD;
	}
	if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp))) {
		mp->m_qflags &= ~XFS_OQUOTA_CHKD;
	}

 write_changes:
	s = XFS_SB_LOCK(mp);
	sbf = mp->m_sb.sb_qflags;
	mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
	XFS_SB_UNLOCK(mp, s);

	if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
		if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) {
			/*
			 * We could only have been turning quotas off.
			 * We aren't in very good shape actually because
			 * the incore structures are convinced that quotas are
			 * off, but the on-disk superblock doesn't know that!
			 */
			ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
			xfs_fs_cmn_err(CE_ALERT, mp,
				"XFS mount_quotas: Superblock update failed!");
		}
	}

	if (error) {
		xfs_fs_cmn_err(CE_WARN, mp,
			"Failed to initialize disk quotas.");
	}
	return XFS_ERROR(error);
}

/*
 * Called from the vfsops layer.
 */
int
xfs_qm_unmount_quotas(
	xfs_mount_t	*mp)
{
	xfs_inode_t	*uqp, *gqp;
	int		error = 0;

	/*
	 * Release the dquots that the root inode, et al, might be holding,
	 * before we flush quotas and blow away the quotainfo structure.
	 */
	ASSERT(mp->m_rootip);
	xfs_qm_dqdetach(mp->m_rootip);
	if (mp->m_rbmip)
		xfs_qm_dqdetach(mp->m_rbmip);
	if (mp->m_rsumip)
		xfs_qm_dqdetach(mp->m_rsumip);

	/*
	 * Flush out the quota inodes.
	 */
	uqp = gqp = NULL;
	if (mp->m_quotainfo) {
		if ((uqp = mp->m_quotainfo->qi_uquotaip) != NULL) {
			xfs_ilock(uqp, XFS_ILOCK_EXCL);
			xfs_iflock(uqp);
			error = xfs_iflush(uqp, XFS_IFLUSH_SYNC);
			xfs_iunlock(uqp, XFS_ILOCK_EXCL);
			if (unlikely(error == EFSCORRUPTED)) {
				XFS_ERROR_REPORT("xfs_qm_unmount_quotas(1)",
						 XFS_ERRLEVEL_LOW, mp);
				goto out;
			}
		}
		if ((gqp = mp->m_quotainfo->qi_gquotaip) != NULL) {
			xfs_ilock(gqp, XFS_ILOCK_EXCL);
			xfs_iflock(gqp);
			error = xfs_iflush(gqp, XFS_IFLUSH_SYNC);
			xfs_iunlock(gqp, XFS_ILOCK_EXCL);
			if (unlikely(error == EFSCORRUPTED)) {
				XFS_ERROR_REPORT("xfs_qm_unmount_quotas(2)",
						 XFS_ERRLEVEL_LOW, mp);
				goto out;
			}
		}
	}
	if (uqp) {
		XFS_PURGE_INODE(uqp);
		mp->m_quotainfo->qi_uquotaip = NULL;
	}
	if (gqp) {
		XFS_PURGE_INODE(gqp);
		mp->m_quotainfo->qi_gquotaip = NULL;
	}
out:
	return XFS_ERROR(error);
}

/*
 * Flush all dquots of the given file system to disk. The dquots are
 * _not_ purged from memory here, just their data written to disk.
 */
STATIC int
xfs_qm_dqflush_all(
	xfs_mount_t	*mp,
	int		flags)
{
	int		recl;
	xfs_dquot_t	*dqp;
	int		niters;
	int		error;

	if (mp->m_quotainfo == NULL)
		return 0;
	niters = 0;
again:
	xfs_qm_mplist_lock(mp);
	FOREACH_DQUOT_IN_MP(dqp, mp) {
		xfs_dqlock(dqp);
		if (! XFS_DQ_IS_DIRTY(dqp)) {
			xfs_dqunlock(dqp);
			continue;
		}
		xfs_dqtrace_entry(dqp, "FLUSHALL: DQDIRTY");
		recl = XFS_QI_MPLRECLAIMS(mp);
		if (! xfs_qm_dqflock_nowait(dqp)) {
			/*
			 * If we can't grab the flush lock then check
			 * to see if the dquot has been flushed delayed
			 * write.  If so, grab its buffer and send it
			 * out immediately.  We'll be able to acquire
			 * the flush lock when the I/O completes.
			 */
			xfs_qm_dqflock_pushbuf_wait(dqp);
		}
		/*
		 * Let go of the mplist lock. We don't want to hold it
		 * across a disk write.
		 */
		xfs_qm_mplist_unlock(mp);
		error = xfs_qm_dqflush(dqp, flags);
		xfs_dqunlock(dqp);
		if (error)
			return error;

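		/*
		 * The mplist reclaims counter moves whenever a dquot is taken
		 * off the mplist.  If it changed while we had the lock
		 * dropped for the flush above, our position in the list is
		 * suspect, so restart the scan from the top.
		 */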
		xfs_qm_mplist_lock(mp);
		if (recl != XFS_QI_MPLRECLAIMS(mp)) {
			xfs_qm_mplist_unlock(mp);
			goto again;
		}
	}

	xfs_qm_mplist_unlock(mp);
	/* return ! busy */
	return 0;
}

/*
 * Release the group dquot pointers the user dquots may be
 * carrying around as a hint. mplist is locked on entry and exit.
 */
STATIC void
xfs_qm_detach_gdquots(
	xfs_mount_t	*mp)
{
	xfs_dquot_t	*dqp, *gdqp;
	int		nrecl;

 again:
	ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
	dqp = XFS_QI_MPLNEXT(mp);
	while (dqp) {
		xfs_dqlock(dqp);
		if ((gdqp = dqp->q_gdquot)) {
			xfs_dqlock(gdqp);
			dqp->q_gdquot = NULL;
		}
		xfs_dqunlock(dqp);

		if (gdqp) {
			/*
			 * Can't hold the mplist lock across a dqput.
			 * XXXmust convert to marker based iterations here.
			 */
			nrecl = XFS_QI_MPLRECLAIMS(mp);
			xfs_qm_mplist_unlock(mp);
			xfs_qm_dqput(gdqp);

			xfs_qm_mplist_lock(mp);
			if (nrecl != XFS_QI_MPLRECLAIMS(mp))
				goto again;
		}
		dqp = dqp->MPL_NEXT;
	}
}

/*
 * Go through all the incore dquots of this file system and take them
 * off the mplist and hashlist, if the dquot type matches the dqtype
 * parameter. This is used when turning off quota accounting for
 * users and/or groups, as well as when the filesystem is unmounting.
 */
STATIC int
xfs_qm_dqpurge_int(
	xfs_mount_t	*mp,
	uint		flags) /* QUOTAOFF/UMOUNTING/UQUOTA/PQUOTA/GQUOTA */
{
	xfs_dquot_t	*dqp;
	uint		dqtype;
	int		nrecl;
	xfs_dquot_t	*nextdqp;
	int		nmisses;

	if (mp->m_quotainfo == NULL)
		return 0;

	dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
	dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0;
	dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;

	xfs_qm_mplist_lock(mp);

	/*
	 * In the first pass through all incore dquots of this filesystem,
	 * we release the group dquot pointers the user dquots may be
	 * carrying around as a hint. We need to do this irrespective of
	 * what's being turned off.
	 */
	xfs_qm_detach_gdquots(mp);

      again:
	nmisses = 0;
	ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
	/*
	 * Try to get rid of all of the unwanted dquots. The idea is to
	 * get them off mplist and hashlist, but leave them on freelist.
	 */
	dqp = XFS_QI_MPLNEXT(mp);
	while (dqp) {
		/*
		 * It's OK to look at the type without taking dqlock here.
		 * We're holding the mplist lock here, and that's needed for
		 * a dqreclaim.
		 */
		if ((dqp->dq_flags & dqtype) == 0) {
			dqp = dqp->MPL_NEXT;
			continue;
		}

		if (! xfs_qm_dqhashlock_nowait(dqp)) {
			nrecl = XFS_QI_MPLRECLAIMS(mp);
			xfs_qm_mplist_unlock(mp);
			XFS_DQ_HASH_LOCK(dqp->q_hash);
			xfs_qm_mplist_lock(mp);

			/*
			 * XXXTheoretically, we can get into a very long
			 * ping pong game here.
			 * No one can be adding dquots to the mplist at
			 * this point, but somebody might be taking things off.
			 */
			if (nrecl != XFS_QI_MPLRECLAIMS(mp)) {
				XFS_DQ_HASH_UNLOCK(dqp->q_hash);
				goto again;
			}
		}

		/*
		 * Take the dquot off the mplist and hashlist. It may remain on
		 * freelist in INACTIVE state.
		 */
		nextdqp = dqp->MPL_NEXT;
		nmisses += xfs_qm_dqpurge(dqp, flags);
		dqp = nextdqp;
	}
	xfs_qm_mplist_unlock(mp);
	return nmisses;
}

int
xfs_qm_dqpurge_all(
	xfs_mount_t	*mp,
	uint		flags)
{
	int		ndquots;

	/*
	 * Purge the dquot cache.
	 * None of the dquots should really be busy at this point. A purge
	 * pass can still miss dquots that are temporarily busy, though, so
	 * we back off for a period proportional to the number of misses
	 * and retry until a pass completes with no misses.
	 */
	if (mp->m_quotainfo) {
		while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) {
			delay(ndquots * 10);
		}
	}
	return 0;
}

STATIC int
xfs_qm_dqattach_one(
	xfs_inode_t	*ip,
	xfs_dqid_t	id,
	uint		type,
	uint		doalloc,
	uint		dolock,
	xfs_dquot_t	*udqhint, /* hint */
	xfs_dquot_t	**IO_idqpp)
{
	xfs_dquot_t	*dqp;
	int		error;

	ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
	error = 0;
	/*
	 * See if we already have it in the inode itself. IO_idqpp is
	 * &i_udquot or &i_gdquot. This made the code look weird, but
	 * made the logic a lot simpler.
	 */
	if ((dqp = *IO_idqpp)) {
		if (dolock)
			xfs_dqlock(dqp);
		xfs_dqtrace_entry(dqp, "DQATTACH: found in ip");
		goto done;
	}

	/*
	 * udqhint is the i_udquot field in the inode, and is non-NULL only
	 * when the type arg is group/project. Its purpose is to save a
	 * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside
	 * the user dquot.
	 */
	ASSERT(!udqhint || type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
	if (udqhint && !dolock)
		xfs_dqlock(udqhint);

	/*
	 * No need to take dqlock to look at the id.
	 * The ID can't change until it gets reclaimed, and it won't
	 * be reclaimed as long as we have a ref from inode and we hold
	 * the ilock.
	 */
	if (udqhint &&
	    (dqp = udqhint->q_gdquot) &&
	    (be32_to_cpu(dqp->q_core.d_id) == id)) {
		ASSERT(XFS_DQ_IS_LOCKED(udqhint));
		xfs_dqlock(dqp);
		XFS_DQHOLD(dqp);
		ASSERT(*IO_idqpp == NULL);
		*IO_idqpp = dqp;
		if (!dolock) {
			xfs_dqunlock(dqp);
			xfs_dqunlock(udqhint);
		}
		goto done;
	}
	/*
	 * We can't hold a dquot lock when we call the dqget code:
	 * we'd deadlock in no time because of lock ordering - the
	 * inode lock comes before any dquot lock, and we may drop and
	 * reacquire the ilock in xfs_qm_dqget().
	 */
	if (udqhint)
		xfs_dqunlock(udqhint);
	/*
	 * Find the dquot from somewhere. This bumps the reference
	 * count of the dquot and returns it locked.
	 * This can return ENOENT if the dquot didn't exist on
	 * disk and we didn't ask it to allocate;
	 * ESRCH if quotas got turned off suddenly.
	 */
	if ((error = xfs_qm_dqget(ip->i_mount, ip, id, type,
				 doalloc|XFS_QMOPT_DOWARN, &dqp))) {
		if (udqhint && dolock)
			xfs_dqlock(udqhint);
		goto done;
	}

	xfs_dqtrace_entry(dqp, "DQATTACH: found by dqget");
	/*
	 * dqget may have dropped and re-acquired the ilock, but it guarantees
	 * that the dquot returned is the one that should go in the inode.
	 */
	*IO_idqpp = dqp;
	ASSERT(dqp);
	ASSERT(XFS_DQ_IS_LOCKED(dqp));
	if (! dolock) {
		xfs_dqunlock(dqp);
		goto done;
	}
	if (! udqhint)
		goto done;

	ASSERT(udqhint);
	ASSERT(dolock);
	ASSERT(XFS_DQ_IS_LOCKED(dqp));
	if (! xfs_qm_dqlock_nowait(udqhint)) {
		xfs_dqunlock(dqp);
		xfs_dqlock(udqhint);
		xfs_dqlock(dqp);
	}
      done:
#ifdef QUOTADEBUG
	if (udqhint) {
		if (dolock)
			ASSERT(XFS_DQ_IS_LOCKED(udqhint));
	}
	if (! error) {
		if (dolock)
			ASSERT(XFS_DQ_IS_LOCKED(dqp));
	}
#endif
	return error;
}


/*
 * Given a udquot and gdquot, attach a ptr to the group dquot in the
 * udquot as a hint for future lookups. The idea sounds simple, but the
 * execution isn't, because the udquot might have a group dquot attached
 * already and getting rid of that gets us into lock ordering constraints.
 * The process is complicated more by the fact that the dquots may or may not
 * be locked on entry.
 */
STATIC void
xfs_qm_dqattach_grouphint(
	xfs_dquot_t	*udq,
	xfs_dquot_t	*gdq,
	uint		locked)
{
	xfs_dquot_t	*tmp;

#ifdef QUOTADEBUG
	if (locked) {
		ASSERT(XFS_DQ_IS_LOCKED(udq));
		ASSERT(XFS_DQ_IS_LOCKED(gdq));
	}
#endif
	if (! locked)
		xfs_dqlock(udq);

	if ((tmp = udq->q_gdquot)) {
		if (tmp == gdq) {
			if (! locked)
				xfs_dqunlock(udq);
			return;
		}

		udq->q_gdquot = NULL;
		/*
		 * We can't keep any dqlocks when calling dqrele,
		 * because the freelist lock comes before dqlocks.
		 */
		xfs_dqunlock(udq);
		if (locked)
			xfs_dqunlock(gdq);
		/*
		 * We took a hard reference once upon a time in dqget,
		 * so give it back when the udquot no longer points at it.
		 * dqput() does the unlocking of the dquot.
		 */
		xfs_qm_dqrele(tmp);

		xfs_dqlock(udq);
		xfs_dqlock(gdq);

	} else {
		ASSERT(XFS_DQ_IS_LOCKED(udq));
		if (! locked) {
			xfs_dqlock(gdq);
		}
	}

	ASSERT(XFS_DQ_IS_LOCKED(udq));
	ASSERT(XFS_DQ_IS_LOCKED(gdq));
	/*
	 * Somebody could have attached a gdquot here,
	 * when we dropped the uqlock. If so, just do nothing.
	 */
	if (udq->q_gdquot == NULL) {
		XFS_DQHOLD(gdq);
		udq->q_gdquot = gdq;
	}
	if (! locked) {
		xfs_dqunlock(gdq);
		xfs_dqunlock(udq);
	}
}


/*
 * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
 * into account.
 * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
 * If XFS_QMOPT_DQLOCK, the dquot(s) will be returned locked. This option
 * pretty much made this code a complete mess, but it has been pretty useful.
 * If XFS_QMOPT_ILOCKED, then the inode sent is already locked EXCL.
 * The inode may get unlocked and relocked in here, and the caller must deal
 * with the consequences.
 */
int
xfs_qm_dqattach(
	xfs_inode_t	*ip,
	uint		flags)
{
	xfs_mount_t	*mp = ip->i_mount;
	uint		nquotas = 0;
	int		error = 0;

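	/*
	 * Nothing to do if quotas are off, the dquots are already attached,
	 * or this inode is one of the quota inodes itself (the quota inodes
	 * are never charged to any quota).
	 */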
	if ((! XFS_IS_QUOTA_ON(mp)) ||
	    (! XFS_NOT_DQATTACHED(mp, ip)) ||
	    (ip->i_ino == mp->m_sb.sb_uquotino) ||
	    (ip->i_ino == mp->m_sb.sb_gquotino))
		return 0;

	ASSERT((flags & XFS_QMOPT_ILOCKED) == 0 ||
	       XFS_ISLOCKED_INODE_EXCL(ip));

	if (! (flags & XFS_QMOPT_ILOCKED))
		xfs_ilock(ip, XFS_ILOCK_EXCL);

	if (XFS_IS_UQUOTA_ON(mp)) {
		error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
						flags & XFS_QMOPT_DQALLOC,
						flags & XFS_QMOPT_DQLOCK,
						NULL, &ip->i_udquot);
		if (error)
			goto done;
		nquotas++;
	}
	ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
	if (XFS_IS_OQUOTA_ON(mp)) {
		error = XFS_IS_GQUOTA_ON(mp) ?
			xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
						flags & XFS_QMOPT_DQALLOC,
						flags & XFS_QMOPT_DQLOCK,
						ip->i_udquot, &ip->i_gdquot) :
			xfs_qm_dqattach_one(ip, ip->i_d.di_projid, XFS_DQ_PROJ,
						flags & XFS_QMOPT_DQALLOC,
						flags & XFS_QMOPT_DQLOCK,
						ip->i_udquot, &ip->i_gdquot);
		/*
		 * Don't worry about the udquot that we may have
		 * attached above. It'll get detached, if not already.
		 */
		if (error)
			goto done;
		nquotas++;
	}

	/*
	 * Attach this group quota to the user quota as a hint.
	 * This WON'T, in general, result in a thrash.
	 */
	if (nquotas == 2) {
		ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
		ASSERT(ip->i_udquot);
		ASSERT(ip->i_gdquot);

		/*
		 * We may or may not have the i_udquot locked at this point,
		 * but this check is OK since we don't depend on the i_gdquot to
		 * be accurate 100% all the time. It is just a hint, and this
		 * will succeed in general.
		 */
		if (ip->i_udquot->q_gdquot == ip->i_gdquot)
			goto done;
		/*
		 * Attach i_gdquot to the gdquot hint inside the i_udquot.
		 */
		xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot,
					 flags & XFS_QMOPT_DQLOCK);
	}

      done:

#ifdef QUOTADEBUG
	if (! error) {
		if (ip->i_udquot) {
			if (flags & XFS_QMOPT_DQLOCK)
				ASSERT(XFS_DQ_IS_LOCKED(ip->i_udquot));
		}
		if (ip->i_gdquot) {
			if (flags & XFS_QMOPT_DQLOCK)
				ASSERT(XFS_DQ_IS_LOCKED(ip->i_gdquot));
		}
		if (XFS_IS_UQUOTA_ON(mp))
			ASSERT(ip->i_udquot);
		if (XFS_IS_OQUOTA_ON(mp))
			ASSERT(ip->i_gdquot);
	}
#endif

	if (! (flags & XFS_QMOPT_ILOCKED))
		xfs_iunlock(ip, XFS_ILOCK_EXCL);

#ifdef QUOTADEBUG
	else
		ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
#endif
	return error;
}

/*
 * Release dquots (and their references) if any.
 * The inode should be locked EXCL except when this is called by
 * xfs_ireclaim.
 */
void
xfs_qm_dqdetach(
	xfs_inode_t	*ip)
{
	if (!(ip->i_udquot || ip->i_gdquot))
		return;

	ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino);
	ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino);
	if (ip->i_udquot) {
		xfs_dqtrace_entry_ino(ip->i_udquot, "DQDETTACH", ip);
		xfs_qm_dqrele(ip->i_udquot);
		ip->i_udquot = NULL;
	}
	if (ip->i_gdquot) {
		xfs_dqtrace_entry_ino(ip->i_gdquot, "DQDETTACH", ip);
		xfs_qm_dqrele(ip->i_gdquot);
		ip->i_gdquot = NULL;
	}
}

/*
 * This is called by VFS_SYNC; the flags arg determines the caller,
 * and its motives, as done in xfs_sync.
 *
 * vfs_sync: SYNC_FSDATA|SYNC_ATTR|SYNC_BDFLUSH 0x31
 * syscall sync: SYNC_FSDATA|SYNC_ATTR|SYNC_DELWRI 0x25
 * umountroot : SYNC_WAIT | SYNC_CLOSE | SYNC_ATTR | SYNC_FSDATA
 */

int
xfs_qm_sync(
	xfs_mount_t	*mp,
	short		flags)
{
	int		recl, restarts;
	xfs_dquot_t	*dqp;
	uint		flush_flags;
	boolean_t	nowait;
	int		error;

	restarts = 0;
	/*
	 * We won't block unless we are asked to.
	 */
	nowait = (boolean_t)(flags & SYNC_BDFLUSH || (flags & SYNC_WAIT) == 0);

  again:
	xfs_qm_mplist_lock(mp);
	/*
	 * dqpurge_all() also takes the mplist lock and iterates thru all
	 * dquots in quotaoff. However, if the QUOTA_ACTIVE bits are not
	 * cleared when we have the mplist lock, we know that dquots will be
	 * consistent as long as we have it locked.
	 */
	if (! XFS_IS_QUOTA_ON(mp)) {
		xfs_qm_mplist_unlock(mp);
		return 0;
	}
	FOREACH_DQUOT_IN_MP(dqp, mp) {
		/*
		 * If this is vfs_sync calling, then skip the dquots that
		 * don't 'seem' to be dirty. i.e. don't acquire dqlock.
		 * This is very similar to what xfs_sync does with inodes.
		 */
		if (flags & SYNC_BDFLUSH) {
			if (! XFS_DQ_IS_DIRTY(dqp))
				continue;
		}

		if (nowait) {
			/*
			 * Try to acquire the dquot lock. We are NOT out of
			 * lock order, but we just don't want to wait for this
			 * lock, unless somebody wanted us to.
			 */
			if (! xfs_qm_dqlock_nowait(dqp))
				continue;
		} else {
			xfs_dqlock(dqp);
		}

		/*
		 * Now, find out for sure if this dquot is dirty or not.
		 */
		if (! XFS_DQ_IS_DIRTY(dqp)) {
			xfs_dqunlock(dqp);
			continue;
		}

		recl = XFS_QI_MPLRECLAIMS(mp);
		if (! xfs_qm_dqflock_nowait(dqp)) {
			if (nowait) {
				xfs_dqunlock(dqp);
				continue;
			}
			/*
			 * If we can't grab the flush lock, the caller really
			 * wanted us to give this our best shot, so see if we
			 * can give a push to the buffer before we wait on the
			 * flush lock. At this point, we know that even though
			 * the dquot is being flushed, it has (new) dirty data.
			 */
			xfs_qm_dqflock_pushbuf_wait(dqp);
		}
		/*
		 * Let go of the mplist lock. We don't want to hold it
		 * across a disk write.
		 */
		flush_flags = (nowait) ? XFS_QMOPT_DELWRI : XFS_QMOPT_SYNC;
		xfs_qm_mplist_unlock(mp);
		xfs_dqtrace_entry(dqp, "XQM_SYNC: DQFLUSH");
		error = xfs_qm_dqflush(dqp, flush_flags);
		xfs_dqunlock(dqp);
		if (error && XFS_FORCED_SHUTDOWN(mp))
			return 0;	/* Need to prevent umount failure */
		else if (error)
			return error;

		xfs_qm_mplist_lock(mp);
		if (recl != XFS_QI_MPLRECLAIMS(mp)) {
			if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS)
				break;

			xfs_qm_mplist_unlock(mp);
			goto again;
		}
	}

	xfs_qm_mplist_unlock(mp);
	return 0;
}


/*
 * This initializes all the quota information that's kept in the
 * mount structure.
 */
STATIC int
xfs_qm_init_quotainfo(
	xfs_mount_t	*mp)
{
	xfs_quotainfo_t *qinf;
	int		error;
	xfs_dquot_t	*dqp;

	ASSERT(XFS_IS_QUOTA_RUNNING(mp));

	/*
	 * Tell XQM that we exist as soon as possible.
	 */
	if ((error = xfs_qm_hold_quotafs_ref(mp))) {
		return error;
	}

	qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);

	/*
	 * See if quotainodes are setup, and if not, allocate them,
	 * and change the superblock accordingly.
	 */
	if ((error = xfs_qm_init_quotainos(mp))) {
		kmem_free(qinf, sizeof(xfs_quotainfo_t));
		mp->m_quotainfo = NULL;
		return error;
	}

	spinlock_init(&qinf->qi_pinlock, "xfs_qinf_pin");
	xfs_qm_list_init(&qinf->qi_dqlist, "mpdqlist", 0);
	qinf->qi_dqreclaims = 0;

	/* mutex used to serialize quotaoffs */
	mutex_init(&qinf->qi_quotaofflock);

	/* Precalc some constants */
	qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
	ASSERT(qinf->qi_dqchunklen);
	qinf->qi_dqperchunk = BBTOB(qinf->qi_dqchunklen);
	do_div(qinf->qi_dqperchunk, sizeof(xfs_dqblk_t));
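	/*
	 * do_div() divides in place, so qi_dqperchunk now holds the number
	 * of xfs_dqblk_t records that fit in one dquot chunk.
	 */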

	mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);

	/*
	 * We try to get the limits from the superuser's limits fields.
	 * This is quite hacky, but it is standard quota practice.
	 * We look at the USR dquot with id == 0 first, but if user quotas
	 * are not enabled we fall back to the GRP (or PRJ) dquot with id == 0.
	 * We don't really care to keep separate default limits for user
	 * and group quotas, at least not at this point.
	 */
	error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0,
			     XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER :
			     (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
				XFS_DQ_PROJ),
			     XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN,
			     &dqp);
	if (! error) {
		xfs_disk_dquot_t	*ddqp = &dqp->q_core;

		/*
		 * The warnings and timers set the grace period given to
		 * a user or group before he or she can no longer perform
		 * any more writing. If it is zero, a default is used.
		 */
		qinf->qi_btimelimit = ddqp->d_btimer ?
			be32_to_cpu(ddqp->d_btimer) : XFS_QM_BTIMELIMIT;
		qinf->qi_itimelimit = ddqp->d_itimer ?
			be32_to_cpu(ddqp->d_itimer) : XFS_QM_ITIMELIMIT;
		qinf->qi_rtbtimelimit = ddqp->d_rtbtimer ?
			be32_to_cpu(ddqp->d_rtbtimer) : XFS_QM_RTBTIMELIMIT;
		qinf->qi_bwarnlimit = ddqp->d_bwarns ?
			be16_to_cpu(ddqp->d_bwarns) : XFS_QM_BWARNLIMIT;
		qinf->qi_iwarnlimit = ddqp->d_iwarns ?
			be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT;
		qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ?
			be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT;
		qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
		qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit);
		qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
		qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
		qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
		qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);

		/*
		 * We sent the XFS_QMOPT_DQSUSER flag to dqget because
		 * we don't want this dquot cached. We haven't done a
		 * quotacheck yet, and quotacheck doesn't like incore dquots.
		 */
		xfs_qm_dqdestroy(dqp);
	} else {
		qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
		qinf->qi_itimelimit = XFS_QM_ITIMELIMIT;
		qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT;
		qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT;
		qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT;
		qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
	}

	return 0;
}


/*
 * Gets called when unmounting a filesystem or when all quotas get
 * turned off.
 * This purges the quota inodes, destroys locks and frees itself.
 */
void
xfs_qm_destroy_quotainfo(
	xfs_mount_t	*mp)
{
	xfs_quotainfo_t *qi;

	qi = mp->m_quotainfo;
	ASSERT(qi != NULL);
	ASSERT(xfs_Gqm != NULL);

	/*
	 * Release the reference that XQM kept, so that we know
	 * when the XQM structure should be freed. We cannot assume
	 * that xfs_Gqm is non-null after this point.
	 */
	xfs_qm_rele_quotafs_ref(mp);

	spinlock_destroy(&qi->qi_pinlock);
	xfs_qm_list_destroy(&qi->qi_dqlist);

	if (qi->qi_uquotaip) {
		XFS_PURGE_INODE(qi->qi_uquotaip);
		qi->qi_uquotaip = NULL; /* paranoia */
	}
	if (qi->qi_gquotaip) {
		XFS_PURGE_INODE(qi->qi_gquotaip);
		qi->qi_gquotaip = NULL;
	}
	mutex_destroy(&qi->qi_quotaofflock);
	kmem_free(qi, sizeof(xfs_quotainfo_t));
	mp->m_quotainfo = NULL;
}



/* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */

/* ARGSUSED */
STATIC void
xfs_qm_list_init(
	xfs_dqlist_t	*list,
	char		*str,
	int		n)
{
	mutex_init(&list->qh_lock);
	list->qh_next = NULL;
	list->qh_version = 0;
	list->qh_nelems = 0;
}

STATIC void
xfs_qm_list_destroy(
	xfs_dqlist_t	*list)
{
	mutex_destroy(&(list->qh_lock));
}


/*
 * Stripped down version of dqattach. This doesn't attach, or even look at the
 * dquots attached to the inode. The rationale is that there won't be any
 * attached at the time this is called from quotacheck.
 */
STATIC int
xfs_qm_dqget_noattach(
	xfs_inode_t	*ip,
	xfs_dquot_t	**O_udqpp,
	xfs_dquot_t	**O_gdqpp)
{
	int		error;
	xfs_mount_t	*mp;
	xfs_dquot_t	*udqp, *gdqp;

	ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
	mp = ip->i_mount;
	udqp = NULL;
	gdqp = NULL;

	if (XFS_IS_UQUOTA_ON(mp)) {
		ASSERT(ip->i_udquot == NULL);
		/*
		 * We want the dquot allocated if it doesn't exist.
		 */
		if ((error = xfs_qm_dqget(mp, ip, ip->i_d.di_uid, XFS_DQ_USER,
					 XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN,
					 &udqp))) {
			/*
			 * Shouldn't be able to turn off quotas here.
			 */
			ASSERT(error != ESRCH);
			ASSERT(error != ENOENT);
			return error;
		}
		ASSERT(udqp);
	}

	if (XFS_IS_OQUOTA_ON(mp)) {
		ASSERT(ip->i_gdquot == NULL);
		if (udqp)
			xfs_dqunlock(udqp);
		error = XFS_IS_GQUOTA_ON(mp) ?
				xfs_qm_dqget(mp, ip,
					     ip->i_d.di_gid, XFS_DQ_GROUP,
					     XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
					     &gdqp) :
				xfs_qm_dqget(mp, ip,
					     ip->i_d.di_projid, XFS_DQ_PROJ,
					     XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
					     &gdqp);
		if (error) {
			if (udqp)
				xfs_qm_dqrele(udqp);
			ASSERT(error != ESRCH);
			ASSERT(error != ENOENT);
			return error;
		}
		ASSERT(gdqp);

		/* Reacquire the locks in the right order */
		if (udqp) {
			if (! xfs_qm_dqlock_nowait(udqp)) {
				xfs_dqunlock(gdqp);
				xfs_dqlock(udqp);
				xfs_dqlock(gdqp);
			}
		}
	}

	*O_udqpp = udqp;
	*O_gdqpp = gdqp;

#ifdef QUOTADEBUG
	if (udqp) ASSERT(XFS_DQ_IS_LOCKED(udqp));
	if (gdqp) ASSERT(XFS_DQ_IS_LOCKED(gdqp));
#endif
	return 0;
}

/*
 * Create an inode and return with a reference already taken, but unlocked.
 * This is how we create quota inodes.
 */
STATIC int
xfs_qm_qino_alloc(
	xfs_mount_t	*mp,
	xfs_inode_t	**ip,
	__int64_t	sbfields,
	uint		flags)
{
	xfs_trans_t	*tp;
	int		error;
	unsigned long	s;
	int		committed;

	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE);
	if ((error = xfs_trans_reserve(tp,
				      XFS_QM_QINOCREATE_SPACE_RES(mp),
				      XFS_CREATE_LOG_RES(mp), 0,
				      XFS_TRANS_PERM_LOG_RES,
				      XFS_CREATE_LOG_COUNT))) {
		xfs_trans_cancel(tp, 0);
		return error;
	}

	if ((error = xfs_dir_ialloc(&tp, &xfs_zeroino, S_IFREG, 1, 0,
				   &xfs_zerocr, 0, 1, ip, &committed))) {
		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
				 XFS_TRANS_ABORT);
		return error;
	}

	/*
	 * Keep an extra reference to this quota inode. This inode is
	 * locked exclusively and joined to the transaction already.
	 */
	ASSERT(XFS_ISLOCKED_INODE_EXCL(*ip));
	VN_HOLD(XFS_ITOV((*ip)));

	/*
	 * Make the changes in the superblock, and log those too.
	 * sbfields arg may contain fields other than *QUOTINO;
	 * VERSIONNUM for example.
	 */
	s = XFS_SB_LOCK(mp);
	if (flags & XFS_QMOPT_SBVERSION) {
#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
		unsigned oldv = mp->m_sb.sb_versionnum;
#endif
		ASSERT(!XFS_SB_VERSION_HASQUOTA(&mp->m_sb));
		ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
				   XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) ==
		       (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
			XFS_SB_GQUOTINO | XFS_SB_QFLAGS));

		XFS_SB_VERSION_ADDQUOTA(&mp->m_sb);
		mp->m_sb.sb_uquotino = NULLFSINO;
		mp->m_sb.sb_gquotino = NULLFSINO;

		/* qflags will get updated _after_ quotacheck */
		mp->m_sb.sb_qflags = 0;
#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
		cmn_err(CE_NOTE,
			"Old superblock version %x, converting to %x.",
			oldv, mp->m_sb.sb_versionnum);
#endif
	}
	if (flags & XFS_QMOPT_UQUOTA)
		mp->m_sb.sb_uquotino = (*ip)->i_ino;
	else
		mp->m_sb.sb_gquotino = (*ip)->i_ino;
	XFS_SB_UNLOCK(mp, s);
	xfs_mod_sb(tp, sbfields);

	if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) {
		xfs_fs_cmn_err(CE_ALERT, mp, "XFS qino_alloc failed!");
		return error;
	}
	return 0;
}


STATIC int
xfs_qm_reset_dqcounts(
	xfs_mount_t	*mp,
	xfs_buf_t	*bp,
	xfs_dqid_t	id,
	uint		type)
{
	xfs_disk_dquot_t	*ddq;
	int			j;

	xfs_buftrace("RESET DQUOTS", bp);
	/*
	 * Reset all counters and timers. They'll be
	 * started afresh by xfs_qm_quotacheck.
	 */
#ifdef DEBUG
	j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
	do_div(j, sizeof(xfs_dqblk_t));
	ASSERT(XFS_QM_DQPERBLK(mp) == j);
#endif
	ddq = (xfs_disk_dquot_t *)XFS_BUF_PTR(bp);
	for (j = 0; j < XFS_QM_DQPERBLK(mp); j++) {
		/*
		 * Do a sanity check, and if needed, repair the dqblk. Don't
		 * output any warnings because it's perfectly possible to
		 * find uninitialised dquot blks. See comment in xfs_qm_dqcheck.
		 */
		(void) xfs_qm_dqcheck(ddq, id+j, type, XFS_QMOPT_DQREPAIR,
				      "xfs_quotacheck");
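		/*
		 * Only the usage counters, timers and warning counts are
		 * zeroed here; the limits stored in the dquot are
		 * deliberately preserved.
		 */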
		ddq->d_bcount = 0;
		ddq->d_icount = 0;
		ddq->d_rtbcount = 0;
		ddq->d_btimer = 0;
		ddq->d_itimer = 0;
		ddq->d_rtbtimer = 0;
		ddq->d_bwarns = 0;
		ddq->d_iwarns = 0;
		ddq->d_rtbwarns = 0;
		ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
	}

	return 0;
}

STATIC int
xfs_qm_dqiter_bufs(
	xfs_mount_t	*mp,
	xfs_dqid_t	firstid,
	xfs_fsblock_t	bno,
	xfs_filblks_t	blkcnt,
	uint		flags)
{
	xfs_buf_t	*bp;
	int		error;
	int		notcommitted;
	int		incr;
	int		type;

	ASSERT(blkcnt > 0);
	notcommitted = 0;
	incr = (blkcnt > XFS_QM_MAX_DQCLUSTER_LOGSZ) ?
		XFS_QM_MAX_DQCLUSTER_LOGSZ : blkcnt;
	type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER :
		(flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP);
	error = 0;

	/*
	 * The blkcnt arg can be a very big number, and might even be
	 * larger than the log itself. So, we have to break it up into
	 * manageable-sized transactions.
	 * Note that we don't start a permanent transaction here; we might
	 * not be able to get a log reservation for the whole thing up front,
	 * and we don't really care to either, because we just discard
	 * everything if we were to crash in the middle of this loop.
	 */
	while (blkcnt--) {
		error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
			      XFS_FSB_TO_DADDR(mp, bno),
			      (int)XFS_QI_DQCHUNKLEN(mp), 0, &bp);
		if (error)
			break;

		(void) xfs_qm_reset_dqcounts(mp, bp, firstid, type);
		xfs_bdwrite(mp, bp);
		/*
		 * Go to the next block.
		 */
		bno++;
		firstid += XFS_QM_DQPERBLK(mp);
	}
	return error;
}

/*
 * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling a
 * caller supplied function for every chunk of dquots that we find.
 */
STATIC int
xfs_qm_dqiterate(
	xfs_mount_t	*mp,
	xfs_inode_t	*qip,
	uint		flags)
{
	xfs_bmbt_irec_t		*map;
	int			i, nmaps;	/* number of map entries */
	int			error;		/* return value */
	xfs_fileoff_t		lblkno;
	xfs_filblks_t		maxlblkcnt;
	xfs_dqid_t		firstid;
	xfs_fsblock_t		rablkno;
	xfs_filblks_t		rablkcnt;

	error = 0;
	/*
	 * This looks racy, but we can't keep an inode lock across a
	 * trans_reserve. But, this gets called during quotacheck, and that
	 * happens only at mount time which is single threaded.
	 */
	if (qip->i_d.di_nblocks == 0)
		return 0;

	map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP);

	lblkno = 0;
	maxlblkcnt = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
	do {
		nmaps = XFS_DQITER_MAP_SIZE;
		/*
		 * We aren't changing the inode itself. Just changing
		 * some of its data. No new blocks are added here, and
		 * the inode is never added to the transaction.
		 */
		xfs_ilock(qip, XFS_ILOCK_SHARED);
		error = xfs_bmapi(NULL, qip, lblkno,
				  maxlblkcnt - lblkno,
				  XFS_BMAPI_METADATA,
				  NULL,
				  0, map, &nmaps, NULL, NULL);
		xfs_iunlock(qip, XFS_ILOCK_SHARED);
		if (error)
			break;

		ASSERT(nmaps <= XFS_DQITER_MAP_SIZE);
		for (i = 0; i < nmaps; i++) {
			ASSERT(map[i].br_startblock != DELAYSTARTBLOCK);
			ASSERT(map[i].br_blockcount);

			lblkno += map[i].br_blockcount;

			if (map[i].br_startblock == HOLESTARTBLOCK)
				continue;

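			/*
			 * The quota file holds XFS_QM_DQPERBLK(mp) dquots per
			 * filesystem block, laid out in increasing id order,
			 * so the file offset of an extent determines the id
			 * of the first dquot in it.
			 */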
			firstid = (xfs_dqid_t) map[i].br_startoff *
				XFS_QM_DQPERBLK(mp);
			/*
			 * Do a read-ahead on the next extent.
			 */
			if ((i+1 < nmaps) &&
			    (map[i+1].br_startblock != HOLESTARTBLOCK)) {
				rablkcnt =  map[i+1].br_blockcount;
				rablkno = map[i+1].br_startblock;
				while (rablkcnt--) {
					xfs_baread(mp->m_ddev_targp,
					       XFS_FSB_TO_DADDR(mp, rablkno),
					       (int)XFS_QI_DQCHUNKLEN(mp));
					rablkno++;
				}
			}
			/*
			 * Iterate thru all the blks in the extent and
			 * reset the counters of all the dquots inside them.
			 */
			if ((error = xfs_qm_dqiter_bufs(mp,
						       firstid,
						       map[i].br_startblock,
						       map[i].br_blockcount,
						       flags))) {
				break;
			}
		}

		if (error)
			break;
	} while (nmaps > 0);

	kmem_free(map, XFS_DQITER_MAP_SIZE * sizeof(*map));

	return error;
}

/*
 * Called by dqusage_adjust in doing a quotacheck.
 * Given the inode, and a dquot (either USR or GRP, doesn't matter),
 * this updates its incore copy as well as the buffer copy. This is
 * so that once the quotacheck is done, we can just log all the buffers,
 * as opposed to logging numerous updates to individual dquots.
 */
STATIC void
xfs_qm_quotacheck_dqadjust(
	xfs_dquot_t		*dqp,
	xfs_qcnt_t		nblks,
	xfs_qcnt_t		rtblks)
{
	ASSERT(XFS_DQ_IS_LOCKED(dqp));
	xfs_dqtrace_entry(dqp, "QCHECK DQADJUST");
	/*
	 * Adjust the inode count and the block count to reflect this inode's
	 * resource usage.
	 */
	be64_add(&dqp->q_core.d_icount, 1);
	dqp->q_res_icount++;
	if (nblks) {
		be64_add(&dqp->q_core.d_bcount, nblks);
		dqp->q_res_bcount += nblks;
	}
	if (rtblks) {
		be64_add(&dqp->q_core.d_rtbcount, rtblks);
		dqp->q_res_rtbcount += rtblks;
	}
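
	/*
	 * The d_* counters above live in the on-disk (big-endian) core of
	 * the dquot, hence be64_add(); the q_res_* fields are the incore,
	 * native-endian reservation counters.
	 */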

	/*
	 * Set default limits, adjust timers (since we changed usages)
	 */
	if (! XFS_IS_SUSER_DQUOT(dqp)) {
		xfs_qm_adjust_dqlimits(dqp->q_mount, &dqp->q_core);
		xfs_qm_adjust_dqtimers(dqp->q_mount, &dqp->q_core);
	}

	dqp->dq_flags |= XFS_DQ_DIRTY;
}

STATIC int
xfs_qm_get_rtblks(
	xfs_inode_t	*ip,
	xfs_qcnt_t	*O_rtblks)
{
	xfs_filblks_t	rtblks;			/* total rt blks */
	xfs_extnum_t	idx;			/* extent record index */
	xfs_ifork_t	*ifp;			/* inode fork pointer */
	xfs_extnum_t	nextents;		/* number of extent entries */
	xfs_bmbt_rec_t	*ep;			/* pointer to an extent entry */
	int		error;

	ASSERT(XFS_IS_REALTIME_INODE(ip));
	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK)))
			return error;
	}
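	/*
	 * With the extent records incore, the record count is simply
	 * if_bytes divided by the record size; summing each record's block
	 * count gives the inode's realtime block usage.
	 */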
	rtblks = 0;
	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	for (idx = 0; idx < nextents; idx++) {
		ep = xfs_iext_get_ext(ifp, idx);
		rtblks += xfs_bmbt_get_blockcount(ep);
	}
	*O_rtblks = (xfs_qcnt_t)rtblks;
	return 0;
}

/*
 * Callback routine supplied to bulkstat(). Given an inumber, find its
 * dquots and update them to account for resources taken by that inode.
 */
/* ARGSUSED */
STATIC int
xfs_qm_dqusage_adjust(
	xfs_mount_t	*mp,		/* mount point for filesystem */
	xfs_ino_t	ino,		/* inode number to get data for */
	void		__user *buffer,	/* not used */
	int		ubsize,		/* not used */
	void		*private_data,	/* not used */
	xfs_daddr_t	bno,		/* starting block of inode cluster */
	int		*ubused,	/* not used */
	void		*dip,		/* on-disk inode pointer (not used) */
	int		*res)		/* result code value */
{
	xfs_inode_t	*ip;
	xfs_dquot_t	*udqp, *gdqp;
	xfs_qcnt_t	nblks, rtblks;
	int		error;

	ASSERT(XFS_IS_QUOTA_RUNNING(mp));

	/*
	 * rootino must have its resources accounted for, not so with the quota
	 * inodes.
	 */
	if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
		*res = BULKSTAT_RV_NOTHING;
		return XFS_ERROR(EINVAL);
	}

	/*
	 * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget
	 * interface expects the inode to be exclusively locked because that's
	 * the case in all other instances. It's OK that we do this because
	 * quotacheck is done only at mount time.
	 */
	if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip, bno))) {
		*res = BULKSTAT_RV_NOTHING;
		return error;
	}

	if (ip->i_d.di_mode == 0) {
		xfs_iput_new(ip, XFS_ILOCK_EXCL);
		*res = BULKSTAT_RV_NOTHING;
		return XFS_ERROR(ENOENT);
	}

	/*
	 * Obtain the locked dquots. In case of an error (e.g. allocation
	 * fails for ENOSPC), we return the negative of the error number
	 * to bulkstat, so that it can get propagated to quotacheck() and
	 * make us disable quotas for the file system.
	 */
	if ((error = xfs_qm_dqget_noattach(ip, &udqp, &gdqp))) {
		xfs_iput(ip, XFS_ILOCK_EXCL);
		*res = BULKSTAT_RV_GIVEUP;
		return error;
	}

	rtblks = 0;
	if (! XFS_IS_REALTIME_INODE(ip)) {
		nblks = (xfs_qcnt_t)ip->i_d.di_nblocks;
	} else {
		/*
		 * Walk thru the extent list and count the realtime blocks.
		 * di_nblocks counts blocks on both the data and realtime
		 * devices, so the realtime blocks are subtracted out below
		 * to leave the data-device count in nblks.
		 */
		if ((error = xfs_qm_get_rtblks(ip, &rtblks))) {
			xfs_iput(ip, XFS_ILOCK_EXCL);
			if (udqp)
				xfs_qm_dqput(udqp);
			if (gdqp)
				xfs_qm_dqput(gdqp);
			*res = BULKSTAT_RV_GIVEUP;
			return error;
		}
		nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
	}
	ASSERT(ip->i_delayed_blks == 0);

	/*
	 * We can't release the inode while holding its dquot locks.
	 * The inode can go into inactive and might try to acquire the
	 * dquot locks. So, just unlock here and do a vn_rele at the end.
	 */
	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	/*
	 * Add the (disk blocks and inode) resources occupied by this
	 * inode to its dquots. We do this adjustment in the incore dquot,
	 * and also copy the changes to its buffer.
	 * We don't care about putting these changes in a transaction
	 * envelope because if we crash in the middle of a 'quotacheck'
	 * we have to start from the beginning anyway.
	 * Once we're done, we'll log all the dquot bufs.
	 *
	 * The *QUOTA_ON checks below may look pretty racy, but quotachecks
	 * and quotaoffs don't race. (Quotachecks happen at mount time only).
	 */
	if (XFS_IS_UQUOTA_ON(mp)) {
		ASSERT(udqp);
		xfs_qm_quotacheck_dqadjust(udqp, nblks, rtblks);
		xfs_qm_dqput(udqp);
	}
	if (XFS_IS_OQUOTA_ON(mp)) {
		ASSERT(gdqp);
		xfs_qm_quotacheck_dqadjust(gdqp, nblks, rtblks);
		xfs_qm_dqput(gdqp);
	}
	/*
	 * Now release the inode. This will send it to 'inactive', and
	 * possibly even free blocks.
	 */
	VN_RELE(XFS_ITOV(ip));

	/*
	 * Go to the next inode.
	 */
	*res = BULKSTAT_RV_DIDONE;
	return 0;
}

/*
 * Walk thru all the filesystem inodes and construct a consistent view
 * of the disk quota world. If the quotacheck fails, disable quotas.
 */
int
xfs_qm_quotacheck(
	xfs_mount_t	*mp)
{
	int		done, count, error;
	xfs_ino_t	lastino;
	size_t		structsz;
	xfs_inode_t	*uip, *gip;
	uint		flags;

	count = INT_MAX;
	structsz = 1;
	lastino = 0;
	flags = 0;

	ASSERT(XFS_QI_UQIP(mp) || XFS_QI_GQIP(mp));
	ASSERT(XFS_IS_QUOTA_RUNNING(mp));

	/*
	 * There should be no cached dquots. The (simplistic) quotacheck
	 * algorithm doesn't like that.
	 */
	ASSERT(XFS_QI_MPLNDQUOTS(mp) == 0);

	cmn_err(CE_NOTE, "XFS quotacheck %s: Please wait.", mp->m_fsname);

	/*
	 * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset
	 * their counters to zero. We need a clean slate.
	 * We don't log our changes till later.
	 */
	if ((uip = XFS_QI_UQIP(mp))) {
		if ((error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA)))
			goto error_return;
		flags |= XFS_UQUOTA_CHKD;
	}

	if ((gip = XFS_QI_GQIP(mp))) {
		if ((error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ?
					XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA)))
			goto error_return;
		flags |= XFS_OQUOTA_CHKD;
	}

	do {
		/*
		 * Iterate thru all the inodes in the file system,
		 * adjusting the corresponding dquot counters in core.
		 */
		if ((error = xfs_bulkstat(mp, &lastino, &count,
				     xfs_qm_dqusage_adjust, NULL,
				     structsz, NULL, BULKSTAT_FG_IGET, &done)))
			break;

	} while (! done);

	/*
	 * We can get this error if we couldn't do a dquot allocation inside
	 * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the
	 * dirty dquots that might be cached, we just want to get rid of them
	 * and turn quotaoff. The dquots won't be attached to any of the inodes
	 * at this point (because we intentionally didn't in dqget_noattach).
	 */
	if (error) {
		xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_QUOTAOFF);
		goto error_return;
	}
	/*
	 * We've made all the changes that we need to make incore.
	 * Now flush them down to disk buffers.
	 */
	xfs_qm_dqflush_all(mp, XFS_QMOPT_DELWRI);

	/*
	 * We didn't log anything, because if we crashed, we'll have to
	 * start the quotacheck from scratch anyway. However, we must make
	 * sure that our dquot changes are secure before we put the
	 * quotacheck'd stamp on the superblock. So, here we do a synchronous
	 * flush.
	 */
	XFS_bflush(mp->m_ddev_targp);

	/*
	 * If one type of quotas is off, then it will lose its
	 * quotachecked status, since we won't be doing accounting for
	 * that type anymore.
	 */
	mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD);
	mp->m_qflags |= flags;

	XQM_LIST_PRINT(&(XFS_QI_MPL_LIST(mp)), MPL_NEXT, "++++ Mp list +++");

 error_return:
	if (error) {
		cmn_err(CE_WARN, "XFS quotacheck %s: Unsuccessful (Error %d): "
			"Disabling quotas.",
			mp->m_fsname, error);
		/*
		 * We must turn off quotas.
		 */
		ASSERT(mp->m_quotainfo != NULL);
		ASSERT(xfs_Gqm != NULL);
		xfs_qm_destroy_quotainfo(mp);
		(void)xfs_mount_reset_sbqflags(mp);
	} else {
		cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname);
	}
	return (error);
}

/*
 * This is called after the superblock has been read in and we're ready to
 * iget the quota inodes.
 */
STATIC int
xfs_qm_init_quotainos(
	xfs_mount_t	*mp)
{
	xfs_inode_t	*uip, *gip;
	int		error;
	__int64_t	sbflags;
	uint		flags;

	ASSERT(mp->m_quotainfo);
	uip = gip = NULL;
	sbflags = 0;
	flags = 0;

	/*
	 * Get the uquota and gquota inodes
	 */
	if (XFS_SB_VERSION_HASQUOTA(&mp->m_sb)) {
		if (XFS_IS_UQUOTA_ON(mp) &&
		    mp->m_sb.sb_uquotino != NULLFSINO) {
			ASSERT(mp->m_sb.sb_uquotino > 0);
			if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
					     0, 0, &uip, 0)))
				return XFS_ERROR(error);
		}
		if (XFS_IS_OQUOTA_ON(mp) &&
		    mp->m_sb.sb_gquotino != NULLFSINO) {
			ASSERT(mp->m_sb.sb_gquotino > 0);
			if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
					     0, 0, &gip, 0))) {
				if (uip)
					VN_RELE(XFS_ITOV(uip));
				return XFS_ERROR(error);
			}
		}
	} else {
		flags |= XFS_QMOPT_SBVERSION;
		sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
			    XFS_SB_GQUOTINO | XFS_SB_QFLAGS);
	}
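	/*
	 * A superblock that predates quota support has none of the quota
	 * inode fields, so everything the allocation below must update
	 * and log is marked here: the version number, both quota inode
	 * numbers and the quota flags. The version bump is only needed
	 * once, which is why XFS_QMOPT_SBVERSION is cleared again after
	 * the first xfs_qm_qino_alloc() call.
	 */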

	/*
	 * Create the two inodes, if they don't exist already. The changes
	 * made above will get added to a transaction and logged in one of
	 * the qino_alloc calls below.  If the device is readonly,
	 * temporarily switch to read-write to do this.
	 */
	if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) {
		if ((error = xfs_qm_qino_alloc(mp, &uip,
					      sbflags | XFS_SB_UQUOTINO,
					      flags | XFS_QMOPT_UQUOTA)))
			return XFS_ERROR(error);

		flags &= ~XFS_QMOPT_SBVERSION;
	}
	if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) {
		flags |= (XFS_IS_GQUOTA_ON(mp) ?
				XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
		error = xfs_qm_qino_alloc(mp, &gip,
					  sbflags | XFS_SB_GQUOTINO, flags);
		if (error) {
			if (uip)
				VN_RELE(XFS_ITOV(uip));

			return XFS_ERROR(error);
		}
	}

	XFS_QI_UQIP(mp) = uip;
	XFS_QI_GQIP(mp) = gip;

	return 0;
}


/*
 * Traverse the freelist of dquots and attempt to reclaim a maximum of
 * 'howmany' dquots. This operation races with dqlookup(), and attempts to
 * favor the lookup function ...
 * XXXsup merge this with qm_reclaim_one().
 */
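/*
 * Reclaim protocol: a dquot can only be destroyed once it is off its
 * hash chain, its mount's dquot list and the freelist. The correct
 * lock order is hashlock -> freelistlock -> mplistlock, but we already
 * hold the freelist lock while walking it, so the hash and mplist
 * locks can only be trylocked here; on failure we either skip to the
 * next dquot or drop everything and restart, giving up after
 * XFS_QM_RECLAIM_MAX_RESTARTS attempts.
 */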
STATIC int
xfs_qm_shake_freelist(
	int howmany)
{
	int		nreclaimed;
	xfs_dqhash_t	*hash;
	xfs_dquot_t	*dqp, *nextdqp;
	int		restarts;
	int		nflushes;

	if (howmany <= 0)
		return 0;

	nreclaimed = 0;
	restarts = 0;
	nflushes = 0;

#ifdef QUOTADEBUG
	cmn_err(CE_DEBUG, "Shake free 0x%x", howmany);
#endif
	/* lock order is: hashchainlock, freelistlock, mplistlock */
 tryagain:
	xfs_qm_freelist_lock(xfs_Gqm);

	for (dqp = xfs_Gqm->qm_dqfreelist.qh_next;
	     ((dqp != (xfs_dquot_t *) &xfs_Gqm->qm_dqfreelist) &&
	      nreclaimed < howmany); ) {
		xfs_dqlock(dqp);

		/*
		 * We are racing with dqlookup here. Naturally we don't
		 * want to reclaim a dquot that lookup wants.
		 */
		if (dqp->dq_flags & XFS_DQ_WANT) {
			xfs_dqunlock(dqp);
			xfs_qm_freelist_unlock(xfs_Gqm);
			if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
				return nreclaimed;
			XQM_STATS_INC(xqmstats.xs_qm_dqwants);
			goto tryagain;
		}

		/*
		 * If the dquot is inactive, we are assured that it is
		 * not on the mplist or the hashlist, and that makes our
		 * life easier.
		 */
		if (dqp->dq_flags & XFS_DQ_INACTIVE) {
			ASSERT(dqp->q_mount == NULL);
			ASSERT(! XFS_DQ_IS_DIRTY(dqp));
			ASSERT(dqp->HL_PREVP == NULL);
			ASSERT(dqp->MPL_PREVP == NULL);
			XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
			nextdqp = dqp->dq_flnext;
			goto off_freelist;
		}

		ASSERT(dqp->MPL_PREVP);
		/*
		 * Try to grab the flush lock. If this dquot is in the process
		 * of getting flushed to disk, we don't want to reclaim it.
		 */
		if (! xfs_qm_dqflock_nowait(dqp)) {
			xfs_dqunlock(dqp);
			dqp = dqp->dq_flnext;
			continue;
		}

		/*
		 * We have the flush lock so we know that this is not in the
		 * process of being flushed. So, if this is dirty, flush it
		 * DELWRI so that we don't get a freelist infested with
		 * dirty dquots.
		 */
		if (XFS_DQ_IS_DIRTY(dqp)) {
			xfs_dqtrace_entry(dqp, "DQSHAKE: DQDIRTY");
			/*
			 * We flush it delayed write, so don't bother
			 * releasing the mplock.
			 */
			(void) xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI);
			xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
			dqp = dqp->dq_flnext;
			continue;
		}
		/*
		 * We're trying to get the hashlock out of order. This races
		 * with dqlookup; so, we give up and go on to the next dquot
		 * if we can't get the hashlock. This way, we won't starve
		 * a dqlookup process that holds the hashlock while waiting
		 * for the freelist lock.
		 */
		if (! xfs_qm_dqhashlock_nowait(dqp)) {
			xfs_dqfunlock(dqp);
			xfs_dqunlock(dqp);
			dqp = dqp->dq_flnext;
			continue;
		}
		/*
		 * This races with the dquot allocation code as well as
		 * dqflush_all and the reclaim code. So, if we failed to
		 * grab the mplist lock, give up everything and start over.
		 */
		hash = dqp->q_hash;
		ASSERT(hash);
		if (! xfs_qm_mplist_nowait(dqp->q_mount)) {
			xfs_dqfunlock(dqp);
			xfs_dqunlock(dqp);
			XFS_DQ_HASH_UNLOCK(hash);
			xfs_qm_freelist_unlock(xfs_Gqm);
			if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
				return nreclaimed;
			goto tryagain;
		}
		xfs_dqtrace_entry(dqp, "DQSHAKE: UNLINKING");
#ifdef QUOTADEBUG
		cmn_err(CE_DEBUG, "Shake 0x%p, ID 0x%x\n",
			dqp, be32_to_cpu(dqp->q_core.d_id));
#endif
		ASSERT(dqp->q_nrefs == 0);
		nextdqp = dqp->dq_flnext;
		XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp);
		XQM_HASHLIST_REMOVE(hash, dqp);
		xfs_dqfunlock(dqp);
		xfs_qm_mplist_unlock(dqp->q_mount);
		XFS_DQ_HASH_UNLOCK(hash);

 off_freelist:
		XQM_FREELIST_REMOVE(dqp);
		xfs_dqunlock(dqp);
		nreclaimed++;
		XQM_STATS_INC(xqmstats.xs_qm_dqshake_reclaims);
		xfs_qm_dqdestroy(dqp);
		dqp = nextdqp;
	}
	xfs_qm_freelist_unlock(xfs_Gqm);
	return nreclaimed;
}


/*
 * The kmem_shake interface is invoked when memory is running low.
 */
/* ARGSUSED */
STATIC int
xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask)
{
	int	ndqused, nfree, n;

	if (!kmem_shake_allow(gfp_mask))
		return 0;
	if (!xfs_Gqm)
		return 0;

	nfree = xfs_Gqm->qm_dqfreelist.qh_nelems; /* free dquots */
	/* incore dquots in all f/s's */
	ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree;

	ASSERT(ndqused >= 0);

	if (nfree <= ndqused && nfree < ndquot)
		return 0;

	ndqused *= xfs_Gqm->qm_dqfree_ratio;	/* target # of free dquots */
	n = nfree - ndqused - ndquot;		/* # over target */
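	/*
	 * Worked example (illustrative numbers only): with a free ratio
	 * of 2, 1000 dquots in core of which 700 are free, ndqused is
	 * 300 and the target number of free dquots is 600; with ndquot
	 * at 100 that makes n = 700 - 600 - 100 = 0, and the shaker
	 * below is asked for MAX(700, 0) dquots.
	 */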

	return xfs_qm_shake_freelist(MAX(nfree, n));
}


/*
 * Just pop the least recently used dquot off the freelist and
 * recycle it. The dquot is taken off all lists and returned unlocked,
 * with no references left on it.
 */
STATIC xfs_dquot_t *
xfs_qm_dqreclaim_one(void)
{
	xfs_dquot_t	*dqpout;
	xfs_dquot_t	*dqp;
	int		restarts;
	int		nflushes;

	restarts = 0;
	dqpout = NULL;
	nflushes = 0;

	/* lock order: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
 startagain:
	xfs_qm_freelist_lock(xfs_Gqm);

	FOREACH_DQUOT_IN_FREELIST(dqp, &(xfs_Gqm->qm_dqfreelist)) {
		xfs_dqlock(dqp);

		/*
		 * We are racing with dqlookup here. Naturally we don't
		 * want to reclaim a dquot that lookup wants. We release the
		 * freelist lock and start over, so that lookup will grab
		 * both the dquot and the freelistlock.
		 */
		if (dqp->dq_flags & XFS_DQ_WANT) {
			ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
			xfs_dqtrace_entry(dqp, "DQRECLAIM: DQWANT");
			xfs_dqunlock(dqp);
			xfs_qm_freelist_unlock(xfs_Gqm);
			if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
				return NULL;
			XQM_STATS_INC(xqmstats.xs_qm_dqwants);
			goto startagain;
		}

		/*
		 * If the dquot is inactive, we are assured that it is
		 * not on the mplist or the hashlist, and that makes our
		 * life easier.
		 */
		if (dqp->dq_flags & XFS_DQ_INACTIVE) {
			ASSERT(dqp->q_mount == NULL);
			ASSERT(! XFS_DQ_IS_DIRTY(dqp));
			ASSERT(dqp->HL_PREVP == NULL);
			ASSERT(dqp->MPL_PREVP == NULL);
			XQM_FREELIST_REMOVE(dqp);
			xfs_dqunlock(dqp);
			dqpout = dqp;
			XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
			break;
		}

		ASSERT(dqp->q_hash);
		ASSERT(dqp->MPL_PREVP);

		/*
		 * Try to grab the flush lock. If this dquot is in the process
		 * of getting flushed to disk, we don't want to reclaim it.
		 */
		if (! xfs_qm_dqflock_nowait(dqp)) {
			xfs_dqunlock(dqp);
			continue;
		}

		/*
		 * We have the flush lock so we know that this is not in the
		 * process of being flushed. So, if this is dirty, flush it
		 * DELWRI so that we don't get a freelist infested with
		 * dirty dquots.
		 */
		if (XFS_DQ_IS_DIRTY(dqp)) {
			xfs_dqtrace_entry(dqp, "DQRECLAIM: DQDIRTY");
			/*
			 * We flush it delayed write, so don't bother
			 * releasing the freelist lock.
			 */
			(void) xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI);
			xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
			continue;
		}

		if (! xfs_qm_mplist_nowait(dqp->q_mount)) {
			xfs_dqfunlock(dqp);
			xfs_dqunlock(dqp);
			continue;
		}

		if (! xfs_qm_dqhashlock_nowait(dqp))
			goto mplistunlock;

		ASSERT(dqp->q_nrefs == 0);
		xfs_dqtrace_entry(dqp, "DQRECLAIM: UNLINKING");
		XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp);
		XQM_HASHLIST_REMOVE(dqp->q_hash, dqp);
		XQM_FREELIST_REMOVE(dqp);
		dqpout = dqp;
		XFS_DQ_HASH_UNLOCK(dqp->q_hash);
 mplistunlock:
		xfs_qm_mplist_unlock(dqp->q_mount);
		xfs_dqfunlock(dqp);
		xfs_dqunlock(dqp);
		if (dqpout)
			break;
	}

	xfs_qm_freelist_unlock(xfs_Gqm);
	return dqpout;
}


/*------------------------------------------------------------------*/

/*
 * Return a new incore dquot. Depending on the number of
 * dquots in the system, we either allocate a new one on the kernel heap,
 * or reclaim a free one.
 * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed
 * to reclaim an existing one from the freelist.
 */
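/*
 * Note for the reclaim case: a recycled dquot keeps its locks and its
 * (now empty) list linkage; only the on-disk portion (q_core) is
 * zeroed here. The B_TRUE/B_FALSE return lets the caller
 * (xfs_qm_dqinit() in xfs_dquot.c) tell a brand new dquot, whose locks
 * still need to be set up, from a recycled one.
 */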
boolean_t
xfs_qm_dqalloc_incore(
	xfs_dquot_t **O_dqpp)
{
	xfs_dquot_t	*dqp;

	/*
	 * Check against high water mark to see if we want to pop
	 * a nincompoop dquot off the freelist.
	 */
	if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) {
		/*
		 * Try to recycle a dquot from the freelist.
		 */
		if ((dqp = xfs_qm_dqreclaim_one())) {
			XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
			memset(&dqp->q_core, 0, sizeof(dqp->q_core));
			*O_dqpp = dqp;
			return B_FALSE;
		}
		XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
	}

	/*
	 * Allocate a brand new dquot on the kernel heap and return it
	 * to the caller to initialize.
	 */
	ASSERT(xfs_Gqm->qm_dqzone != NULL);
	*O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
	atomic_inc(&xfs_Gqm->qm_totaldquots);

	return B_TRUE;
}


/*
 * Start a transaction and write the incore superblock changes to
 * disk. flags parameter indicates which fields have changed.
 */
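/*
 * xfs_qm_mount_quotas(), for example, calls this with XFS_SB_QFLAGS
 * when the quota flags it ends up with differ from what is recorded
 * in the on-disk superblock.
 */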
int
xfs_qm_write_sb_changes(
	xfs_mount_t	*mp,
	__int64_t	flags)
{
	xfs_trans_t	*tp;
	int		error;

#ifdef QUOTADEBUG
	cmn_err(CE_NOTE, "Writing superblock quota changes: %s", mp->m_fsname);
#endif
	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
	if ((error = xfs_trans_reserve(tp, 0,
				      mp->m_sb.sb_sectsize + 128, 0,
				      0,
				      XFS_DEFAULT_LOG_COUNT))) {
		xfs_trans_cancel(tp, 0);
		return error;
	}

	xfs_mod_sb(tp, flags);
	(void) xfs_trans_commit(tp, 0);

	return 0;
}


/* --------------- utility functions for vnodeops ---------------- */


/*
 * Given an inode, a uid and gid (from cred_t) make sure that we have
 * allocated relevant dquot(s) on disk, and that we won't exceed inode
 * quotas by creating this file.
 * This also attaches dquot(s) to the given inode after locking it,
 * and returns the dquots corresponding to the uid and/or gid.
 *
 * in	: inode (unlocked)
 * out	: udquot, gdquot with references taken and unlocked
 */
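/*
 * Note that ENOENT can never come back from the dqget calls below:
 * XFS_QMOPT_DQALLOC tells xfs_qm_dqget() to allocate the dquot on disk
 * if it doesn't exist yet, which is what the ASSERTs rely on.
 */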
int
xfs_qm_vop_dqalloc(
	xfs_mount_t	*mp,
	xfs_inode_t	*ip,
	uid_t		uid,
	gid_t		gid,
	prid_t		prid,
	uint		flags,
	xfs_dquot_t	**O_udqpp,
	xfs_dquot_t	**O_gdqpp)
{
	int		error;
	xfs_dquot_t	*uq, *gq;
	uint		lockflags;

	if (!XFS_IS_QUOTA_ON(mp))
		return 0;

	lockflags = XFS_ILOCK_EXCL;
	xfs_ilock(ip, lockflags);

	if ((flags & XFS_QMOPT_INHERIT) &&
	    XFS_INHERIT_GID(ip, XFS_MTOVFS(mp)))
		gid = ip->i_d.di_gid;

	/*
	 * Attach the dquot(s) to this inode, doing a dquot allocation
	 * if necessary. The dquot(s) will not be locked.
	 */
	if (XFS_NOT_DQATTACHED(mp, ip)) {
		if ((error = xfs_qm_dqattach(ip, XFS_QMOPT_DQALLOC |
					    XFS_QMOPT_ILOCKED))) {
			xfs_iunlock(ip, lockflags);
			return error;
		}
	}

	uq = gq = NULL;
	if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) {
		if (ip->i_d.di_uid != uid) {
			/*
			 * What we need is the dquot that has this uid, and
			 * if we send the inode to dqget, the uid of the inode
			 * takes priority over what's sent in the uid argument.
			 * We must unlock inode here before calling dqget if
			 * we're not sending the inode, because otherwise
			 * we'll deadlock by doing trans_reserve while
			 * holding ilock.
			 */
			xfs_iunlock(ip, lockflags);
			if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid,
						 XFS_DQ_USER,
						 XFS_QMOPT_DQALLOC |
						 XFS_QMOPT_DOWARN,
						 &uq))) {
				ASSERT(error != ENOENT);
				return error;
			}
			/*
			 * Get the ilock in the right order.
			 */
			xfs_dqunlock(uq);
			lockflags = XFS_ILOCK_SHARED;
			xfs_ilock(ip, lockflags);
		} else {
			/*
			 * Take an extra reference, because we'll return
			 * this to the caller.
			 */
			ASSERT(ip->i_udquot);
			uq = ip->i_udquot;
			xfs_dqlock(uq);
			XFS_DQHOLD(uq);
			xfs_dqunlock(uq);
		}
	}
	if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
		if (ip->i_d.di_gid != gid) {
			xfs_iunlock(ip, lockflags);
			if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid,
						 XFS_DQ_GROUP,
						 XFS_QMOPT_DQALLOC |
						 XFS_QMOPT_DOWARN,
						 &gq))) {
				if (uq)
					xfs_qm_dqrele(uq);
				ASSERT(error != ENOENT);
				return error;
			}
			xfs_dqunlock(gq);
			lockflags = XFS_ILOCK_SHARED;
			xfs_ilock(ip, lockflags);
		} else {
			ASSERT(ip->i_gdquot);
			gq = ip->i_gdquot;
			xfs_dqlock(gq);
			XFS_DQHOLD(gq);
			xfs_dqunlock(gq);
		}
	} else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
		if (ip->i_d.di_projid != prid) {
			xfs_iunlock(ip, lockflags);
			if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid,
						 XFS_DQ_PROJ,
						 XFS_QMOPT_DQALLOC |
						 XFS_QMOPT_DOWARN,
						 &gq))) {
				if (uq)
					xfs_qm_dqrele(uq);
				ASSERT(error != ENOENT);
				return error;
			}
			xfs_dqunlock(gq);
			lockflags = XFS_ILOCK_SHARED;
			xfs_ilock(ip, lockflags);
		} else {
			ASSERT(ip->i_gdquot);
			gq = ip->i_gdquot;
			xfs_dqlock(gq);
			XFS_DQHOLD(gq);
			xfs_dqunlock(gq);
		}
	}
	if (uq)
		xfs_dqtrace_entry_ino(uq, "DQALLOC", ip);

	xfs_iunlock(ip, lockflags);
	if (O_udqpp)
		*O_udqpp = uq;
	else if (uq)
		xfs_qm_dqrele(uq);
	if (O_gdqpp)
		*O_gdqpp = gq;
	else if (gq)
		xfs_qm_dqrele(gq);
	return 0;
}

/*
 * Actually transfer ownership, and do dquot modifications.
 * These were already reserved.
 */
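/*
 * The old dquot is handed back to the caller, which drops its
 * reference only after the transaction commits; the inode keeps the
 * new dquot, so an extra reference is taken below.
 */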
xfs_dquot_t *
xfs_qm_vop_chown(
	xfs_trans_t	*tp,
	xfs_inode_t	*ip,
	xfs_dquot_t	**IO_olddq,
	xfs_dquot_t	*newdq)
{
	xfs_dquot_t	*prevdq;
	uint		bfield = XFS_IS_REALTIME_INODE(ip) ?
				 XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;

	ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
	ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));

	/* old dquot */
	prevdq = *IO_olddq;
	ASSERT(prevdq);
	ASSERT(prevdq != newdq);

	xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks));
	xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1);

	/* the sparkling new dquot */
	xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks);
	xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);

	/*
	 * Take an extra reference, because the inode
	 * is going to keep this dquot pointer even
	 * after the trans_commit.
	 */
	xfs_dqlock(newdq);
	XFS_DQHOLD(newdq);
	xfs_dqunlock(newdq);
	*IO_olddq = newdq;

	return prevdq;
}

/*
 * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID).
 */
int
xfs_qm_vop_chown_reserve(
	xfs_trans_t	*tp,
	xfs_inode_t	*ip,
	xfs_dquot_t	*udqp,
	xfs_dquot_t	*gdqp,
	uint		flags)
{
	int		error;
	xfs_mount_t	*mp;
	uint		delblks, blkflags, prjflags = 0;
	xfs_dquot_t	*unresudq, *unresgdq, *delblksudq, *delblksgdq;

	ASSERT(XFS_ISLOCKED_INODE(ip));
	mp = ip->i_mount;
	ASSERT(XFS_IS_QUOTA_RUNNING(mp));

	delblks = ip->i_delayed_blks;
	delblksudq = delblksgdq = unresudq = unresgdq = NULL;
	blkflags = XFS_IS_REALTIME_INODE(ip) ?
			XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;

	if (XFS_IS_UQUOTA_ON(mp) && udqp &&
	    ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) {
		delblksudq = udqp;
		/*
		 * If there are delayed allocation blocks, then we have to
		 * unreserve those from the old dquot, and add them to the
		 * new dquot.
		 */
		if (delblks) {
			ASSERT(ip->i_udquot);
			unresudq = ip->i_udquot;
		}
	}
	if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) {
		if (XFS_IS_PQUOTA_ON(ip->i_mount) &&
		     ip->i_d.di_projid != be32_to_cpu(gdqp->q_core.d_id))
			prjflags = XFS_QMOPT_ENOSPC;

		if (prjflags ||
		    (XFS_IS_GQUOTA_ON(ip->i_mount) &&
		     ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) {
			delblksgdq = gdqp;
			if (delblks) {
				ASSERT(ip->i_gdquot);
				unresgdq = ip->i_gdquot;
			}
		}
	}

	if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount,
				delblksudq, delblksgdq, ip->i_d.di_nblocks, 1,
				flags | blkflags | prjflags)))
		return error;

	/*
	 * Do the delayed blks reservations/unreservations now. Since these
	 * are done without the help of a transaction, if a reservation fails
	 * its previous reservations won't be automatically undone by trans
	 * code. So, we have to do it manually here.
	 */
	if (delblks) {
		/*
		 * Do the reservations first. Unreservation can't fail.
		 */
		ASSERT(delblksudq || delblksgdq);
		ASSERT(unresudq || unresgdq);
		if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
				delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0,
				flags | blkflags | prjflags)))
			return error;
		xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
				unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0,
				blkflags);
	}

	return 0;
}

int
xfs_qm_vop_rename_dqattach(
	xfs_inode_t	**i_tab)
{
	xfs_inode_t	*ip;
	int		i;
	int		error;

	ip = i_tab[0];

	if (! XFS_IS_QUOTA_ON(ip->i_mount))
		return 0;

	if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) {
		error = xfs_qm_dqattach(ip, 0);
		if (error)
			return error;
	}
	for (i = 1; (i < 4 && i_tab[i]); i++) {
		/*
		 * Watch out for duplicate entries in the table.
		 */
		if ((ip = i_tab[i]) != i_tab[i-1]) {
			if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) {
				error = xfs_qm_dqattach(ip, 0);
				if (error)
					return error;
			}
		}
	}
	return 0;
}

void
xfs_qm_vop_dqattach_and_dqmod_newinode(
	xfs_trans_t	*tp,
	xfs_inode_t	*ip,
	xfs_dquot_t	*udqp,
	xfs_dquot_t	*gdqp)
{
	if (!XFS_IS_QUOTA_ON(tp->t_mountp))
		return;

	ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
	ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));

	if (udqp) {
		xfs_dqlock(udqp);
		XFS_DQHOLD(udqp);
		xfs_dqunlock(udqp);
		ASSERT(ip->i_udquot == NULL);
		ip->i_udquot = udqp;
		ASSERT(XFS_IS_UQUOTA_ON(tp->t_mountp));
		ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
		xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
	}
	if (gdqp) {
		xfs_dqlock(gdqp);
		XFS_DQHOLD(gdqp);
		xfs_dqunlock(gdqp);
		ASSERT(ip->i_gdquot == NULL);
		ip->i_gdquot = gdqp;
		ASSERT(XFS_IS_OQUOTA_ON(tp->t_mountp));
		ASSERT((XFS_IS_GQUOTA_ON(tp->t_mountp) ?
			ip->i_d.di_gid : ip->i_d.di_projid) ==
				be32_to_cpu(gdqp->q_core.d_id));
		xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
	}
}

/* ------------- list stuff -----------------*/
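
/*
 * The freelist is a circular, doubly linked list with the xfs_frlist_t
 * header itself acting as the sentinel: an empty list points back at
 * the header. The header can be cast to xfs_dquot_t because the
 * freelist link fields (dq_flnext/dq_flprev) are the first members of
 * the dquot, so a traversal terminates when it comes back around to
 * the header.
 */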
STATIC void
xfs_qm_freelist_init(xfs_frlist_t *ql)
{
	ql->qh_next = ql->qh_prev = (xfs_dquot_t *) ql;
	mutex_init(&ql->qh_lock);
	ql->qh_version = 0;
	ql->qh_nelems = 0;
}

STATIC void
xfs_qm_freelist_destroy(xfs_frlist_t *ql)
{
	xfs_dquot_t	*dqp, *nextdqp;

	mutex_lock(&ql->qh_lock);
	for (dqp = ql->qh_next;
	     dqp != (xfs_dquot_t *)ql; ) {
		xfs_dqlock(dqp);
		nextdqp = dqp->dq_flnext;
#ifdef QUOTADEBUG
		cmn_err(CE_DEBUG, "FREELIST destroy 0x%p", dqp);
#endif
		XQM_FREELIST_REMOVE(dqp);
		xfs_dqunlock(dqp);
		xfs_qm_dqdestroy(dqp);
		dqp = nextdqp;
	}
	mutex_unlock(&ql->qh_lock);
	mutex_destroy(&ql->qh_lock);

	ASSERT(ql->qh_nelems == 0);
}

STATIC void
xfs_qm_freelist_insert(xfs_frlist_t *ql, xfs_dquot_t *dq)
{
	/* link dq in right after ql, which may be the list header itself */
	dq->dq_flnext = ql->qh_next;
	dq->dq_flprev = (xfs_dquot_t *)ql;
	ql->qh_next = dq;
	dq->dq_flnext->dq_flprev = dq;
	xfs_Gqm->qm_dqfreelist.qh_nelems++;
	xfs_Gqm->qm_dqfreelist.qh_version++;
}

void
xfs_qm_freelist_unlink(xfs_dquot_t *dq)
{
	xfs_dquot_t *next = dq->dq_flnext;
	xfs_dquot_t *prev = dq->dq_flprev;

	next->dq_flprev = prev;
	prev->dq_flnext = next;
	/* leave the dquot pointing at itself, i.e. off the list */
	dq->dq_flnext = dq->dq_flprev = dq;
	xfs_Gqm->qm_dqfreelist.qh_nelems--;
	xfs_Gqm->qm_dqfreelist.qh_version++;
}

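/*
 * Appending is just an insert behind the current tail (qh_prev), so
 * newly freed dquots go to the back of the list while reclaim takes
 * from the front; that ordering is what makes the freelist LRU.
 */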
void
xfs_qm_freelist_append(xfs_frlist_t *ql, xfs_dquot_t *dq)
{
	xfs_qm_freelist_insert((xfs_frlist_t *)ql->qh_prev, dq);
}

STATIC int
xfs_qm_dqhashlock_nowait(
	xfs_dquot_t *dqp)
{
	return mutex_trylock(&dqp->q_hash->qh_lock);
}

int
xfs_qm_freelist_lock_nowait(
	xfs_qm_t *xqm)
{
	return mutex_trylock(&xqm->qm_dqfreelist.qh_lock);
}

STATIC int
xfs_qm_mplist_nowait(
	xfs_mount_t	*mp)
{
	ASSERT(mp->m_quotainfo);
	return mutex_trylock(&(XFS_QI_MPLLOCK(mp)));
}