quota.c revision 4321:a8930ec16e52
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
27/*	  All Rights Reserved  	*/
28
29/*
30 * University Copyright- Copyright (c) 1982, 1986, 1988
31 * The Regents of the University of California
32 * All Rights Reserved
33 *
34 * University Acknowledgment- Portions of this document are derived from
35 * software developed by the University of California, Berkeley, and its
36 * contributors.
37 */
38
39
40#pragma ident	"%Z%%M%	%I%	%E% SMI"
41
42/*
43 * Code pertaining to management of the in-core data structures.
44 */
45#include <sys/types.h>
46#include <sys/t_lock.h>
47#include <sys/param.h>
48#include <sys/systm.h>
49#include <sys/signal.h>
50#include <sys/errno.h>
51#include <sys/user.h>
52#include <sys/proc.h>
53#include <sys/vfs.h>
54#include <sys/vnode.h>
55#include <sys/uio.h>
56#include <sys/buf.h>
57#include <sys/fs/ufs_fs.h>
58#include <sys/fs/ufs_inode.h>
59#include <sys/fs/ufs_quota.h>
60#include <sys/cmn_err.h>
61#include <sys/kmem.h>
62#include <sys/debug.h>
63#include <sys/file.h>
64#include <sys/fs/ufs_panic.h>
65#include <sys/var.h>
66
67
68/*
69 * Dquot in core hash chain headers
70 */
71struct	dqhead	dqhead[NDQHASH];
72
73static kmutex_t dq_cachelock;
74static kmutex_t dq_freelock;
75
76krwlock_t dq_rwlock;
77
78/*
79 * Dquot free list.
80 */
81struct dquot dqfreelist;
82
/*
 * Link DQP in at the head of the dquot free list.  dq_freelock is taken and
 * dropped inside the macro, so the caller must not already hold it.
 *
 * The body is wrapped in do { } while (0) so that the expansion plus the
 * caller's trailing semicolon forms exactly one statement; a bare { } block
 * breaks when the macro is used as the body of an if that has an else.
 * DQP is evaluated several times and must be free of side effects.
 */
#define	dqinsheadfree(DQP) do { \
	mutex_enter(&dq_freelock); \
	(DQP)->dq_freef = dqfreelist.dq_freef; \
	(DQP)->dq_freeb = &dqfreelist; \
	dqfreelist.dq_freef->dq_freeb = (DQP); \
	dqfreelist.dq_freef = (DQP); \
	mutex_exit(&dq_freelock); \
} while (0)
91
/*
 * Link DQP in at the tail of the dquot free list.  dq_freelock is taken and
 * dropped inside the macro, so the caller must not already hold it.
 *
 * The body is wrapped in do { } while (0) so that the expansion plus the
 * caller's trailing semicolon forms exactly one statement (dqput() uses this
 * macro as an unbraced else body).  DQP is evaluated several times and must
 * be free of side effects.
 */
#define	dqinstailfree(DQP) do { \
	mutex_enter(&dq_freelock); \
	(DQP)->dq_freeb = dqfreelist.dq_freeb; \
	(DQP)->dq_freef = &dqfreelist; \
	dqfreelist.dq_freeb->dq_freef = (DQP); \
	dqfreelist.dq_freeb = (DQP); \
	mutex_exit(&dq_freelock); \
} while (0)
100
/*
 * Unlink DQP from the dquot free list.  Unlike the insert macros this one
 * does not take dq_freelock itself; every caller in this file holds it
 * around the call.
 *
 * The link pointers are cleared afterwards so that any later use of them
 * faults early, and so that "not on the free list" can be asserted as
 * dq_freef == NULL && dq_freeb == NULL.
 *
 * Wrapped in do { } while (0) so the expansion is a single statement.
 * DQP is evaluated several times and must be free of side effects.
 */
#define	dqremfree(DQP) do { \
	(DQP)->dq_freeb->dq_freef = (DQP)->dq_freef; \
	(DQP)->dq_freef->dq_freeb = (DQP)->dq_freeb; \
	(DQP)->dq_freef = (DQP)->dq_freeb = NULL; \
} while (0)
107
/*
 * Shorthand for casting a list head (a struct dqhead bucket or dqfreelist)
 * to a dquot pointer when storing or comparing chain sentinels.
 */
typedef struct dquot *DQptr;
109
110/*
111 * Initialize quota sub-system init lock.
112 */
113void
114qtinit()
115{
116	rw_init(&dq_rwlock, NULL, RW_DEFAULT, NULL);
117}
118
119/*
120 * qtinit2 allocated space for the quota structures.  Only do this if
121 * if quotas are going to be used so that we can save the space if quotas
122 * aren't used.
123 */
124void
125qtinit2(void)
126{
127	register struct dqhead *dhp;
128	register struct dquot *dqp;
129
130	ASSERT(RW_WRITE_HELD(&dq_rwlock));
131
132	if (ndquot == 0)
133		ndquot = ((maxusers * NMOUNT) / 4) + v.v_proc;
134
135	dquot = kmem_zalloc(ndquot * sizeof (struct dquot), KM_SLEEP);
136	dquotNDQUOT = dquot + ndquot;
137
138	/*
139	 * Initialize the cache between the in-core structures
140	 * and the per-file system quota files on disk.
141	 */
142	for (dhp = &dqhead[0]; dhp < &dqhead[NDQHASH]; dhp++) {
143		dhp->dqh_forw = dhp->dqh_back = (DQptr)dhp;
144	}
145	dqfreelist.dq_freef = dqfreelist.dq_freeb = (DQptr)&dqfreelist;
146	for (dqp = dquot; dqp < dquotNDQUOT; dqp++) {
147		mutex_init(&dqp->dq_lock, NULL, MUTEX_DEFAULT, NULL);
148		dqp->dq_forw = dqp->dq_back = dqp;
149		dqinsheadfree(dqp);
150	}
151}
152
153/*
154 * Obtain the user's on-disk quota limit for file system specified.
155 * dqpp is returned locked.
156 */
157int
158getdiskquota(
159	uid_t uid,
160	struct ufsvfs *ufsvfsp,
161	int force,			/* don't do enable checks */
162	struct dquot **dqpp)		/* resulting dquot ptr */
163{
164	struct dquot *dqp;
165	struct dqhead *dhp;
166	struct inode *qip;
167	int error;
168	extern struct cred *kcred;
169	daddr_t	bn;
170	int contig;
171	int err;
172
173	ASSERT(RW_LOCK_HELD(&ufsvfsp->vfs_dqrwlock));
174
175	dhp = &dqhead[DQHASH(uid, ufsvfsp)];
176loop:
177	/*
178	 * Check for quotas enabled.
179	 */
180	if ((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0 && !force)
181		return (ESRCH);
182	qip = ufsvfsp->vfs_qinod;
183	if (!qip)
184		return (ufs_fault(ufsvfsp->vfs_root, "getdiskquota: NULL qip"));
185	/*
186	 * Check the cache first.
187	 */
188	mutex_enter(&dq_cachelock);
189	for (dqp = dhp->dqh_forw; dqp != (DQptr)dhp; dqp = dqp->dq_forw) {
190		if (dqp->dq_uid != uid || dqp->dq_ufsvfsp != ufsvfsp)
191			continue;
192		mutex_exit(&dq_cachelock);
193		mutex_enter(&dqp->dq_lock);
194		/*
195		 * I may have slept in the mutex_enter.  Make sure this is
196		 * still the one I want.
197		 */
198		if (dqp->dq_uid != uid || dqp->dq_ufsvfsp != ufsvfsp) {
199			mutex_exit(&dqp->dq_lock);
200			goto loop;
201		}
202		if (dqp->dq_flags & DQ_ERROR) {
203			mutex_exit(&dqp->dq_lock);
204			return (EINVAL);
205		}
206		/*
207		 * Cache hit with no references.
208		 * Take the structure off the free list.
209		 */
210		if (dqp->dq_cnt == 0) {
211			mutex_enter(&dq_freelock);
212			dqremfree(dqp);
213			mutex_exit(&dq_freelock);
214		}
215		dqp->dq_cnt++;
216		mutex_exit(&dqp->dq_lock);
217		*dqpp = dqp;
218		return (0);
219	}
220	/*
221	 * Not in cache.
222	 * Get dquot at head of free list.
223	 */
224	mutex_enter(&dq_freelock);
225	if ((dqp = dqfreelist.dq_freef) == &dqfreelist) {
226		mutex_exit(&dq_freelock);
227		mutex_exit(&dq_cachelock);
228		cmn_err(CE_WARN, "dquot table full");
229		return (EUSERS);
230	}
231
232	if (dqp->dq_cnt != 0 || dqp->dq_flags != 0) {
233		panic("getdiskquota: dqp->dq_cnt: "
234		    "%ld != 0 || dqp->dq_flags: 0x%x != 0 (%s)",
235		    dqp->dq_cnt, dqp->dq_flags, qip->i_fs->fs_fsmnt);
236		/*NOTREACHED*/
237	}
238	/*
239	 * Take it off the free list, and off the hash chain it was on.
240	 * Then put it on the new hash chain.
241	 */
242	dqremfree(dqp);
243	mutex_exit(&dq_freelock);
244	remque(dqp);
245	dqp->dq_cnt = 1;
246	dqp->dq_uid = uid;
247	dqp->dq_ufsvfsp = ufsvfsp;
248	dqp->dq_mof = UFS_HOLE;
249	mutex_enter(&dqp->dq_lock);
250	insque(dqp, dhp);
251	mutex_exit(&dq_cachelock);
252	/*
253	 * Check the uid in case it's too large to fit into the 2Gbyte
254	 * 'quotas' file (higher than 67 million or so).
255	 */
256
257	/*
258	 * Large Files: i_size need to be accessed atomically now.
259	 */
260	rw_enter(&qip->i_contents, RW_READER);
261	if (uid <= MAXUID && dqoff(uid) >= 0 && dqoff(uid) < qip->i_size) {
262		/*
263		 * Read quota info off disk.
264		 */
265		error = ufs_rdwri(UIO_READ, FREAD, qip, (caddr_t)&dqp->dq_dqb,
266		    sizeof (struct dqblk), dqoff(uid), UIO_SYSSPACE,
267		    (int *)NULL, kcred);
268		/*
269		 * We must set the dq_mof even if not we are not logging in case
270		 * we are later remount to logging.
271		 */
272		err = bmap_read(qip, dqoff(uid), &bn, &contig);
273		rw_exit(&qip->i_contents);
274		if ((bn != UFS_HOLE) && !err) {
275			dqp->dq_mof = ldbtob(bn) +
276			(offset_t)(dqoff(uid) & (DEV_BSIZE - 1));
277		} else {
278			dqp->dq_mof = UFS_HOLE;
279		}
280		if (error) {
281			/*
282			 * I/O error in reading quota file.
283			 * Put dquot on a private, unfindable hash list,
284			 * put dquot at the head of the free list and
285			 * reflect the problem to caller.
286			 */
287			dqp->dq_flags = DQ_ERROR;
288			/*
289			 * I must exit the dq_lock so that I can acquire the
290			 * dq_cachelock.  If another thread finds dqp before
291			 * I remove it from the cache it will see the
292			 * DQ_ERROR and just return EIO.
293			 */
294			mutex_exit(&dqp->dq_lock);
295			mutex_enter(&dq_cachelock);
296			mutex_enter(&dqp->dq_lock);
297			remque(dqp);
298			mutex_exit(&dqp->dq_lock);
299			mutex_exit(&dq_cachelock);
300			/*
301			 * Don't bother reacquiring dq_lock because the dq is
302			 * not on the freelist or in the cache so only I have
303			 * access to it.
304			 */
305			dqp->dq_cnt = 0;
306			dqp->dq_ufsvfsp = NULL;
307			dqp->dq_forw = dqp;
308			dqp->dq_back = dqp;
309			dqp->dq_mof = UFS_HOLE;
310			dqp->dq_flags = 0;
311			dqinsheadfree(dqp);
312			return (EIO);
313		}
314	} else {
315		rw_exit(&qip->i_contents);	/* done with i_size */
316		bzero(&dqp->dq_dqb, sizeof (struct dqblk));
317		dqp->dq_mof = UFS_HOLE;
318	}
319	mutex_exit(&dqp->dq_lock);
320	*dqpp = dqp;
321	return (0);
322}
323
324/*
325 * Release dquot.
326 */
327void
328dqput(dqp)
329	register struct dquot *dqp;
330{
331
332	ASSERT(dqp->dq_ufsvfsp == NULL ||
333		RW_LOCK_HELD(&dqp->dq_ufsvfsp->vfs_dqrwlock));
334	ASSERT(MUTEX_HELD(&dqp->dq_lock));
335	if (dqp->dq_cnt == 0) {
336		(void) ufs_fault(
337			dqp->dq_ufsvfsp && dqp->dq_ufsvfsp->vfs_root?
338			dqp->dq_ufsvfsp->vfs_root: NULL,
339						    "dqput: dqp->dq_cnt == 0");
340		return;
341	}
342	if (--dqp->dq_cnt == 0) {
343		if (dqp->dq_flags & DQ_MOD)
344			dqupdate(dqp);
345		/*
346		 * DQ_MOD was cleared by dqupdate().
347		 * DQ_ERROR shouldn't be set if this dquot was being used.
348		 * DQ_FILES/DQ_BLKS don't matter at this point.
349		 */
350		dqp->dq_flags = 0;
351		if (dqp->dq_ufsvfsp == NULL ||
352		    dqp->dq_ufsvfsp->vfs_qflags == 0) {
353			/* quotas are disabled, discard this dquot struct */
354			dqinval(dqp);
355		} else
356			dqinstailfree(dqp);
357	}
358}
359
360/*
361 * Update on disk quota info.
362 */
363void
364dqupdate(dqp)
365	register struct dquot *dqp;
366{
367	register struct inode *qip;
368	extern struct cred *kcred;
369	struct ufsvfs	*ufsvfsp;
370	int		newtrans	= 0;
371	struct vnode	*vfs_root;
372
373	ASSERT(MUTEX_HELD(&dqp->dq_lock));
374
375	if (!dqp->dq_ufsvfsp) {
376		(void) ufs_fault(NULL, "dqupdate: NULL dq_ufsvfsp");
377		return;
378	}
379	vfs_root = dqp->dq_ufsvfsp->vfs_root;
380	if (!vfs_root) {
381		(void) ufs_fault(NULL, "dqupdate: NULL vfs_root");
382		return;
383	}
384	/*
385	 * I don't need to hold dq_rwlock when looking at vfs_qinod here
386	 * because vfs_qinod is only cleared by closedq after it has called
387	 * dqput on all dq's.  Since I am holding dq_lock on this dq, closedq
388	 * will have to wait until I am done before it can call dqput on
389	 * this dq so vfs_qinod will not change value until after I return.
390	 */
391	qip = dqp->dq_ufsvfsp->vfs_qinod;
392	if (!qip) {
393		(void) ufs_fault(vfs_root, "dqupdate: NULL vfs_qinod");
394		return;
395	}
396	ufsvfsp = qip->i_ufsvfs;
397	if (!ufsvfsp) {
398		(void) ufs_fault(vfs_root,
399				    "dqupdate: NULL vfs_qinod->i_ufsvfs");
400		return;
401	}
402	if (ufsvfsp != dqp->dq_ufsvfsp) {
403		(void) ufs_fault(vfs_root,
404			    "dqupdate: vfs_qinod->i_ufsvfs != dqp->dq_ufsvfsp");
405		return;
406	}
407	if (!(dqp->dq_flags & DQ_MOD)) {
408		(void) ufs_fault(vfs_root,
409				    "dqupdate: !(dqp->dq_flags & DQ_MOD)");
410		return;
411	}
412
413	if (!(curthread->t_flag & T_DONTBLOCK)) {
414		newtrans++;
415		curthread->t_flag |= T_DONTBLOCK;
416		TRANS_BEGIN_ASYNC(ufsvfsp, TOP_QUOTA, TOP_QUOTA_SIZE);
417	}
418	if (TRANS_ISTRANS(ufsvfsp)) {
419		TRANS_DELTA(ufsvfsp, dqp->dq_mof, sizeof (struct dqblk),
420		    DT_QR, 0, 0);
421		TRANS_LOG(ufsvfsp, (caddr_t)&dqp->dq_dqb, dqp->dq_mof,
422		    (int)(sizeof (struct dqblk)), NULL, 0);
423	} else {
424		/*
425		 * Locknest gets very confused when I lock the quota inode.
426		 * It thinks that qip and ip (the inode that caused the
427		 * quota routines to get called) are the same inode.
428		 */
429		rw_enter(&qip->i_contents, RW_WRITER);
430		/*
431		 * refuse to push if offset would be illegal
432		 */
433		if (dqoff(dqp->dq_uid) >= 0) {
434			(void) ufs_rdwri(UIO_WRITE, FWRITE, qip,
435					(caddr_t)&dqp->dq_dqb,
436					sizeof (struct dqblk),
437					dqoff(dqp->dq_uid), UIO_SYSSPACE,
438					(int *)NULL, kcred);
439		}
440		rw_exit(&qip->i_contents);
441	}
442
443	dqp->dq_flags &= ~DQ_MOD;
444	if (newtrans) {
445		TRANS_END_ASYNC(ufsvfsp, TOP_QUOTA, TOP_QUOTA_SIZE);
446		curthread->t_flag &= ~T_DONTBLOCK;
447	}
448}
449
450/*
451 * Invalidate a dquot.  This function is called when quotas are disabled
452 * for a specific file system via closedq() or when we unmount the file
453 * system and invalidate the quota cache via invalidatedq().
454 *
455 * Take the dquot off its hash list and put it on a private, unfindable
456 * hash list (refers to itself). Also, put it at the head of the free list.
457 * Note that even though dq_cnt is zero, this dquot is NOT yet on the
458 * freelist.
459 */
460void
461dqinval(dqp)
462	register struct dquot *dqp;
463{
464	ASSERT(MUTEX_HELD(&dqp->dq_lock));
465	ASSERT(dqp->dq_cnt == 0);
466	ASSERT(dqp->dq_flags == 0);
467	ASSERT(dqp->dq_freef == NULL && dqp->dq_freeb == NULL);
468	ASSERT(dqp->dq_ufsvfsp &&
469		(dqp->dq_ufsvfsp->vfs_qflags & MQ_ENABLED) == 0);
470
471	/*
472	 * To preserve lock order, we have to drop dq_lock in order to
473	 * grab dq_cachelock.  To prevent someone from grabbing this
474	 * dquot from the quota cache via getdiskquota() while we are
475	 * "unsafe", we clear dq_ufsvfsp so it won't match anything.
476	 */
477	dqp->dq_ufsvfsp = NULL;
478	mutex_exit(&dqp->dq_lock);
479	mutex_enter(&dq_cachelock);
480	mutex_enter(&dqp->dq_lock);
481
482	/*
483	 * The following paranoia is to make sure that getdiskquota()
484	 * has not been broken:
485	 */
486	ASSERT(dqp->dq_cnt == 0);
487	ASSERT(dqp->dq_flags == 0);
488	ASSERT(dqp->dq_freef == NULL && dqp->dq_freeb == NULL);
489	ASSERT(dqp->dq_ufsvfsp == NULL);
490
491	/*
492	 * Now we have the locks in the right order so we can do the
493	 * rest of the work.
494	 */
495	remque(dqp);
496	mutex_exit(&dq_cachelock);
497	dqp->dq_forw = dqp;
498	dqp->dq_back = dqp;
499	dqinsheadfree(dqp);
500}
501
502/*
503 * Invalidate all quota information records for the specified file system.
504 */
505void
506invalidatedq(ufsvfsp)
507	register struct ufsvfs *ufsvfsp;
508{
509	register struct dquot *dqp;
510
511
512	/*
513	 * If quotas are not initialized, then there is nothing to do.
514	 */
515	rw_enter(&dq_rwlock, RW_READER);
516	if (!quotas_initialized) {
517		rw_exit(&dq_rwlock);
518		return;
519	}
520	rw_exit(&dq_rwlock);
521
522
523	rw_enter(&ufsvfsp->vfs_dqrwlock, RW_WRITER);
524
525	ASSERT((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0);
526
527	/*
528	 * Invalidate all the quota info records for this file system
529	 * that are in the quota cache:
530	 */
531	for (dqp = dquot; dqp < dquotNDQUOT; dqp++) {
532		/*
533		 * If someone else has it, then ignore it. For the target
534		 * file system, this is okay for three reasons:
535		 *
536		 * 1) This routine is called after closedq() so the quota
537		 *    sub-system is disabled for this file system.
538		 * 2) We have made the quota sub-system quiescent for
539		 *    this file system.
540		 * 3) We are in the process of unmounting this file
541		 *    system so the quota sub-system can't be enabled
542		 *    for it.
543		 */
544		if (!mutex_tryenter(&dqp->dq_lock)) {
545			continue;
546		}
547
548
549		/*
550		 * At this point, any quota info records that are
551		 * associated with the target file system, should have a
552		 * reference count of zero and be on the free list.
553		 * Why? Because these quota info records went to a zero
554		 * dq_cnt (via dqput()) before the file system was
555		 * unmounted and are waiting to be found in the quota
556		 * cache and reused (via getdiskquota()). The exception
557		 * is when a quota transaction is sitting in the deltamap,
558		 * indicated by DQ_TRANS being set in dq_flags.
559		 * This causes a reference to be held on the quota
560		 * information record and it will only be cleared once
561		 * the transaction has reached the log. If we find
562		 * any of these - we ignore them and let logging do
563		 * the right thing.
564		 */
565		if (dqp->dq_ufsvfsp == ufsvfsp) {
566			ASSERT(dqp->dq_cnt == 0 || (dqp->dq_cnt == 1 &&
567			    (dqp->dq_flags & DQ_TRANS)));
568
569			/* Cope with those orphaned dquots. */
570			if (dqp->dq_cnt == 1 && (dqp->dq_flags & DQ_TRANS)) {
571				mutex_exit(&dqp->dq_lock);
572				continue;
573			}
574
575			ASSERT(dqp->dq_cnt == 0);
576			ASSERT(dqp->dq_freef && dqp->dq_freeb);
577
578			/*
579			 * Take the quota info record off the free list
580			 * so dqinval() can do its job (and put it on the
581			 * front of the free list).
582			 */
583			mutex_enter(&dq_freelock);
584			dqremfree(dqp);
585			mutex_exit(&dq_freelock);
586			dqinval(dqp);
587		}
588
589		mutex_exit(&dqp->dq_lock);
590	}
591	rw_exit(&ufsvfsp->vfs_dqrwlock);
592}
593