1/*
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_types.h"
21#include "xfs_bit.h"
22#include "xfs_log.h"
23#include "xfs_inum.h"
24#include "xfs_trans.h"
25#include "xfs_sb.h"
26#include "xfs_ag.h"
27#include "xfs_dir.h"
28#include "xfs_dir2.h"
29#include "xfs_dmapi.h"
30#include "xfs_mount.h"
31#include "xfs_bmap_btree.h"
32#include "xfs_dir_sf.h"
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h"
36#include "xfs_inode.h"
37#include "xfs_inode_item.h"
38#include "xfs_bmap.h"
39#include "xfs_error.h"
40#include "xfs_quota.h"
41#include "xfs_rw.h"
42#include "xfs_itable.h"
43#include "xfs_utils.h"
44
45/*
46 * xfs_get_dir_entry is used to get a reference to an inode given
47 * its parent directory inode and the name of the file.	 It does
48 * not lock the child inode, and it unlocks the directory before
49 * returning.  The directory's generation number is returned for
50 * use by a later call to xfs_lock_dir_and_entry.
51 */
52int
53xfs_get_dir_entry(
54	vname_t		*dentry,
55	xfs_inode_t	**ipp)
56{
57	xfs_vnode_t	*vp;
58
59	vp = VNAME_TO_VNODE(dentry);
60
61	*ipp = xfs_vtoi(vp);
62	if (!*ipp)
63		return XFS_ERROR(ENOENT);
64	VN_HOLD(vp);
65	return 0;
66}
67
68int
69xfs_dir_lookup_int(
70	bhv_desc_t	*dir_bdp,
71	uint		lock_mode,
72	vname_t		*dentry,
73	xfs_ino_t	*inum,
74	xfs_inode_t	**ipp)
75{
76	xfs_vnode_t	*dir_vp;
77	xfs_inode_t	*dp;
78	int		error;
79
80	dir_vp = BHV_TO_VNODE(dir_bdp);
81	vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address);
82
83	dp = XFS_BHVTOI(dir_bdp);
84
85	error = XFS_DIR_LOOKUP(dp->i_mount, NULL, dp,
86				VNAME(dentry), VNAMELEN(dentry), inum);
87	if (!error) {
88		/*
89		 * Unlock the directory. We do this because we can't
90		 * hold the directory lock while doing the vn_get()
91		 * in xfs_iget().  Doing so could cause us to hold
92		 * a lock while waiting for the inode to finish
93		 * being inactive while it's waiting for a log
94		 * reservation in the inactive routine.
95		 */
96		xfs_iunlock(dp, lock_mode);
97		error = xfs_iget(dp->i_mount, NULL, *inum, 0, 0, ipp, 0);
98		xfs_ilock(dp, lock_mode);
99
100		if (error) {
101			*ipp = NULL;
102		} else if ((*ipp)->i_d.di_mode == 0) {
103			/*
104			 * The inode has been freed.  Something is
105			 * wrong so just get out of here.
106			 */
107			xfs_iunlock(dp, lock_mode);
108			xfs_iput_new(*ipp, 0);
109			*ipp = NULL;
110			xfs_ilock(dp, lock_mode);
111			error = XFS_ERROR(ENOENT);
112		}
113	}
114	return error;
115}
116
117/*
118 * Allocates a new inode from disk and return a pointer to the
119 * incore copy. This routine will internally commit the current
120 * transaction and allocate a new one if the Space Manager needed
121 * to do an allocation to replenish the inode free-list.
122 *
123 * This routine is designed to be called from xfs_create and
124 * xfs_create_dir.
125 *
126 */
127int
128xfs_dir_ialloc(
129	xfs_trans_t	**tpp,		/* input: current transaction;
130					   output: may be a new transaction. */
131	xfs_inode_t	*dp,		/* directory within whose allocate
132					   the inode. */
133	mode_t		mode,
134	xfs_nlink_t	nlink,
135	xfs_dev_t	rdev,
136	cred_t		*credp,
137	prid_t		prid,		/* project id */
138	int		okalloc,	/* ok to allocate new space */
139	xfs_inode_t	**ipp,		/* pointer to inode; it will be
140					   locked. */
141	int		*committed)
142
143{
144	xfs_trans_t	*tp;
145	xfs_trans_t	*ntp;
146	xfs_inode_t	*ip;
147	xfs_buf_t	*ialloc_context = NULL;
148	boolean_t	call_again = B_FALSE;
149	int		code;
150	uint		log_res;
151	uint		log_count;
152	void		*dqinfo;
153	uint		tflags;
154
155	tp = *tpp;
156	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
157
158	/*
159	 * xfs_ialloc will return a pointer to an incore inode if
160	 * the Space Manager has an available inode on the free
161	 * list. Otherwise, it will do an allocation and replenish
162	 * the freelist.  Since we can only do one allocation per
163	 * transaction without deadlocks, we will need to commit the
164	 * current transaction and start a new one.  We will then
165	 * need to call xfs_ialloc again to get the inode.
166	 *
167	 * If xfs_ialloc did an allocation to replenish the freelist,
168	 * it returns the bp containing the head of the freelist as
169	 * ialloc_context. We will hold a lock on it across the
170	 * transaction commit so that no other process can steal
171	 * the inode(s) that we've just allocated.
172	 */
173	code = xfs_ialloc(tp, dp, mode, nlink, rdev, credp, prid, okalloc,
174			  &ialloc_context, &call_again, &ip);
175
176	/*
177	 * Return an error if we were unable to allocate a new inode.
178	 * This should only happen if we run out of space on disk or
179	 * encounter a disk error.
180	 */
181	if (code) {
182		*ipp = NULL;
183		return code;
184	}
185	if (!call_again && (ip == NULL)) {
186		*ipp = NULL;
187		return XFS_ERROR(ENOSPC);
188	}
189
190	/*
191	 * If call_again is set, then we were unable to get an
192	 * inode in one operation.  We need to commit the current
193	 * transaction and call xfs_ialloc() again.  It is guaranteed
194	 * to succeed the second time.
195	 */
196	if (call_again) {
197
198		/*
199		 * Normally, xfs_trans_commit releases all the locks.
200		 * We call bhold to hang on to the ialloc_context across
201		 * the commit.  Holding this buffer prevents any other
202		 * processes from doing any allocations in this
203		 * allocation group.
204		 */
205		xfs_trans_bhold(tp, ialloc_context);
206		/*
207		 * Save the log reservation so we can use
208		 * them in the next transaction.
209		 */
210		log_res = xfs_trans_get_log_res(tp);
211		log_count = xfs_trans_get_log_count(tp);
212
213		/*
214		 * We want the quota changes to be associated with the next
215		 * transaction, NOT this one. So, detach the dqinfo from this
216		 * and attach it to the next transaction.
217		 */
218		dqinfo = NULL;
219		tflags = 0;
220		if (tp->t_dqinfo) {
221			dqinfo = (void *)tp->t_dqinfo;
222			tp->t_dqinfo = NULL;
223			tflags = tp->t_flags & XFS_TRANS_DQ_DIRTY;
224			tp->t_flags &= ~(XFS_TRANS_DQ_DIRTY);
225		}
226
227		ntp = xfs_trans_dup(tp);
228		code = xfs_trans_commit(tp, 0, NULL);
229		tp = ntp;
230		if (committed != NULL) {
231			*committed = 1;
232		}
233		/*
234		 * If we get an error during the commit processing,
235		 * release the buffer that is still held and return
236		 * to the caller.
237		 */
238		if (code) {
239			xfs_buf_relse(ialloc_context);
240			if (dqinfo) {
241				tp->t_dqinfo = dqinfo;
242				XFS_TRANS_FREE_DQINFO(tp->t_mountp, tp);
243			}
244			*tpp = ntp;
245			*ipp = NULL;
246			return code;
247		}
248		code = xfs_trans_reserve(tp, 0, log_res, 0,
249					 XFS_TRANS_PERM_LOG_RES, log_count);
250		/*
251		 * Re-attach the quota info that we detached from prev trx.
252		 */
253		if (dqinfo) {
254			tp->t_dqinfo = dqinfo;
255			tp->t_flags |= tflags;
256		}
257
258		if (code) {
259			xfs_buf_relse(ialloc_context);
260			*tpp = ntp;
261			*ipp = NULL;
262			return code;
263		}
264		xfs_trans_bjoin(tp, ialloc_context);
265
266		/*
267		 * Call ialloc again. Since we've locked out all
268		 * other allocations in this allocation group,
269		 * this call should always succeed.
270		 */
271		code = xfs_ialloc(tp, dp, mode, nlink, rdev, credp, prid,
272				  okalloc, &ialloc_context, &call_again, &ip);
273
274		/*
275		 * If we get an error at this point, return to the caller
276		 * so that the current transaction can be aborted.
277		 */
278		if (code) {
279			*tpp = tp;
280			*ipp = NULL;
281			return code;
282		}
283		ASSERT ((!call_again) && (ip != NULL));
284
285	} else {
286		if (committed != NULL) {
287			*committed = 0;
288		}
289	}
290
291	*ipp = ip;
292	*tpp = tp;
293
294	return 0;
295}
296
297/*
298 * Decrement the link count on an inode & log the change.
299 * If this causes the link count to go to zero, initiate the
300 * logging activity required to truncate a file.
301 */
302int				/* error */
303xfs_droplink(
304	xfs_trans_t *tp,
305	xfs_inode_t *ip)
306{
307	int	error;
308
309	xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
310
311	ASSERT (ip->i_d.di_nlink > 0);
312	ip->i_d.di_nlink--;
313	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
314
315	error = 0;
316	if (ip->i_d.di_nlink == 0) {
317		/*
318		 * We're dropping the last link to this file.
319		 * Move the on-disk inode to the AGI unlinked list.
320		 * From xfs_inactive() we will pull the inode from
321		 * the list and free it.
322		 */
323		error = xfs_iunlink(tp, ip);
324	}
325	return error;
326}
327
328/*
329 * This gets called when the inode's version needs to be changed from 1 to 2.
330 * Currently this happens when the nlink field overflows the old 16-bit value
331 * or when chproj is called to change the project for the first time.
332 * As a side effect the superblock version will also get rev'd
333 * to contain the NLINK bit.
334 */
335void
336xfs_bump_ino_vers2(
337	xfs_trans_t	*tp,
338	xfs_inode_t	*ip)
339{
340	xfs_mount_t	*mp;
341	unsigned long		s;
342
343	ASSERT(ismrlocked (&ip->i_lock, MR_UPDATE));
344	ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1);
345
346	ip->i_d.di_version = XFS_DINODE_VERSION_2;
347	ip->i_d.di_onlink = 0;
348	memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
349	mp = tp->t_mountp;
350	if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) {
351		s = XFS_SB_LOCK(mp);
352		if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) {
353			XFS_SB_VERSION_ADDNLINK(&mp->m_sb);
354			XFS_SB_UNLOCK(mp, s);
355			xfs_mod_sb(tp, XFS_SB_VERSIONNUM);
356		} else {
357			XFS_SB_UNLOCK(mp, s);
358		}
359	}
360	/* Caller must log the inode */
361}
362
363/*
364 * Increment the link count on an inode & log the change.
365 */
366int
367xfs_bumplink(
368	xfs_trans_t *tp,
369	xfs_inode_t *ip)
370{
371	if (ip->i_d.di_nlink >= XFS_MAXLINK)
372		return XFS_ERROR(EMLINK);
373	xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
374
375	ASSERT(ip->i_d.di_nlink > 0);
376	ip->i_d.di_nlink++;
377	if ((ip->i_d.di_version == XFS_DINODE_VERSION_1) &&
378	    (ip->i_d.di_nlink > XFS_MAXLINK_1)) {
379		/*
380		 * The inode has increased its number of links beyond
381		 * what can fit in an old format inode.  It now needs
382		 * to be converted to a version 2 inode with a 32 bit
383		 * link count.  If this is the first inode in the file
384		 * system to do this, then we need to bump the superblock
385		 * version number as well.
386		 */
387		xfs_bump_ino_vers2(tp, ip);
388	}
389
390	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
391	return 0;
392}
393
394/*
395 * Try to truncate the given file to 0 length.  Currently called
396 * only out of xfs_remove when it has to truncate a file to free
397 * up space for the remove to proceed.
398 */
399int
400xfs_truncate_file(
401	xfs_mount_t	*mp,
402	xfs_inode_t	*ip)
403{
404	xfs_trans_t	*tp;
405	int		error;
406
407#ifdef QUOTADEBUG
408	/*
409	 * This is called to truncate the quotainodes too.
410	 */
411	if (XFS_IS_UQUOTA_ON(mp)) {
412		if (ip->i_ino != mp->m_sb.sb_uquotino)
413			ASSERT(ip->i_udquot);
414	}
415	if (XFS_IS_OQUOTA_ON(mp)) {
416		if (ip->i_ino != mp->m_sb.sb_gquotino)
417			ASSERT(ip->i_gdquot);
418	}
419#endif
420	/*
421	 * Make the call to xfs_itruncate_start before starting the
422	 * transaction, because we cannot make the call while we're
423	 * in a transaction.
424	 */
425	xfs_ilock(ip, XFS_IOLOCK_EXCL);
426	xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, (xfs_fsize_t)0);
427
428	tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE);
429	if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
430				      XFS_TRANS_PERM_LOG_RES,
431				      XFS_ITRUNCATE_LOG_COUNT))) {
432		xfs_trans_cancel(tp, 0);
433		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
434		return error;
435	}
436
437	/*
438	 * Follow the normal truncate locking protocol.  Since we
439	 * hold the inode in the transaction, we know that it's number
440	 * of references will stay constant.
441	 */
442	xfs_ilock(ip, XFS_ILOCK_EXCL);
443	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
444	xfs_trans_ihold(tp, ip);
445	/*
446	 * Signal a sync xaction.  The only case where that isn't
447	 * the case is if we're truncating an already unlinked file
448	 * on a wsync fs.  In that case, we know the blocks can't
449	 * reappear in the file because the links to file are
450	 * permanently toast.  Currently, we're always going to
451	 * want a sync transaction because this code is being
452	 * called from places where nlink is guaranteed to be 1
453	 * but I'm leaving the tests in to protect against future
454	 * changes -- rcc.
455	 */
456	error = xfs_itruncate_finish(&tp, ip, (xfs_fsize_t)0,
457				     XFS_DATA_FORK,
458				     ((ip->i_d.di_nlink != 0 ||
459				       !(mp->m_flags & XFS_MOUNT_WSYNC))
460				      ? 1 : 0));
461	if (error) {
462		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
463				 XFS_TRANS_ABORT);
464	} else {
465		xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
466		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES,
467					 NULL);
468	}
469	xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
470
471	return error;
472}
473