/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
18159452Srodrigc#include "xfs.h"
19159452Srodrigc#include "xfs_fs.h"
20159452Srodrigc#include "xfs_types.h"
21159452Srodrigc#include "xfs_bit.h"
22159452Srodrigc#include "xfs_log.h"
23159452Srodrigc#include "xfs_inum.h"
24159452Srodrigc#include "xfs_trans.h"
25159452Srodrigc#include "xfs_sb.h"
26159452Srodrigc#include "xfs_ag.h"
27159452Srodrigc#include "xfs_dir.h"
28159452Srodrigc#include "xfs_dir2.h"
29159452Srodrigc#include "xfs_dmapi.h"
30159452Srodrigc#include "xfs_mount.h"
31159452Srodrigc#include "xfs_bmap_btree.h"
32159452Srodrigc#include "xfs_alloc_btree.h"
33159452Srodrigc#include "xfs_ialloc_btree.h"
34159452Srodrigc#include "xfs_dir_sf.h"
35159452Srodrigc#include "xfs_dir2_sf.h"
36159452Srodrigc#include "xfs_attr_sf.h"
37159452Srodrigc#include "xfs_dinode.h"
38159452Srodrigc#include "xfs_inode.h"
39159452Srodrigc#include "xfs_btree.h"
40159452Srodrigc#include "xfs_ialloc.h"
41159452Srodrigc#include "xfs_quota.h"
42159452Srodrigc#include "xfs_utils.h"
43159452Srodrigc
44159452Srodrigc/*
45159452Srodrigc * Initialize the inode hash table for the newly mounted file system.
46159452Srodrigc * Choose an initial table size based on user specified value, else
47159452Srodrigc * use a simple algorithm using the maximum number of inodes as an
48159452Srodrigc * indicator for table size, and clamp it between one and some large
49159452Srodrigc * number of pages.
50159452Srodrigc */
51159452Srodrigcvoid
52159452Srodrigcxfs_ihash_init(xfs_mount_t *mp)
53159452Srodrigc{
54159452Srodrigc	__uint64_t	icount;
55159452Srodrigc	uint		i, flags = KM_SLEEP | KM_MAYFAIL;
56159452Srodrigc
57159452Srodrigc	if (!mp->m_ihsize) {
58159452Srodrigc		icount = mp->m_maxicount ? mp->m_maxicount :
59159452Srodrigc			 (mp->m_sb.sb_dblocks << mp->m_sb.sb_inopblog);
60159452Srodrigc		mp->m_ihsize = 1 << max_t(uint, 8,
61159452Srodrigc					(xfs_highbit64(icount) + 1) / 2);
62159452Srodrigc		mp->m_ihsize = min_t(uint, mp->m_ihsize,
63159452Srodrigc					(64 * NBPP) / sizeof(xfs_ihash_t));
64159452Srodrigc	}
65159452Srodrigc
66159452Srodrigc	while (!(mp->m_ihash = (xfs_ihash_t *)kmem_zalloc(mp->m_ihsize *
67159452Srodrigc						sizeof(xfs_ihash_t), flags))) {
68159452Srodrigc		if ((mp->m_ihsize >>= 1) <= NBPP)
69159452Srodrigc			flags = KM_SLEEP;
70159452Srodrigc	}
71159452Srodrigc	for (i = 0; i < mp->m_ihsize; i++) {
72159452Srodrigc		rwlock_init(&(mp->m_ihash[i].ih_lock));
73159452Srodrigc	}
74159452Srodrigc}
75159452Srodrigc
76159452Srodrigc/*
77159452Srodrigc * Free up structures allocated by xfs_ihash_init, at unmount time.
78159452Srodrigc */
79159452Srodrigcvoid
80159452Srodrigcxfs_ihash_free(xfs_mount_t *mp)
81159452Srodrigc{
82159452Srodrigc	kmem_free(mp->m_ihash, mp->m_ihsize*sizeof(xfs_ihash_t));
83159452Srodrigc	mp->m_ihash = NULL;
84159452Srodrigc}
85159452Srodrigc
86159452Srodrigc/*
87159452Srodrigc * Initialize the inode cluster hash table for the newly mounted file system.
88159452Srodrigc * Its size is derived from the ihash table size.
89159452Srodrigc */
90159452Srodrigcvoid
91159452Srodrigcxfs_chash_init(xfs_mount_t *mp)
92159452Srodrigc{
93159452Srodrigc	uint	i;
94159452Srodrigc
95159452Srodrigc	mp->m_chsize = max_t(uint, 1, mp->m_ihsize /
96159452Srodrigc			 (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog));
97159452Srodrigc	mp->m_chsize = min_t(uint, mp->m_chsize, mp->m_ihsize);
98159452Srodrigc	mp->m_chash = (xfs_chash_t *)kmem_zalloc(mp->m_chsize
99159452Srodrigc						 * sizeof(xfs_chash_t),
100159452Srodrigc						 KM_SLEEP);
101159452Srodrigc	for (i = 0; i < mp->m_chsize; i++) {
102159452Srodrigc		spinlock_init(&mp->m_chash[i].ch_lock,"xfshash");
103159452Srodrigc	}
104159452Srodrigc}
105159452Srodrigc
106159452Srodrigc/*
107159452Srodrigc * Free up structures allocated by xfs_chash_init, at unmount time.
108159452Srodrigc */
109159452Srodrigcvoid
110159452Srodrigcxfs_chash_free(xfs_mount_t *mp)
111159452Srodrigc{
112159452Srodrigc	int	i;
113159452Srodrigc
114159452Srodrigc	for (i = 0; i < mp->m_chsize; i++) {
115159452Srodrigc		spinlock_destroy(&mp->m_chash[i].ch_lock);
116159452Srodrigc	}
117159452Srodrigc
118159452Srodrigc	kmem_free(mp->m_chash, mp->m_chsize*sizeof(xfs_chash_t));
119159452Srodrigc	mp->m_chash = NULL;
120159452Srodrigc}
121159452Srodrigc
/*
 * Try to move an inode to the front of its hash list if possible
 * (and if its not there already).  Called right after obtaining
 * the list version number and then dropping the read_lock on the
 * hash list in question (which is done right after looking up the
 * inode in question...).
 *
 * Best-effort only: if the write lock cannot be taken immediately,
 * or if the chain changed (version mismatch) between the caller's
 * read_unlock and our write_trylock, the promotion is silently
 * skipped — correctness never depends on the inode being at the head.
 */
STATIC void
xfs_ihash_promote(
	xfs_ihash_t	*ih,		/* hash bucket the inode lives on */
	xfs_inode_t	*ip,		/* inode to move to the list head */
	ulong		version)	/* chain version seen under read lock */
{
	xfs_inode_t	*iq;

	/* Only bother if ip is not already the head entry, and only if
	 * the write lock is free right now. */
	if ((ip->i_prevp != &ih->ih_next) && write_trylock(&ih->ih_lock)) {
		if (likely(version == ih->ih_version)) {
			/* remove from list */
			if ((iq = ip->i_next)) {
				iq->i_prevp = ip->i_prevp;
			}
			*ip->i_prevp = iq;

			/* insert at list head; ih->ih_next cannot be NULL
			 * here because ip was not the head, so at least
			 * one entry remains after the removal above. */
			iq = ih->ih_next;
			iq->i_prevp = &ip->i_next;
			ip->i_next = iq;
			ip->i_prevp = &ih->ih_next;
			ih->ih_next = ip;
		}
		write_unlock(&ih->ih_lock);
	}
}
155159452Srodrigc
/*
 * Look up an inode by number in the given file system.
 * The inode is looked up in the hash table for the file system
 * represented by the mount point parameter mp.  Each bucket of
 * the hash table is guarded by an individual semaphore.
 *
 * If the inode is found in the hash table, its corresponding vnode
 * is obtained with a call to vn_get().  This call takes care of
 * coordination with the reclamation of the inode and vnode.  Note
 * that the vmap structure is filled in while holding the hash lock.
 * This gives us the state of the inode/vnode when we found it and
 * is used for coordination in vn_get().
 *
 * If it is not in core, read it in from the file system's device and
 * add the inode into the hash table.
 *
 * The inode is locked according to the value of the lock_flags parameter.
 * This flag parameter indicates how and if the inode's IO lock and inode lock
 * should be taken.
 *
 * mp -- the mount point structure for the current file system.  It points
 *       to the inode hash table.
 * tp -- a pointer to the current transaction if there is one.  This is
 *       simply passed through to the xfs_iread() call.
 * ino -- the number of the inode desired.  This is the unique identifier
 *        within the file system for the inode being requested.
 * lock_flags -- flags indicating how to lock the inode.  See the comment
 *		 for xfs_ilock() for a list of valid values.
 * bno -- the block number starting the buffer containing the inode,
 *	  if known (as by bulkstat), else 0.
 *
 * Returns 0 on success, ENOENT if the inode is free and IGET_CREATE was
 * not requested, or the error from xfs_iread() on a read failure.
 */
#ifdef RMC
STATIC int
xfs_iget_core(
	xfs_vnode_t	*vp,
	xfs_mount_t	*mp,
	xfs_trans_t	*tp,
	xfs_ino_t	ino,
	uint		flags,
	uint		lock_flags,
	xfs_inode_t	**ipp,
	xfs_daddr_t	bno)
{
	xfs_ihash_t	*ih;
	xfs_inode_t	*ip;
	xfs_inode_t	*iq;
	xfs_vnode_t	*inode_vp;
	ulong		version;
	int		error;
	/* REFERENCED */
	xfs_chash_t	*ch;
	xfs_chashlist_t	*chl, *chlnew;
	SPLDECL(s);


	ih = XFS_IHASH(mp, ino);

again:
	/* Search the hash chain under the bucket's read lock. */
	read_lock(&ih->ih_lock);

	for (ip = ih->ih_next; ip != NULL; ip = ip->i_next) {
		if (ip->i_ino == ino) {
			/*
			 * If INEW is set this inode is being set up
			 * we need to pause and try again.
			 */
			if (ip->i_flags & XFS_INEW) {
				read_unlock(&ih->ih_lock);
				delay(1);
				XFS_STATS_INC(xs_ig_frecycle);

				goto again;
			}

			inode_vp = XFS_ITOV_NULL(ip);
			if (inode_vp == NULL) {
				/*
				 * If IRECLAIM is set this inode is
				 * on its way out of the system,
				 * we need to pause and try again.
				 */
				if (ip->i_flags & XFS_IRECLAIM) {
					read_unlock(&ih->ih_lock);
					delay(1);
					XFS_STATS_INC(xs_ig_frecycle);

					goto again;
				}

				vn_trace_exit(vp, "xfs_iget.alloc",
					(inst_t *)__return_address);

				XFS_STATS_INC(xs_ig_found);

				/* Rescue a reclaimable inode: clear the flag,
				 * then promote it using the version stamp
				 * captured before dropping the read lock. */
				ip->i_flags &= ~XFS_IRECLAIMABLE;
				version = ih->ih_version;
				read_unlock(&ih->ih_lock);
				xfs_ihash_promote(ih, ip, version);

#ifdef RMC
				XFS_MOUNT_ILOCK(mp);
				list_del_init(&ip->i_reclaim);
				XFS_MOUNT_IUNLOCK(mp);
#endif

				goto finish_inode;

			} else if (vp != inode_vp) {
#ifdef RMC
				struct inode *inode = vn_to_inode(inode_vp);

				/* The inode is being torn down, pause and
				 * try again.
				 */
				if (inode->i_state & (I_FREEING | I_CLEAR)) {
					read_unlock(&ih->ih_lock);
					delay(1);
					XFS_STATS_INC(xs_ig_frecycle);

					goto again;
				}
#endif
/* Chances are the other vnode (the one in the inode) is being torn
 * down right now, and we landed on top of it. Question is, what do
 * we do? Unhook the old inode and hook up the new one?
 */
				cmn_err(CE_PANIC,
			"xfs_iget_core: ambiguous vns: vp/0x%p, invp/0x%p",
						inode_vp, vp);
			}

			/*
			 * Inode cache hit: if ip is not at the front of
			 * its hash chain, move it there now.
			 * Do this with the lock held for update, but
			 * do statistics after releasing the lock.
			 */
			version = ih->ih_version;
			read_unlock(&ih->ih_lock);
			xfs_ihash_promote(ih, ip, version);
			XFS_STATS_INC(xs_ig_found);

finish_inode:
			/* di_mode == 0 means the on-disk inode is free;
			 * only usable if the caller is allocating it. */
			if (ip->i_d.di_mode == 0) {
				if (!(flags & IGET_CREATE))
					return ENOENT;
				xfs_iocore_inode_reinit(ip);
			}

			if (lock_flags != 0)
				xfs_ilock(ip, lock_flags);

			ip->i_flags &= ~XFS_ISTALE;

			vn_trace_exit(vp, "xfs_iget.found",
						(inst_t *)__return_address);
			goto return_ip;
		}
	}

	/*
	 * Inode cache miss: save the hash chain version stamp and unlock
	 * the chain, so we don't deadlock in vn_alloc.
	 */
	XFS_STATS_INC(xs_ig_missed);

	version = ih->ih_version;

	read_unlock(&ih->ih_lock);

	/*
	 * Read the disk inode attributes into a new inode structure and get
	 * a new vnode for it. This should also initialize i_ino and i_mount.
	 */
	error = xfs_iread(mp, tp, ino, &ip, bno);
	if (error) {
		return error;
	}

	vn_trace_exit(vp, "xfs_iget.alloc", (inst_t *)__return_address);

	xfs_inode_lock_init(ip, vp);
	xfs_iocore_inode_init(ip);

	if (lock_flags != 0) {
		xfs_ilock(ip, lock_flags);
	}

	/* Free on disk and caller isn't creating: throw it back. */
	if ((ip->i_d.di_mode == 0) && !(flags & IGET_CREATE)) {
		xfs_idestroy(ip);
		return ENOENT;
	}

	/*
	 * Put ip on its hash chain, unless someone else hashed a duplicate
	 * after we released the hash lock.
	 */
	write_lock(&ih->ih_lock);

	if (ih->ih_version != version) {
		/* Chain changed while unlocked: scan for a racing insert of
		 * the same inode number and, if found, retry from the top. */
		for (iq = ih->ih_next; iq != NULL; iq = iq->i_next) {
			if (iq->i_ino == ino) {
				write_unlock(&ih->ih_lock);
				xfs_idestroy(ip);

				XFS_STATS_INC(xs_ig_dup);
				goto again;
			}
		}
	}

	/*
	 * These values _must_ be set before releasing ihlock!
	 */
	ip->i_hash = ih;
	if ((iq = ih->ih_next)) {
		iq->i_prevp = &ip->i_next;
	}
	ip->i_next = iq;
	ip->i_prevp = &ih->ih_next;
	ih->ih_next = ip;
	ip->i_udquot = ip->i_gdquot = NULL;
	ih->ih_version++;
	/* XFS_INEW makes concurrent lookups back off until setup is done. */
	ip->i_flags |= XFS_INEW;

	write_unlock(&ih->ih_lock);

	/*
	 * put ip on its cluster's hash chain
	 */
	ASSERT(ip->i_chash == NULL && ip->i_cprev == NULL &&
	       ip->i_cnext == NULL);

	chlnew = NULL;
	ch = XFS_CHASH(mp, ip->i_blkno);
 chlredo:
	s = mutex_spinlock(&ch->ch_lock);
	for (chl = ch->ch_list; chl != NULL; chl = chl->chl_next) {
		if (chl->chl_blkno == ip->i_blkno) {

			/* insert this inode into the doubly-linked list
			 * where chl points */
			if ((iq = chl->chl_ip)) {
				ip->i_cprev = iq->i_cprev;
				iq->i_cprev->i_cnext = ip;
				iq->i_cprev = ip;
				ip->i_cnext = iq;
			} else {
				ip->i_cnext = ip;
				ip->i_cprev = ip;
			}
			chl->chl_ip = ip;
			ip->i_chash = chl;
			break;
		}
	}

	/* no hash list found for this block; add a new hash list */
	if (chl == NULL)  {
		if (chlnew == NULL) {
			/* Must drop the spinlock to allocate (KM_SLEEP may
			 * block), then retake it and rescan via chlredo. */
			mutex_spinunlock(&ch->ch_lock, s);
			ASSERT(xfs_chashlist_zone != NULL);
			chlnew = (xfs_chashlist_t *)
					kmem_zone_alloc(xfs_chashlist_zone,
						KM_SLEEP);
			ASSERT(chlnew != NULL);
			goto chlredo;
		} else {
			ip->i_cnext = ip;
			ip->i_cprev = ip;
			ip->i_chash = chlnew;
			chlnew->chl_ip = ip;
			chlnew->chl_blkno = ip->i_blkno;
			if (ch->ch_list)
				ch->ch_list->chl_prev = chlnew;
			chlnew->chl_next = ch->ch_list;
			chlnew->chl_prev = NULL;
			ch->ch_list = chlnew;
			chlnew = NULL;
		}
	} else {
		/* Someone else created the list during our allocation;
		 * release the now-unneeded preallocated entry. */
		if (chlnew != NULL) {
			kmem_zone_free(xfs_chashlist_zone, chlnew);
		}
	}

	mutex_spinunlock(&ch->ch_lock, s);


	/*
	 * Link ip to its mount and thread it on the mount's inode list.
	 */
	XFS_MOUNT_ILOCK(mp);
	if ((iq = mp->m_inodes)) {
		ASSERT(iq->i_mprev->i_mnext == iq);
		ip->i_mprev = iq->i_mprev;
		iq->i_mprev->i_mnext = ip;
		iq->i_mprev = ip;
		ip->i_mnext = iq;
	} else {
		ip->i_mnext = ip;
		ip->i_mprev = ip;
	}
	mp->m_inodes = ip;

	XFS_MOUNT_IUNLOCK(mp);

 return_ip:
	ASSERT(ip->i_df.if_ext_max ==
	       XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t));

	ASSERT(((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 0) ==
	       ((ip->i_iocore.io_flags & XFS_IOCORE_RT) != 0));

	*ipp = ip;

	/*
	 * If we have a real type for an on-disk inode, we can set ops(&unlock)
	 * now.	 If it's a new inode being created, xfs_ialloc will handle it.
	 */
	XVFS_INIT_VNODE(XFS_MTOVFS(mp), vp, XFS_ITOBHV(ip), 1);

	return 0;
}
#endif
481159452Srodrigc
#ifdef RMC
/*
 * The 'normal' internal xfs_iget, if needed it will
 * 'allocate', or 'get', the vnode.
 *
 * Obtains (or creates) the VFS-level inode/vnode for 'ino', then hands
 * off to xfs_iget_core() for new inodes.  Returns 0 on success with
 * *ipp set, ENOMEM if no inode could be obtained, or the error from
 * xfs_iget_core().
 */
int
xfs_iget(
	xfs_mount_t	*mp,
	xfs_trans_t	*tp,
	xfs_ino_t	ino,
	uint		flags,
	uint		lock_flags,	/* see xfs_ilock() for valid values */
	xfs_inode_t	**ipp,		/* out: the looked-up inode */
	xfs_daddr_t	bno)		/* inode buffer block if known, else 0 */
{
	int		error;
	struct inode	*inode;
	xfs_vnode_t	*vp = NULL;

	XFS_STATS_INC(xs_ig_attempts);

retry:
	if ((inode = VFS_GET_INODE(XFS_MTOVFS(mp), ino, 0))) {
		xfs_inode_t	*ip;

		vp = vn_from_inode(inode);
		if (inode->i_state & I_NEW) {
			/* Freshly allocated VFS inode: initialize it and
			 * build/look up the XFS incore inode behind it. */
			vn_initialize(inode);
			error = xfs_iget_core(vp, mp, tp, ino, flags,
					lock_flags, ipp, bno);
			if (error) {
				/* Setup failed: mark the vnode bad so other
				 * lookups don't trust it, then release it. */
				vn_mark_bad(vp);
				if (inode->i_state & I_NEW)
					unlock_new_inode(inode);
				iput(inode);
			}
		} else {
			/*
			 * If the inode is not fully constructed due to
			 * filehandle mismatches wait for the inode to go
			 * away and try again.
			 *
			 * iget_locked will call __wait_on_freeing_inode
			 * to wait for the inode to go away.
			 */
			if (is_bad_inode(inode) ||
			    ((ip = xfs_vtoi(vp)) == NULL)) {
				iput(inode);
				delay(1);
				goto retry;
			}

			if (lock_flags != 0)
				xfs_ilock(ip, lock_flags);
			XFS_STATS_INC(xs_ig_found);
			*ipp = ip;
			error = 0;
		}
	} else
		error = ENOMEM;	/* If we got no inode we are out of memory */

	return error;
}
#endif
546159452Srodrigc
/*
 * Do the setup for the various locks within the incore inode:
 * the main inode mrlock, the I/O mrlock, the pin count, and the
 * flush semaphore (initialized available).  Called once when the
 * incore inode is first constructed; the vnode's v_number is used
 * only to tag the locks for debugging/identification.
 */
void
xfs_inode_lock_init(
	xfs_inode_t	*ip,
	xfs_vnode_t	*vp)
{
	mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
		     "xfsino", (long)vp->v_number);
	mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", vp->v_number);
#ifdef RMC
	init_waitqueue_head(&ip->i_ipin_wait);
#endif
	atomic_set(&ip->i_pincount, 0);
	/* count of 1: the flush lock starts out available */
	init_sema(&ip->i_flock, 1, "xfsfino", vp->v_number);
}
564159452Srodrigc
565159452Srodrigc/*
566159452Srodrigc * Look for the inode corresponding to the given ino in the hash table.
567159452Srodrigc * If it is there and its i_transp pointer matches tp, return it.
568159452Srodrigc * Otherwise, return NULL.
569159452Srodrigc */
570159452Srodrigcxfs_inode_t *
571159452Srodrigcxfs_inode_incore(xfs_mount_t	*mp,
572159452Srodrigc		 xfs_ino_t	ino,
573159452Srodrigc		 xfs_trans_t	*tp)
574159452Srodrigc{
575159452Srodrigc	xfs_ihash_t	*ih;
576159452Srodrigc	xfs_inode_t	*ip;
577159452Srodrigc	ulong		version;
578159452Srodrigc
579159452Srodrigc	ih = XFS_IHASH(mp, ino);
580159452Srodrigc	read_lock(&ih->ih_lock);
581159452Srodrigc	for (ip = ih->ih_next; ip != NULL; ip = ip->i_next) {
582159452Srodrigc		if (ip->i_ino == ino) {
583159452Srodrigc			/*
584159452Srodrigc			 * If we find it and tp matches, return it.
585159452Srodrigc			 * Also move it to the front of the hash list
586159452Srodrigc			 * if we find it and it is not already there.
587159452Srodrigc			 * Otherwise break from the loop and return
588159452Srodrigc			 * NULL.
589159452Srodrigc			 */
590159452Srodrigc			if (ip->i_transp == tp) {
591159452Srodrigc				version = ih->ih_version;
592159452Srodrigc				read_unlock(&ih->ih_lock);
593159452Srodrigc				xfs_ihash_promote(ih, ip, version);
594159452Srodrigc				return (ip);
595159452Srodrigc			}
596159452Srodrigc			break;
597159452Srodrigc		}
598159452Srodrigc	}
599159452Srodrigc	read_unlock(&ih->ih_lock);
600159452Srodrigc	return (NULL);
601159452Srodrigc}
602159452Srodrigc
/*
 * Decrement reference count of an inode structure and unlock it.
 *
 * ip -- the inode being released
 * lock_flags -- this parameter indicates the inode's locks
 *       to be released.  See the comment on xfs_iunlock() for a list
 *	 of valid values.
 */
void
xfs_iput(xfs_inode_t	*ip,
	 uint		lock_flags)
{
	xfs_vnode_t	*vp = XFS_ITOV(ip);

	vn_trace_entry(vp, "xfs_iput", (inst_t *)__return_address);

	/* Unlock before dropping the vnode reference: VN_RELE may
	 * trigger inactivation which takes the inode locks itself. */
	xfs_iunlock(ip, lock_flags);

	VN_RELE(vp);
}
623159452Srodrigc
#ifdef RMC
/* in xfs_freebsd_iget.c
 * Special iput for brand-new inodes that are still locked
 * (I_NEW at the VFS level).  A zero di_mode means the inode was
 * never fully allocated on disk, so the vnode is marked bad to
 * keep later lookups from using it.
 */
void
xfs_iput_new(xfs_inode_t	*ip,
	     uint		lock_flags)
{
	xfs_vnode_t	*vp = XFS_ITOV(ip);
	struct inode	*inode = vn_to_inode(vp);

	vn_trace_entry(vp, "xfs_iput_new", (inst_t *)__return_address);

	if ((ip->i_d.di_mode == 0)) {
		ASSERT(!(ip->i_flags & XFS_IRECLAIMABLE));
		vn_mark_bad(vp);
	}
	/* Finish the VFS-level I_NEW handshake before releasing. */
	if (inode->i_state & I_NEW)
		unlock_new_inode(inode);
	if (lock_flags)
		xfs_iunlock(ip, lock_flags);
	VN_RELE(vp);
}
#endif
648159452Srodrigc
649159452Srodrigc
/*
 * This routine embodies the part of the reclaim code that pulls
 * the inode from the inode hash table and the mount structure's
 * inode list.
 * This should only be called from xfs_reclaim().
 */
void
xfs_ireclaim(
	     xfs_inode_t *ip)
{
	xfs_vnode_t	*vp;

	/*
	 * Remove from old hash list and mount list.
	 */
	XFS_STATS_INC(xs_ig_reclaims);

	xfs_iextract(ip);

	/*
	 * Here we do a spurious inode lock in order to coordinate with
	 * xfs_sync().  This is because xfs_sync() references the inodes
	 * in the mount list without taking references on the corresponding
	 * vnodes.  We make that OK here by ensuring that we wait until
	 * the inode is unlocked in xfs_sync() before we go ahead and
	 * free it.  We get both the regular lock and the io lock because
	 * the xfs_sync() code may need to drop the regular one but will
	 * still hold the io lock.
	 */
	xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);

	/*
	 * Release dquots (and their references) if any. An inode may escape
	 * xfs_inactive and get here via vn_alloc->vn_reclaim path.
	 */
	XFS_QM_DQDETACH(ip->i_mount, ip);

	/*
	 * Pull our behavior descriptor from the vnode chain.
	 * vp may be NULL if the vnode was already torn down.
	 */
	vp = XFS_ITOV_NULL(ip);
	if (vp) {
		vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip));
	}

	/*
	 * Free all memory associated with the inode.
	 * Note: the locks taken above are never released; the inode
	 * (including its lock storage) is destroyed here.
	 */
	xfs_idestroy(ip);
}
700159452Srodrigc
/*
 * This routine removes an about-to-be-destroyed inode from
 * all of the lists in which it is located with the exception
 * of the behavior chain: the inode hash chain, the cluster
 * hash list, and the mount's inode list.
 */
void
xfs_iextract(
	xfs_inode_t	*ip)
{
	xfs_ihash_t	*ih;
	xfs_inode_t	*iq;
	xfs_mount_t	*mp;
	xfs_chash_t	*ch;
	xfs_chashlist_t *chl, *chm;
	SPLDECL(s);

	/* Unhook from the inode hash chain and bump the chain version
	 * so concurrent promoters notice the change. */
	ih = ip->i_hash;
	write_lock(&ih->ih_lock);
	if ((iq = ip->i_next)) {
		iq->i_prevp = ip->i_prevp;
	}
	*ip->i_prevp = iq;
	ih->ih_version++;
	write_unlock(&ih->ih_lock);

	/*
	 * Remove from cluster hash list
	 *   1) delete the chashlist if this is the last inode on the chashlist
	 *   2) unchain from list of inodes
	 *   3) point chashlist->chl_ip to 'chl_next' if to this inode.
	 */
	mp = ip->i_mount;
	ch = XFS_CHASH(mp, ip->i_blkno);
	s = mutex_spinlock(&ch->ch_lock);

	if (ip->i_cnext == ip) {
		/* Last inode on chashlist */
		ASSERT(ip->i_cnext == ip && ip->i_cprev == ip);
		ASSERT(ip->i_chash != NULL);
		chm=NULL;
		chl = ip->i_chash;
		if (chl->chl_prev)
			chl->chl_prev->chl_next = chl->chl_next;
		else
			ch->ch_list = chl->chl_next;
		if (chl->chl_next)
			chl->chl_next->chl_prev = chl->chl_prev;
		kmem_zone_free(xfs_chashlist_zone, chl);
	} else {
		/* delete one inode from a non-empty list */
		iq = ip->i_cnext;
		iq->i_cprev = ip->i_cprev;
		ip->i_cprev->i_cnext = iq;
		if (ip->i_chash->chl_ip == ip) {
			ip->i_chash->chl_ip = iq;
		}
		/* Poison the cluster-list pointers with the caller's return
		 * address so a use-after-extract is easy to spot in a dump. */
		ip->i_chash = __return_address;
		ip->i_cprev = __return_address;
		ip->i_cnext = __return_address;
	}
	mutex_spinunlock(&ch->ch_lock, s);

	/*
	 * Remove from mount's inode list.
	 */
	XFS_MOUNT_ILOCK(mp);
	ASSERT((ip->i_mnext != NULL) && (ip->i_mprev != NULL));
	iq = ip->i_mnext;
	iq->i_mprev = ip->i_mprev;
	ip->i_mprev->i_mnext = iq;

	/*
	 * Fix up the head pointer if it points to the inode being deleted.
	 */
	if (mp->m_inodes == ip) {
		if (ip == iq) {
			/* ip was the only inode on the mount list */
			mp->m_inodes = NULL;
		} else {
			mp->m_inodes = iq;
		}
	}

	/* Deal with the deleted inodes list */
#ifdef RMC
	list_del_init(&ip->i_reclaim);
#endif

	mp->m_ireclaims++;
	XFS_MOUNT_IUNLOCK(mp);
}
791159452Srodrigc
792159452Srodrigc/*
793159452Srodrigc * This is a wrapper routine around the xfs_ilock() routine
794159452Srodrigc * used to centralize some grungy code.  It is used in places
795159452Srodrigc * that wish to lock the inode solely for reading the extents.
796159452Srodrigc * The reason these places can't just call xfs_ilock(SHARED)
 * is that the inode lock also guards the bringing in of the
798159452Srodrigc * extents from disk for a file in b-tree format.  If the inode
799159452Srodrigc * is in b-tree format, then we need to lock the inode exclusively
800159452Srodrigc * until the extents are read in.  Locking it exclusively all
801159452Srodrigc * the time would limit our parallelism unnecessarily, though.
802159452Srodrigc * What we do instead is check to see if the extents have been
803159452Srodrigc * read in yet, and only lock the inode exclusively if they
804159452Srodrigc * have not.
805159452Srodrigc *
806159452Srodrigc * The function returns a value which should be given to the
807159452Srodrigc * corresponding xfs_iunlock_map_shared().  This value is
808159452Srodrigc * the mode in which the lock was actually taken.
809159452Srodrigc */
810159452Srodrigcuint
811159452Srodrigcxfs_ilock_map_shared(
812159452Srodrigc	xfs_inode_t	*ip)
813159452Srodrigc{
814159452Srodrigc	uint	lock_mode;
815159452Srodrigc
816159452Srodrigc	if ((ip->i_d.di_format == XFS_DINODE_FMT_BTREE) &&
817159452Srodrigc	    ((ip->i_df.if_flags & XFS_IFEXTENTS) == 0)) {
818159452Srodrigc		lock_mode = XFS_ILOCK_EXCL;
819159452Srodrigc	} else {
820159452Srodrigc		lock_mode = XFS_ILOCK_SHARED;
821159452Srodrigc	}
822159452Srodrigc
823159452Srodrigc	xfs_ilock(ip, lock_mode);
824159452Srodrigc
825159452Srodrigc	return lock_mode;
826159452Srodrigc}
827159452Srodrigc
828159452Srodrigc/*
829159452Srodrigc * This is simply the unlock routine to go with xfs_ilock_map_shared().
830159452Srodrigc * All it does is call xfs_iunlock() with the given lock_mode.
831159452Srodrigc */
832159452Srodrigcvoid
833159452Srodrigcxfs_iunlock_map_shared(
834159452Srodrigc	xfs_inode_t	*ip,
835159452Srodrigc	unsigned int	lock_mode)
836159452Srodrigc{
837159452Srodrigc	xfs_iunlock(ip, lock_mode);
838159452Srodrigc}
839159452Srodrigc
840159452Srodrigc/*
841159452Srodrigc * The xfs inode contains 2 locks: a multi-reader lock called the
842159452Srodrigc * i_iolock and a multi-reader lock called the i_lock.  This routine
843159452Srodrigc * allows either or both of the locks to be obtained.
844159452Srodrigc *
845159452Srodrigc * The 2 locks should always be ordered so that the IO lock is
846159452Srodrigc * obtained first in order to prevent deadlock.
847159452Srodrigc *
848159452Srodrigc * ip -- the inode being locked
849159452Srodrigc * lock_flags -- this parameter indicates the inode's locks
850159452Srodrigc *       to be locked.  It can be:
851159452Srodrigc *		XFS_IOLOCK_SHARED,
852159452Srodrigc *		XFS_IOLOCK_EXCL,
853159452Srodrigc *		XFS_ILOCK_SHARED,
854159452Srodrigc *		XFS_ILOCK_EXCL,
855159452Srodrigc *		XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED,
856159452Srodrigc *		XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL,
857159452Srodrigc *		XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED,
858159452Srodrigc *		XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL
859159452Srodrigc */
860159452Srodrigcvoid
861159452Srodrigcxfs_ilock(xfs_inode_t	*ip,
862159452Srodrigc	  uint		lock_flags)
863159452Srodrigc{
864159452Srodrigc	/*
865159452Srodrigc	 * You can't set both SHARED and EXCL for the same lock,
866159452Srodrigc	 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
867159452Srodrigc	 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
868159452Srodrigc	 */
869159452Srodrigc	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
870159452Srodrigc	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
871159452Srodrigc	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
872159452Srodrigc	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
873159452Srodrigc	ASSERT((lock_flags & ~XFS_LOCK_MASK) == 0);
874159452Srodrigc
875159452Srodrigc	if (lock_flags & XFS_IOLOCK_EXCL) {
876159452Srodrigc		mrupdate(&ip->i_iolock);
877159452Srodrigc	} else if (lock_flags & XFS_IOLOCK_SHARED) {
878159452Srodrigc		mraccess(&ip->i_iolock);
879159452Srodrigc	}
880159452Srodrigc	if (lock_flags & XFS_ILOCK_EXCL) {
881159452Srodrigc		mrupdate(&ip->i_lock);
882159452Srodrigc	} else if (lock_flags & XFS_ILOCK_SHARED) {
883159452Srodrigc		mraccess(&ip->i_lock);
884159452Srodrigc	}
885159452Srodrigc	xfs_ilock_trace(ip, 1, lock_flags, (inst_t *)__return_address);
886159452Srodrigc}
887159452Srodrigc
888159452Srodrigc/*
889159452Srodrigc * This is just like xfs_ilock(), except that the caller
890159452Srodrigc * is guaranteed not to sleep.  It returns 1 if it gets
891159452Srodrigc * the requested locks and 0 otherwise.  If the IO lock is
892159452Srodrigc * obtained but the inode lock cannot be, then the IO lock
893159452Srodrigc * is dropped before returning.
894159452Srodrigc *
895159452Srodrigc * ip -- the inode being locked
 * lock_flags -- this parameter indicates the inode's locks to be
 *	 locked.  See the comment for xfs_ilock() for a list
898159452Srodrigc *	 of valid values.
899159452Srodrigc *
900159452Srodrigc */
901159452Srodrigcint
902159452Srodrigcxfs_ilock_nowait(xfs_inode_t	*ip,
903159452Srodrigc		 uint		lock_flags)
904159452Srodrigc{
905159452Srodrigc	int	iolocked;
906159452Srodrigc	int	ilocked;
907159452Srodrigc
908159452Srodrigc	/*
909159452Srodrigc	 * You can't set both SHARED and EXCL for the same lock,
910159452Srodrigc	 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
911159452Srodrigc	 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
912159452Srodrigc	 */
913159452Srodrigc	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
914159452Srodrigc	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
915159452Srodrigc	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
916159452Srodrigc	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
917159452Srodrigc	ASSERT((lock_flags & ~XFS_LOCK_MASK) == 0);
918159452Srodrigc
919159452Srodrigc	iolocked = 0;
920159452Srodrigc	if (lock_flags & XFS_IOLOCK_EXCL) {
921159452Srodrigc		iolocked = mrtryupdate(&ip->i_iolock);
922159452Srodrigc		if (!iolocked) {
923159452Srodrigc			return 0;
924159452Srodrigc		}
925159452Srodrigc	} else if (lock_flags & XFS_IOLOCK_SHARED) {
926159452Srodrigc		iolocked = mrtryaccess(&ip->i_iolock);
927159452Srodrigc		if (!iolocked) {
928159452Srodrigc			return 0;
929159452Srodrigc		}
930159452Srodrigc	}
931159452Srodrigc	if (lock_flags & XFS_ILOCK_EXCL) {
932159452Srodrigc		ilocked = mrtryupdate(&ip->i_lock);
933159452Srodrigc		if (!ilocked) {
934159452Srodrigc			if (iolocked) {
935159452Srodrigc				mrunlock(&ip->i_iolock);
936159452Srodrigc			}
937159452Srodrigc			return 0;
938159452Srodrigc		}
939159452Srodrigc	} else if (lock_flags & XFS_ILOCK_SHARED) {
940159452Srodrigc		ilocked = mrtryaccess(&ip->i_lock);
941159452Srodrigc		if (!ilocked) {
942159452Srodrigc			if (iolocked) {
943159452Srodrigc				mrunlock(&ip->i_iolock);
944159452Srodrigc			}
945159452Srodrigc			return 0;
946159452Srodrigc		}
947159452Srodrigc	}
948159452Srodrigc	xfs_ilock_trace(ip, 2, lock_flags, (inst_t *)__return_address);
949159452Srodrigc	return 1;
950159452Srodrigc}
951159452Srodrigc
952159452Srodrigc/*
953159452Srodrigc * xfs_iunlock() is used to drop the inode locks acquired with
954159452Srodrigc * xfs_ilock() and xfs_ilock_nowait().  The caller must pass
955159452Srodrigc * in the flags given to xfs_ilock() or xfs_ilock_nowait() so
956159452Srodrigc * that we know which locks to drop.
957159452Srodrigc *
958159452Srodrigc * ip -- the inode being unlocked
 * lock_flags -- this parameter indicates the inode's locks to be
 *	 unlocked.  See the comment for xfs_ilock() for a list
961159452Srodrigc *	 of valid values for this parameter.
962159452Srodrigc *
963159452Srodrigc */
964159452Srodrigcvoid
965159452Srodrigcxfs_iunlock(xfs_inode_t	*ip,
966159452Srodrigc	    uint	lock_flags)
967159452Srodrigc{
968159452Srodrigc	/*
969159452Srodrigc	 * You can't set both SHARED and EXCL for the same lock,
970159452Srodrigc	 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
971159452Srodrigc	 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
972159452Srodrigc	 */
973159452Srodrigc	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
974159452Srodrigc	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
975159452Srodrigc	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
976159452Srodrigc	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
977159452Srodrigc	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_IUNLOCK_NONOTIFY)) == 0);
978159452Srodrigc	ASSERT(lock_flags != 0);
979159452Srodrigc
980159452Srodrigc	if (lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) {
981159452Srodrigc		ASSERT(!(lock_flags & XFS_IOLOCK_SHARED) ||
982159452Srodrigc		       (ismrlocked(&ip->i_iolock, MR_ACCESS)));
983159452Srodrigc		ASSERT(!(lock_flags & XFS_IOLOCK_EXCL) ||
984159452Srodrigc		       (ismrlocked(&ip->i_iolock, MR_UPDATE)));
985159452Srodrigc		mrunlock(&ip->i_iolock);
986159452Srodrigc	}
987159452Srodrigc
988159452Srodrigc	if (lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) {
989159452Srodrigc		ASSERT(!(lock_flags & XFS_ILOCK_SHARED) ||
990159452Srodrigc		       (ismrlocked(&ip->i_lock, MR_ACCESS)));
991159452Srodrigc		ASSERT(!(lock_flags & XFS_ILOCK_EXCL) ||
992159452Srodrigc		       (ismrlocked(&ip->i_lock, MR_UPDATE)));
993159452Srodrigc		mrunlock(&ip->i_lock);
994159452Srodrigc
995159452Srodrigc		/*
996159452Srodrigc		 * Let the AIL know that this item has been unlocked in case
997159452Srodrigc		 * it is in the AIL and anyone is waiting on it.  Don't do
998159452Srodrigc		 * this if the caller has asked us not to.
999159452Srodrigc		 */
1000159452Srodrigc		if (!(lock_flags & XFS_IUNLOCK_NONOTIFY) &&
1001159452Srodrigc		     ip->i_itemp != NULL) {
1002159452Srodrigc			xfs_trans_unlocked_item(ip->i_mount,
1003159452Srodrigc						(xfs_log_item_t*)(ip->i_itemp));
1004159452Srodrigc		}
1005159452Srodrigc	}
1006159452Srodrigc	xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address);
1007159452Srodrigc}
1008159452Srodrigc
1009159452Srodrigc/*
1010159452Srodrigc * give up write locks.  the i/o lock cannot be held nested
1011159452Srodrigc * if it is being demoted.
1012159452Srodrigc */
1013159452Srodrigcvoid
1014159452Srodrigcxfs_ilock_demote(xfs_inode_t	*ip,
1015159452Srodrigc		 uint		lock_flags)
1016159452Srodrigc{
1017159452Srodrigc	ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL));
1018159452Srodrigc	ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
1019159452Srodrigc
1020159452Srodrigc	if (lock_flags & XFS_ILOCK_EXCL) {
1021159452Srodrigc		ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
1022159452Srodrigc		mrdemote(&ip->i_lock);
1023159452Srodrigc	}
1024159452Srodrigc	if (lock_flags & XFS_IOLOCK_EXCL) {
1025159452Srodrigc		ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
1026159452Srodrigc		mrdemote(&ip->i_iolock);
1027159452Srodrigc	}
1028159452Srodrigc}
1029159452Srodrigc
1030159452Srodrigc/*
1031159452Srodrigc * The following three routines simply manage the i_flock
1032159452Srodrigc * semaphore embedded in the inode.  This semaphore synchronizes
1033159452Srodrigc * processes attempting to flush the in-core inode back to disk.
1034159452Srodrigc */
1035159452Srodrigcvoid
1036159452Srodrigcxfs_iflock(xfs_inode_t *ip)
1037159452Srodrigc{
1038159452Srodrigc	psema(&(ip->i_flock), PINOD|PLTWAIT);
1039159452Srodrigc}
1040159452Srodrigc
1041159452Srodrigcint
1042159452Srodrigcxfs_iflock_nowait(xfs_inode_t *ip)
1043159452Srodrigc{
1044159452Srodrigc	return (cpsema(&(ip->i_flock)));
1045159452Srodrigc}
1046159452Srodrigc
1047159452Srodrigcvoid
1048159452Srodrigcxfs_ifunlock(xfs_inode_t *ip)
1049159452Srodrigc{
1050159452Srodrigc	ASSERT(valusema(&(ip->i_flock)) <= 0);
1051159452Srodrigc	vsema(&(ip->i_flock));
1052159452Srodrigc}
1053