// SPDX-License-Identifier: GPL-2.0-only
/*
 * extent_map.c
 *
 * Block/Cluster mapping functions
 *
 * Copyright (C) 2004 Oracle.  All rights reserved.
 */

#include <linux/fs.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/fiemap.h>

#include <cluster/masklog.h>

#include "ocfs2.h"

#include "alloc.h"
#include "dlmglue.h"
#include "extent_map.h"
#include "inode.h"
#include "super.h"
#include "symlink.h"
#include "aops.h"
#include "ocfs2_trace.h"

#include "buffer_head_io.h"

/*
 * The extent caching implementation is intentionally trivial.
 *
 * We only cache a small number of extents stored directly on the
 * inode, so linear order operations are acceptable. If we ever want
 * to increase the size of the extent map, then these algorithms must
 * get smarter.
 */
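
/*
 * The map is a short list kept in most-recently-used order: lookups
 * and merges move the matched item to the head, and when the list is
 * full the insert path recycles the tail entry, i.e. the least
 * recently used mapping.
 */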

void ocfs2_extent_map_init(struct inode *inode)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);

	oi->ip_extent_map.em_num_items = 0;
	INIT_LIST_HEAD(&oi->ip_extent_map.em_list);
}

static void __ocfs2_extent_map_lookup(struct ocfs2_extent_map *em,
				      unsigned int cpos,
				      struct ocfs2_extent_map_item **ret_emi)
{
	unsigned int range;
	struct ocfs2_extent_map_item *emi;

	*ret_emi = NULL;

	list_for_each_entry(emi, &em->em_list, ei_list) {
		range = emi->ei_cpos + emi->ei_clusters;

		if (cpos >= emi->ei_cpos && cpos < range) {
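			/*
			 * Keep the list in most-recently-used order:
			 * move the hit to the head so the tail stays
			 * the eviction candidate for
			 * ocfs2_extent_map_insert_rec().
			 */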
			list_move(&emi->ei_list, &em->em_list);

			*ret_emi = emi;
			break;
		}
	}
}

static int ocfs2_extent_map_lookup(struct inode *inode, unsigned int cpos,
				   unsigned int *phys, unsigned int *len,
				   unsigned int *flags)
{
	unsigned int coff;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_extent_map_item *emi;

	spin_lock(&oi->ip_lock);

	__ocfs2_extent_map_lookup(&oi->ip_extent_map, cpos, &emi);
	if (emi) {
		coff = cpos - emi->ei_cpos;
		*phys = emi->ei_phys + coff;
		if (len)
			*len = emi->ei_clusters - coff;
		if (flags)
			*flags = emi->ei_flags;
	}

	spin_unlock(&oi->ip_lock);

	if (emi == NULL)
		return -ENOENT;

	return 0;
}

/*
 * Forget about all clusters equal to or greater than cpos.
 */
void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos)
{
	struct ocfs2_extent_map_item *emi, *n;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_extent_map *em = &oi->ip_extent_map;
	LIST_HEAD(tmp_list);
	unsigned int range;

	spin_lock(&oi->ip_lock);
	list_for_each_entry_safe(emi, n, &em->em_list, ei_list) {
		if (emi->ei_cpos >= cpos) {
			/* Full truncate of this record. */
			list_move(&emi->ei_list, &tmp_list);
			BUG_ON(em->em_num_items == 0);
			em->em_num_items--;
			continue;
		}

		range = emi->ei_cpos + emi->ei_clusters;
		if (range > cpos) {
			/* Partial truncate */
			emi->ei_clusters = cpos - emi->ei_cpos;
		}
	}
	spin_unlock(&oi->ip_lock);

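	/*
	 * Free the detached items outside of ip_lock; nothing else can
	 * reach them once they sit on the private tmp_list.
	 */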
	list_for_each_entry_safe(emi, n, &tmp_list, ei_list) {
		list_del(&emi->ei_list);
		kfree(emi);
	}
}

/*
 * Is any part of emi2 contained within emi1?
 */
static int ocfs2_ei_is_contained(struct ocfs2_extent_map_item *emi1,
				 struct ocfs2_extent_map_item *emi2)
{
	unsigned int range1, range2;

	/*
	 * Check if the logical start of emi2 is inside emi1
	 */
	range1 = emi1->ei_cpos + emi1->ei_clusters;
	if (emi2->ei_cpos >= emi1->ei_cpos && emi2->ei_cpos < range1)
		return 1;

	/*
	 * Check if the logical end of emi2 is inside emi1
	 */
	range2 = emi2->ei_cpos + emi2->ei_clusters;
	if (range2 > emi1->ei_cpos && range2 <= range1)
		return 1;

	return 0;
}

static void ocfs2_copy_emi_fields(struct ocfs2_extent_map_item *dest,
				  struct ocfs2_extent_map_item *src)
{
	dest->ei_cpos = src->ei_cpos;
	dest->ei_phys = src->ei_phys;
	dest->ei_clusters = src->ei_clusters;
	dest->ei_flags = src->ei_flags;
}

/*
 * Try to merge emi with ins. Returns 1 if the merge succeeds, zero
 * otherwise.
 */
static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi,
					 struct ocfs2_extent_map_item *ins)
{
	/*
	 * Handle contiguousness
	 */
	if (ins->ei_phys == (emi->ei_phys + emi->ei_clusters) &&
	    ins->ei_cpos == (emi->ei_cpos + emi->ei_clusters) &&
	    ins->ei_flags == emi->ei_flags) {
		emi->ei_clusters += ins->ei_clusters;
		return 1;
	} else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys &&
		   (ins->ei_cpos + ins->ei_clusters) == emi->ei_cpos &&
		   ins->ei_flags == emi->ei_flags) {
		emi->ei_phys = ins->ei_phys;
		emi->ei_cpos = ins->ei_cpos;
		emi->ei_clusters += ins->ei_clusters;
		return 1;
	}

	/*
	 * Overlapping extents - this shouldn't happen unless we've
	 * split an extent to change its flags. That is exceedingly
	 * rare, so there's no sense in trying to optimize it yet.
	 */
	if (ocfs2_ei_is_contained(emi, ins) ||
	    ocfs2_ei_is_contained(ins, emi)) {
		ocfs2_copy_emi_fields(emi, ins);
		return 1;
	}

	/* No merge was possible. */
	return 0;
}
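
/*
 * Merge example (values for illustration only): an existing item
 * covering (ei_cpos 0, ei_phys 100, 4 clusters) and an insert of
 * (ei_cpos 4, ei_phys 104, 2 clusters) with identical flags are
 * contiguous in both the logical and physical spaces, so the first
 * branch above simply grows the existing item to (0, 100, 6 clusters).
 */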

/*
 * In order to reduce complexity on the caller, this insert function
 * is intentionally liberal in what it will accept.
 *
 * The only rule is that the truncate call *must* be used whenever
 * records have been deleted. This avoids inserting overlapping
 * records with different physical mappings.
 */
void ocfs2_extent_map_insert_rec(struct inode *inode,
				 struct ocfs2_extent_rec *rec)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_extent_map *em = &oi->ip_extent_map;
	struct ocfs2_extent_map_item *emi, *new_emi = NULL;
	struct ocfs2_extent_map_item ins;

	ins.ei_cpos = le32_to_cpu(rec->e_cpos);
	ins.ei_phys = ocfs2_blocks_to_clusters(inode->i_sb,
					       le64_to_cpu(rec->e_blkno));
	ins.ei_clusters = le16_to_cpu(rec->e_leaf_clusters);
	ins.ei_flags = rec->e_flags;

search:
	spin_lock(&oi->ip_lock);

	list_for_each_entry(emi, &em->em_list, ei_list) {
		if (ocfs2_try_to_merge_extent_map(emi, &ins)) {
			list_move(&emi->ei_list, &em->em_list);
			spin_unlock(&oi->ip_lock);
			goto out;
		}
	}

	/*
	 * No item could be merged.
	 *
	 * Either allocate and add a new item, or overwrite the least
	 * recently used one.
	 */

	if (em->em_num_items < OCFS2_MAX_EXTENT_MAP_ITEMS) {
		if (new_emi == NULL) {
			spin_unlock(&oi->ip_lock);

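			/*
			 * ip_lock was dropped for the allocation, so
			 * the list may have changed under us; restart
			 * the merge search from the top once the new
			 * item is in hand.
			 */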
			new_emi = kmalloc(sizeof(*new_emi), GFP_NOFS);
			if (new_emi == NULL)
				goto out;

			goto search;
		}

		ocfs2_copy_emi_fields(new_emi, &ins);
		list_add(&new_emi->ei_list, &em->em_list);
		em->em_num_items++;
		new_emi = NULL;
	} else {
		BUG_ON(list_empty(&em->em_list) || em->em_num_items == 0);
		emi = list_entry(em->em_list.prev,
				 struct ocfs2_extent_map_item, ei_list);
		list_move(&emi->ei_list, &em->em_list);
		ocfs2_copy_emi_fields(emi, &ins);
	}

	spin_unlock(&oi->ip_lock);

out:
	kfree(new_emi);
}

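/*
 * Returns 1 if the rightmost extent block holds no live records, 0 if
 * it does, and a negative errno if the block cannot be read or is
 * corrupt.
 */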
static int ocfs2_last_eb_is_empty(struct inode *inode,
				  struct ocfs2_dinode *di)
{
	int ret, next_free;
	u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk);
	struct buffer_head *eb_bh = NULL;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *el;

	ret = ocfs2_read_extent_block(INODE_CACHE(inode), last_eb_blk, &eb_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	eb = (struct ocfs2_extent_block *) eb_bh->b_data;
	el = &eb->h_list;

	if (el->l_tree_depth) {
		ocfs2_error(inode->i_sb,
			    "Inode %lu has non zero tree depth in leaf block %llu\n",
			    inode->i_ino,
			    (unsigned long long)eb_bh->b_blocknr);
		ret = -EROFS;
		goto out;
	}

	next_free = le16_to_cpu(el->l_next_free_rec);

	if (next_free == 0 ||
	    (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0])))
		ret = 1;

out:
	brelse(eb_bh);
	return ret;
}

/*
 * Return the first index within el which contains an extent start
 * larger than v_cluster.
 */
static int ocfs2_search_for_hole_index(struct ocfs2_extent_list *el,
				       u32 v_cluster)
{
	int i;
	struct ocfs2_extent_rec *rec;

	for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
		rec = &el->l_recs[i];

		if (v_cluster < le32_to_cpu(rec->e_cpos))
			break;
	}

	return i;
}

/*
 * Figure out the size of the hole which starts at v_cluster within
 * the given extent list.
 *
 * If there is no allocation past v_cluster, we return the largest
 * possible remaining range, UINT_MAX - v_cluster.
 *
 * If we have in-inode extents, then el points to the dinode list and
 * eb_bh is NULL. Otherwise, eb_bh should point to the extent block
 * containing el.
 */
int ocfs2_figure_hole_clusters(struct ocfs2_caching_info *ci,
			       struct ocfs2_extent_list *el,
			       struct buffer_head *eb_bh,
			       u32 v_cluster,
			       u32 *num_clusters)
{
	int ret, i;
	struct buffer_head *next_eb_bh = NULL;
	struct ocfs2_extent_block *eb, *next_eb;

	i = ocfs2_search_for_hole_index(el, v_cluster);

	if (i == le16_to_cpu(el->l_next_free_rec) && eb_bh) {
		eb = (struct ocfs2_extent_block *)eb_bh->b_data;

		/*
		 * Check the next leaf for any extents.
		 */

		if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
			goto no_more_extents;

		ret = ocfs2_read_extent_block(ci,
					      le64_to_cpu(eb->h_next_leaf_blk),
					      &next_eb_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data;
		el = &next_eb->h_list;
		i = ocfs2_search_for_hole_index(el, v_cluster);
	}

no_more_extents:
	if (i == le16_to_cpu(el->l_next_free_rec)) {
		/*
		 * We're at the end of our existing allocation. Just
		 * return the maximum number of clusters we could
		 * possibly allocate.
		 */
		*num_clusters = UINT_MAX - v_cluster;
	} else {
		*num_clusters = le32_to_cpu(el->l_recs[i].e_cpos) - v_cluster;
	}

	ret = 0;
out:
	brelse(next_eb_bh);
	return ret;
}

static int ocfs2_get_clusters_nocache(struct inode *inode,
				      struct buffer_head *di_bh,
				      u32 v_cluster, unsigned int *hole_len,
				      struct ocfs2_extent_rec *ret_rec,
				      unsigned int *is_last)
{
	int i, ret, tree_height;
	u32 len;
	struct ocfs2_dinode *di;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *el;
	struct ocfs2_extent_rec *rec;
	struct buffer_head *eb_bh = NULL;

	memset(ret_rec, 0, sizeof(*ret_rec));
	if (is_last)
		*is_last = 0;

	di = (struct ocfs2_dinode *) di_bh->b_data;
	el = &di->id2.i_list;
	tree_height = le16_to_cpu(el->l_tree_depth);

	if (tree_height > 0) {
		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
				      &eb_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
		el = &eb->h_list;

		if (el->l_tree_depth) {
			ocfs2_error(inode->i_sb,
				    "Inode %lu has non zero tree depth in leaf block %llu\n",
				    inode->i_ino,
				    (unsigned long long)eb_bh->b_blocknr);
			ret = -EROFS;
			goto out;
		}
	}

	i = ocfs2_search_extent_list(el, v_cluster);
	if (i == -1) {
		/*
		 * Holes can be larger than the maximum size of an
		 * extent, so we return their lengths in a separate
		 * field.
		 */
		if (hole_len) {
			ret = ocfs2_figure_hole_clusters(INODE_CACHE(inode),
							 el, eb_bh,
							 v_cluster, &len);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			*hole_len = len;
		}
		goto out_hole;
	}

	rec = &el->l_recs[i];

	BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));

	if (!rec->e_blkno) {
		ocfs2_error(inode->i_sb,
			    "Inode %lu has bad extent record (%u, %u, 0)\n",
			    inode->i_ino,
			    le32_to_cpu(rec->e_cpos),
			    ocfs2_rec_clusters(el, rec));
		ret = -EROFS;
		goto out;
	}

	*ret_rec = *rec;

	/*
	 * Checking for the last extent is potentially expensive - we
	 * might have to look at the next leaf over to see if it's
	 * empty.
	 *
	 * The first two checks are to see whether the caller even
	 * cares about this information, and whether the extent is at
	 * least the last in its list.
	 *
	 * If both hold, then the extent is last if any of the
	 * additional conditions hold true:
	 *  - Extent list is in-inode
	 *  - Extent list is right-most
	 *  - Extent list is 2nd to rightmost, with empty right-most
	 */
	if (is_last) {
		if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) {
			if (tree_height == 0)
				*is_last = 1;
			else if (eb->h_blkno == di->i_last_eb_blk)
				*is_last = 1;
			else if (eb->h_next_leaf_blk == di->i_last_eb_blk) {
				ret = ocfs2_last_eb_is_empty(inode, di);
				if (ret < 0) {
					mlog_errno(ret);
					goto out;
				}
				if (ret == 1)
					*is_last = 1;
			}
		}
	}

out_hole:
	ret = 0;
out:
	brelse(eb_bh);
	return ret;
}

static void ocfs2_relative_extent_offsets(struct super_block *sb,
					  u32 v_cluster,
					  struct ocfs2_extent_rec *rec,
					  u32 *p_cluster, u32 *num_clusters)
{
	u32 coff = v_cluster - le32_to_cpu(rec->e_cpos);

	*p_cluster = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno));
	*p_cluster = *p_cluster + coff;

	if (num_clusters)
		*num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff;
}
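
/*
 * Worked example (illustrative numbers): for a record with e_cpos 8,
 * e_blkno at the start of physical cluster 100 and e_leaf_clusters 16,
 * looking up v_cluster 10 gives coff = 2, so *p_cluster = 102 and
 * *num_clusters = 14 - the tail of the extent as seen from v_cluster.
 */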

int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
			     u32 *p_cluster, u32 *num_clusters,
			     struct ocfs2_extent_list *el,
			     unsigned int *extent_flags)
{
	int ret = 0, i;
	struct buffer_head *eb_bh = NULL;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_rec *rec;
	u32 coff;

	if (el->l_tree_depth) {
		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
				      &eb_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
		el = &eb->h_list;

		if (el->l_tree_depth) {
			ocfs2_error(inode->i_sb,
				    "Inode %lu has non zero tree depth in xattr leaf block %llu\n",
				    inode->i_ino,
				    (unsigned long long)eb_bh->b_blocknr);
			ret = -EROFS;
			goto out;
		}
	}

	i = ocfs2_search_extent_list(el, v_cluster);
	if (i == -1) {
		ret = -EROFS;
		mlog_errno(ret);
		goto out;
	} else {
		rec = &el->l_recs[i];
		BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));

		if (!rec->e_blkno) {
			ocfs2_error(inode->i_sb,
				    "Inode %lu has bad extent record (%u, %u, 0) in xattr\n",
				    inode->i_ino,
				    le32_to_cpu(rec->e_cpos),
				    ocfs2_rec_clusters(el, rec));
			ret = -EROFS;
			goto out;
		}
		coff = v_cluster - le32_to_cpu(rec->e_cpos);
		*p_cluster = ocfs2_blocks_to_clusters(inode->i_sb,
						      le64_to_cpu(rec->e_blkno));
		*p_cluster = *p_cluster + coff;
		if (num_clusters)
			*num_clusters = ocfs2_rec_clusters(el, rec) - coff;

		if (extent_flags)
			*extent_flags = rec->e_flags;
	}
out:
	brelse(eb_bh);
	return ret;
}

int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
		       u32 *p_cluster, u32 *num_clusters,
		       unsigned int *extent_flags)
{
	int ret;
	unsigned int hole_len, flags = 0;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_extent_rec rec;

	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		ret = -ERANGE;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
				      num_clusters, extent_flags);
	if (ret == 0)
		goto out;

	ret = ocfs2_read_inode_block(inode, &di_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len,
					 &rec, NULL);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	if (rec.e_blkno == 0ULL) {
		/*
		 * A hole was found. Return some canned values that
		 * callers can key on. If asked for, num_clusters will
		 * be populated with the size of the hole.
		 */
		*p_cluster = 0;
		if (num_clusters) {
			*num_clusters = hole_len;
		}
	} else {
		ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec,
					      p_cluster, num_clusters);
		flags = rec.e_flags;

		ocfs2_extent_map_insert_rec(inode, &rec);
	}

	if (extent_flags)
		*extent_flags = flags;

out:
	brelse(di_bh);
	return ret;
}
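
/*
 * Minimal usage sketch for ocfs2_get_clusters() (hypothetical caller;
 * assumes the cluster lock and ip_alloc_sem are already held, as the
 * callers in this file arrange):
 *
 *	u32 p_cluster, num_clusters;
 *	unsigned int flags;
 *	int ret;
 *
 *	ret = ocfs2_get_clusters(inode, v_cluster, &p_cluster,
 *				 &num_clusters, &flags);
 *	if (!ret && !p_cluster)
 *		... hole of num_clusters clusters at v_cluster ...
 *	else if (!ret)
 *		... mapped at p_cluster for num_clusters clusters ...
 */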

/*
 * This expects alloc_sem to be held. The allocation cannot change at
 * all while the map is in the process of being updated.
 */
int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
				u64 *ret_count, unsigned int *extent_flags)
{
	int ret;
	int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
	u32 cpos, num_clusters, p_cluster;
	u64 boff = 0;

	cpos = ocfs2_blocks_to_clusters(inode->i_sb, v_blkno);

	ret = ocfs2_get_clusters(inode, cpos, &p_cluster, &num_clusters,
				 extent_flags);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * p_cluster == 0 indicates a hole.
	 */
	if (p_cluster) {
		boff = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
		boff += (v_blkno & (u64)(bpc - 1));
	}

	*p_blkno = boff;

	if (ret_count) {
		*ret_count = ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
		*ret_count -= v_blkno & (u64)(bpc - 1);
	}

out:
	return ret;
}
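
/*
 * Block math example (one possible geometry): with 4K blocks and 32K
 * clusters, bpc = 8. A v_blkno of 21 maps to cpos 2; the low bits
 * (21 & 7 = 5) are the block offset inside that cluster, so *p_blkno
 * is the cluster's first block plus 5 and *ret_count is trimmed by
 * the same 5 blocks.
 */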

/*
 * The name ocfs2_fiemap_inline() is slightly misleading: it handles
 * fiemap not only for inline-data files but also for fast symlinks,
 * because the two are identical as far as extent mapping is
 * concerned.
 */
static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
			       struct fiemap_extent_info *fieinfo,
			       u64 map_start)
{
	int ret;
	unsigned int id_count;
	struct ocfs2_dinode *di;
	u64 phys;
	u32 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_LAST;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);

	di = (struct ocfs2_dinode *)di_bh->b_data;
	if (ocfs2_inode_is_fast_symlink(inode))
		id_count = ocfs2_fast_symlink_chars(inode->i_sb);
	else
		id_count = le16_to_cpu(di->id2.i_data.id_count);

	if (map_start < id_count) {
		phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits;
		if (ocfs2_inode_is_fast_symlink(inode))
			phys += offsetof(struct ocfs2_dinode, id2.i_symlink);
		else
			phys += offsetof(struct ocfs2_dinode,
					 id2.i_data.id_data);

		ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count,
					      flags);
		if (ret < 0)
			return ret;
	}

	return 0;
}

int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		 u64 map_start, u64 map_len)
{
	int ret, is_last;
	u32 mapping_end, cpos;
	unsigned int hole_size;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	u64 len_bytes, phys_bytes, virt_bytes;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_extent_rec rec;

	ret = fiemap_prep(inode, fieinfo, map_start, &map_len, 0);
	if (ret)
		return ret;

	ret = ocfs2_inode_lock(inode, &di_bh, 0);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	down_read(&OCFS2_I(inode)->ip_alloc_sem);

	/*
	 * Handle inline-data and fast symlink separately.
	 */
	if ((OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
	    ocfs2_inode_is_fast_symlink(inode)) {
		ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start);
		goto out_unlock;
	}

	cpos = map_start >> osb->s_clustersize_bits;
	mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
					       map_start + map_len);
	is_last = 0;
	while (cpos < mapping_end && !is_last) {
		u32 fe_flags;

		ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
						 &hole_size, &rec, &is_last);
		if (ret) {
			mlog_errno(ret);
			goto out_unlock;
		}

		if (rec.e_blkno == 0ULL) {
			cpos += hole_size;
			continue;
		}

		fe_flags = 0;
		if (rec.e_flags & OCFS2_EXT_UNWRITTEN)
			fe_flags |= FIEMAP_EXTENT_UNWRITTEN;
		if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
			fe_flags |= FIEMAP_EXTENT_SHARED;
		if (is_last)
			fe_flags |= FIEMAP_EXTENT_LAST;
		len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits;
		phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits;
		virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits;

		ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes,
					      len_bytes, fe_flags);
		if (ret)
			break;

		cpos = le32_to_cpu(rec.e_cpos) + le16_to_cpu(rec.e_leaf_clusters);
	}

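	/*
	 * A positive return from fiemap_fill_next_extent() means the
	 * user's extent array is full or the last extent was emitted;
	 * either way it is a signal to stop, not an error.
	 */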
	if (ret > 0)
		ret = 0;

out_unlock:
	brelse(di_bh);

	up_read(&OCFS2_I(inode)->ip_alloc_sem);

	ocfs2_inode_unlock(inode, 0);
out:
	return ret;
}

/* Is IO overwriting allocated blocks? */
int ocfs2_overwrite_io(struct inode *inode, struct buffer_head *di_bh,
		       u64 map_start, u64 map_len)
{
	int ret = 0, is_last;
	u32 mapping_end, cpos;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_extent_rec rec;

	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		if (ocfs2_size_fits_inline_data(di_bh, map_start + map_len))
			return ret;
		else
			return -EAGAIN;
	}

	cpos = map_start >> osb->s_clustersize_bits;
	mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
					       map_start + map_len);
	is_last = 0;
	while (cpos < mapping_end && !is_last) {
		ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
						 NULL, &rec, &is_last);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		if (rec.e_blkno == 0ULL)
			break;

		if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
			break;

		cpos = le32_to_cpu(rec.e_cpos) +
			le16_to_cpu(rec.e_leaf_clusters);
	}

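	/*
	 * Stopping short of mapping_end means we hit a hole, a
	 * refcounted extent or the end of the allocation, so this IO
	 * cannot be a pure overwrite of allocated, unshared clusters.
	 */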
	if (cpos < mapping_end)
		ret = -EAGAIN;
out:
	return ret;
}

int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	int ret;
	unsigned int is_last = 0, is_data = 0;
	u16 cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits;
	u32 cpos, cend, clen, hole_size;
	u64 extoff, extlen;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_extent_rec rec;

	BUG_ON(whence != SEEK_DATA && whence != SEEK_HOLE);

	ret = ocfs2_inode_lock(inode, &di_bh, 0);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	down_read(&OCFS2_I(inode)->ip_alloc_sem);

	if (*offset >= i_size_read(inode)) {
		ret = -ENXIO;
		goto out_unlock;
	}

	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		if (whence == SEEK_HOLE)
			*offset = i_size_read(inode);
		goto out_unlock;
	}

	clen = 0;
	cpos = *offset >> cs_bits;
	cend = ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));

	while (cpos < cend && !is_last) {
		ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, &hole_size,
						 &rec, &is_last);
		if (ret) {
			mlog_errno(ret);
			goto out_unlock;
		}

		extoff = cpos;
		extoff <<= cs_bits;

		if (rec.e_blkno == 0ULL) {
			clen = hole_size;
			is_data = 0;
		} else {
			clen = le16_to_cpu(rec.e_leaf_clusters) -
				(cpos - le32_to_cpu(rec.e_cpos));
			is_data = (rec.e_flags & OCFS2_EXT_UNWRITTEN) ? 0 : 1;
		}

		if ((!is_data && whence == SEEK_HOLE) ||
		    (is_data && whence == SEEK_DATA)) {
			if (extoff > *offset)
				*offset = extoff;
			goto out_unlock;
		}

		if (!is_last)
			cpos += clen;
	}

	if (whence == SEEK_HOLE) {
		extoff = cpos;
		extoff <<= cs_bits;
		extlen = clen;
		extlen <<= cs_bits;

		if ((extoff + extlen) > i_size_read(inode))
			extlen = i_size_read(inode) - extoff;
		extoff += extlen;
		if (extoff > *offset)
			*offset = extoff;
		goto out_unlock;
	}

	ret = -ENXIO;

out_unlock:
	brelse(di_bh);

	up_read(&OCFS2_I(inode)->ip_alloc_sem);

	ocfs2_inode_unlock(inode, 0);
out:
	return ret;
}

int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
			   struct buffer_head *bhs[], int flags,
			   int (*validate)(struct super_block *sb,
					   struct buffer_head *bh))
{
	int rc = 0;
	u64 p_block, p_count;
	int i, count, done = 0;

	trace_ocfs2_read_virt_blocks(
	     inode, (unsigned long long)v_block, nr, bhs, flags,
	     validate);

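	/*
	 * A request whose last block lies past i_size is tolerated
	 * (and ignored) only for readahead; any other caller is buggy.
	 */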
	if (((v_block + nr - 1) << inode->i_sb->s_blocksize_bits) >=
	    i_size_read(inode)) {
		BUG_ON(!(flags & OCFS2_BH_READAHEAD));
		goto out;
	}

	while (done < nr) {
		down_read(&OCFS2_I(inode)->ip_alloc_sem);
		rc = ocfs2_extent_map_get_blocks(inode, v_block + done,
						 &p_block, &p_count, NULL);
		up_read(&OCFS2_I(inode)->ip_alloc_sem);
		if (rc) {
			mlog_errno(rc);
			break;
		}

		if (!p_block) {
			rc = -EIO;
			mlog(ML_ERROR,
			     "Inode #%llu contains a hole at offset %llu\n",
			     (unsigned long long)OCFS2_I(inode)->ip_blkno,
			     (unsigned long long)(v_block + done) <<
			     inode->i_sb->s_blocksize_bits);
			break;
		}

		count = nr - done;
		if (p_count < count)
			count = p_count;

		/*
		 * If the caller passed us bhs, they should have come
		 * from a previous readahead call to this function.  Thus,
		 * they should have the right b_blocknr.
		 */
		for (i = 0; i < count; i++) {
			if (!bhs[done + i])
				continue;
			BUG_ON(bhs[done + i]->b_blocknr != (p_block + i));
		}

		rc = ocfs2_read_blocks(INODE_CACHE(inode), p_block, count,
				       bhs + done, flags, validate);
		if (rc) {
			mlog_errno(rc);
			break;
		}
		done += count;
	}

out:
	return rc;
}