1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (C) 2018-2019 HUAWEI, Inc.
4 *             https://www.huawei.com/
5 */
6#include "internal.h"
7#include <asm/unaligned.h>
8#include <trace/events/erofs.h>
9
/*
 * Working state shared by the lcluster index loaders and the extent helpers
 * in this file while a single mapping request is being resolved.
 */
struct z_erofs_maprecorder {
	/* inode whose compression indexes are parsed */
	struct inode *inode;
	/* mapping request being filled; its ->buf is used to read metadata */
	struct erofs_map_blocks *map;
	/* address returned by erofs_read_metabuf() for the current index block */
	void *kaddr;

	/* number of the lcluster that was loaded last */
	unsigned long lcn;
	/* compression extent information gathered */
	/* type: type of the last loaded lcluster; headtype: type of the extent's head lcluster */
	u8  type, headtype;
	/* in-lcluster offset for PLAIN/HEAD types; one lcluster size for NONHEAD */
	u16 clusterofs;
	/* delta[0]: lookback distance, delta[1]: lookahead distance (in lclusters) */
	u16 delta[2];
	/* pblk: block address read from a head index; compressedblks: CBLKCNT value */
	erofs_blk_t pblk, compressedblks;
	/* byte offset right after the index (full) or index pack (compact) just read */
	erofs_off_t nextpackoff;
	/* set once a head index carrying Z_EROFS_LI_PARTIAL_REF has been seen */
	bool partialref;
};
24
25static int z_erofs_load_full_lcluster(struct z_erofs_maprecorder *m,
26				      unsigned long lcn)
27{
28	struct inode *const inode = m->inode;
29	struct erofs_inode *const vi = EROFS_I(inode);
30	const erofs_off_t pos = Z_EROFS_FULL_INDEX_ALIGN(erofs_iloc(inode) +
31			vi->inode_isize + vi->xattr_isize) +
32			lcn * sizeof(struct z_erofs_lcluster_index);
33	struct z_erofs_lcluster_index *di;
34	unsigned int advise, type;
35
36	m->kaddr = erofs_read_metabuf(&m->map->buf, inode->i_sb,
37				      erofs_blknr(inode->i_sb, pos), EROFS_KMAP);
38	if (IS_ERR(m->kaddr))
39		return PTR_ERR(m->kaddr);
40
41	m->nextpackoff = pos + sizeof(struct z_erofs_lcluster_index);
42	m->lcn = lcn;
43	di = m->kaddr + erofs_blkoff(inode->i_sb, pos);
44
45	advise = le16_to_cpu(di->di_advise);
46	type = (advise >> Z_EROFS_LI_LCLUSTER_TYPE_BIT) &
47		((1 << Z_EROFS_LI_LCLUSTER_TYPE_BITS) - 1);
48	switch (type) {
49	case Z_EROFS_LCLUSTER_TYPE_NONHEAD:
50		m->clusterofs = 1 << vi->z_logical_clusterbits;
51		m->delta[0] = le16_to_cpu(di->di_u.delta[0]);
52		if (m->delta[0] & Z_EROFS_LI_D0_CBLKCNT) {
53			if (!(vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 |
54					Z_EROFS_ADVISE_BIG_PCLUSTER_2))) {
55				DBG_BUGON(1);
56				return -EFSCORRUPTED;
57			}
58			m->compressedblks = m->delta[0] &
59				~Z_EROFS_LI_D0_CBLKCNT;
60			m->delta[0] = 1;
61		}
62		m->delta[1] = le16_to_cpu(di->di_u.delta[1]);
63		break;
64	case Z_EROFS_LCLUSTER_TYPE_PLAIN:
65	case Z_EROFS_LCLUSTER_TYPE_HEAD1:
66	case Z_EROFS_LCLUSTER_TYPE_HEAD2:
67		if (advise & Z_EROFS_LI_PARTIAL_REF)
68			m->partialref = true;
69		m->clusterofs = le16_to_cpu(di->di_clusterofs);
70		if (m->clusterofs >= 1 << vi->z_logical_clusterbits) {
71			DBG_BUGON(1);
72			return -EFSCORRUPTED;
73		}
74		m->pblk = le32_to_cpu(di->di_u.blkaddr);
75		break;
76	default:
77		DBG_BUGON(1);
78		return -EOPNOTSUPP;
79	}
80	m->type = type;
81	return 0;
82}
83
84static unsigned int decode_compactedbits(unsigned int lobits,
85					 u8 *in, unsigned int pos, u8 *type)
86{
87	const unsigned int v = get_unaligned_le32(in + pos / 8) >> (pos & 7);
88	const unsigned int lo = v & ((1 << lobits) - 1);
89
90	*type = (v >> lobits) & 3;
91	return lo;
92}
93
94static int get_compacted_la_distance(unsigned int lobits,
95				     unsigned int encodebits,
96				     unsigned int vcnt, u8 *in, int i)
97{
98	unsigned int lo, d1 = 0;
99	u8 type;
100
101	DBG_BUGON(i >= vcnt);
102
103	do {
104		lo = decode_compactedbits(lobits, in, encodebits * i, &type);
105
106		if (type != Z_EROFS_LCLUSTER_TYPE_NONHEAD)
107			return d1;
108		++d1;
109	} while (++i < vcnt);
110
111	/* vcnt - 1 (Z_EROFS_LCLUSTER_TYPE_NONHEAD) item */
112	if (!(lo & Z_EROFS_LI_D0_CBLKCNT))
113		d1 += lo - 1;
114	return d1;
115}
116
/*
 * Decode the compacted index entry found at byte offset @pos and fill @m.
 *
 * @m:              recorder; m->kaddr must already map the block holding @pos
 * @amortizedshift: log2 of the bytes spent per lcluster in this pack
 *                  (2 for 4-byte entries, 1 for 2-byte entries)
 * @pos:            byte offset of the entry
 * @lookahead:      also compute m->delta[1] for NONHEAD lclusters
 *
 * A pack is (vcnt << amortizedshift) bytes long: vcnt bit-packed entries
 * followed by a trailing __le32 base block address.
 *
 * Returns 0 on success, -EOPNOTSUPP for an unsupported combination of
 * @amortizedshift and lcluster size, or -EFSCORRUPTED for inconsistent
 * index contents.
 */
static int unpack_compacted_index(struct z_erofs_maprecorder *m,
				  unsigned int amortizedshift,
				  erofs_off_t pos, bool lookahead)
{
	struct erofs_inode *const vi = EROFS_I(m->inode);
	const unsigned int lclusterbits = vi->z_logical_clusterbits;
	unsigned int vcnt, base, lo, lobits, encodebits, nblk, eofs;
	int i;
	u8 *in, type;
	bool big_pcluster;

	/* vcnt is the number of entries per pack for this encoding */
	if (1 << amortizedshift == 4 && lclusterbits <= 14)
		vcnt = 2;
	else if (1 << amortizedshift == 2 && lclusterbits <= 12)
		vcnt = 16;
	else
		return -EOPNOTSUPP;

	/* it doesn't equal to round_up(..) */
	m->nextpackoff = round_down(pos, vcnt << amortizedshift) +
			 (vcnt << amortizedshift);
	big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1;
	/* low part must be wide enough for a clusterofs and for the CBLKCNT flag */
	lobits = max(lclusterbits, ilog2(Z_EROFS_LI_D0_CBLKCNT) + 1U);
	/* bits per entry once the trailing __le32 is excluded */
	encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt;
	eofs = erofs_blkoff(m->inode->i_sb, pos);
	base = round_down(eofs, vcnt << amortizedshift);
	in = m->kaddr + base;

	/* index of the requested entry inside its pack */
	i = (eofs - base) >> amortizedshift;

	lo = decode_compactedbits(lobits, in, encodebits * i, &type);
	m->type = type;
	if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
		m->clusterofs = 1 << lclusterbits;

		/* figure out lookahead_distance: delta[1] if needed */
		if (lookahead)
			m->delta[1] = get_compacted_la_distance(lobits,
						encodebits, vcnt, in, i);
		if (lo & Z_EROFS_LI_D0_CBLKCNT) {
			/* CBLKCNT entries require the big pcluster feature */
			if (!big_pcluster) {
				DBG_BUGON(1);
				return -EFSCORRUPTED;
			}
			m->compressedblks = lo & ~Z_EROFS_LI_D0_CBLKCNT;
			m->delta[0] = 1;
			return 0;
		} else if (i + 1 != (int)vcnt) {
			/* not the last entry of the pack: lo is delta[0] itself */
			m->delta[0] = lo;
			return 0;
		}
		/*
		 * since the last lcluster in the pack is special,
		 * of which lo saves delta[1] rather than delta[0].
		 * Hence, get delta[0] by the previous lcluster indirectly.
		 */
		lo = decode_compactedbits(lobits, in,
					  encodebits * (i - 1), &type);
		if (type != Z_EROFS_LCLUSTER_TYPE_NONHEAD)
			lo = 0;
		else if (lo & Z_EROFS_LI_D0_CBLKCNT)
			lo = 1;
		m->delta[0] = lo + 1;
		return 0;
	}
	m->clusterofs = lo;
	m->delta[0] = 0;
	/* figure out blkaddr (pblk) for HEAD lclusters */
	if (!big_pcluster) {
		/*
		 * Walk backwards through the pack; nblk ends up as the block
		 * offset added to the pack's base block address below.
		 */
		nblk = 1;
		while (i > 0) {
			--i;
			lo = decode_compactedbits(lobits, in,
						  encodebits * i, &type);
			/* jump back over the NONHEAD run using its delta[0] */
			if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD)
				i -= lo;

			if (i >= 0)
				++nblk;
		}
	} else {
		/* with big pclusters, block counts come from CBLKCNT entries */
		nblk = 0;
		while (i > 0) {
			--i;
			lo = decode_compactedbits(lobits, in,
						  encodebits * i, &type);
			if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
				if (lo & Z_EROFS_LI_D0_CBLKCNT) {
					/* also skip the entry preceding the CBLKCNT one */
					--i;
					nblk += lo & ~Z_EROFS_LI_D0_CBLKCNT;
					continue;
				}
				/* bigpcluster shouldn't have plain d0 == 1 */
				if (lo <= 1) {
					DBG_BUGON(1);
					return -EFSCORRUPTED;
				}
				i -= lo - 2;
				continue;
			}
			++nblk;
		}
	}
	/* the base block address is the trailing __le32 of the pack */
	in += (vcnt << amortizedshift) - sizeof(__le32);
	m->pblk = le32_to_cpu(*(__le32 *)in) + nblk;
	return 0;
}
224
225static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m,
226					 unsigned long lcn, bool lookahead)
227{
228	struct inode *const inode = m->inode;
229	struct erofs_inode *const vi = EROFS_I(inode);
230	const erofs_off_t ebase = sizeof(struct z_erofs_map_header) +
231		ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8);
232	unsigned int totalidx = erofs_iblks(inode);
233	unsigned int compacted_4b_initial, compacted_2b;
234	unsigned int amortizedshift;
235	erofs_off_t pos;
236
237	if (lcn >= totalidx)
238		return -EINVAL;
239
240	m->lcn = lcn;
241	/* used to align to 32-byte (compacted_2b) alignment */
242	compacted_4b_initial = (32 - ebase % 32) / 4;
243	if (compacted_4b_initial == 32 / 4)
244		compacted_4b_initial = 0;
245
246	if ((vi->z_advise & Z_EROFS_ADVISE_COMPACTED_2B) &&
247	    compacted_4b_initial < totalidx)
248		compacted_2b = rounddown(totalidx - compacted_4b_initial, 16);
249	else
250		compacted_2b = 0;
251
252	pos = ebase;
253	if (lcn < compacted_4b_initial) {
254		amortizedshift = 2;
255		goto out;
256	}
257	pos += compacted_4b_initial * 4;
258	lcn -= compacted_4b_initial;
259
260	if (lcn < compacted_2b) {
261		amortizedshift = 1;
262		goto out;
263	}
264	pos += compacted_2b * 2;
265	lcn -= compacted_2b;
266	amortizedshift = 2;
267out:
268	pos += lcn * (1 << amortizedshift);
269	m->kaddr = erofs_read_metabuf(&m->map->buf, inode->i_sb,
270				      erofs_blknr(inode->i_sb, pos), EROFS_KMAP);
271	if (IS_ERR(m->kaddr))
272		return PTR_ERR(m->kaddr);
273	return unpack_compacted_index(m, amortizedshift, pos, lookahead);
274}
275
276static int z_erofs_load_lcluster_from_disk(struct z_erofs_maprecorder *m,
277					   unsigned int lcn, bool lookahead)
278{
279	switch (EROFS_I(m->inode)->datalayout) {
280	case EROFS_INODE_COMPRESSED_FULL:
281		return z_erofs_load_full_lcluster(m, lcn);
282	case EROFS_INODE_COMPRESSED_COMPACT:
283		return z_erofs_load_compact_lcluster(m, lcn, lookahead);
284	default:
285		return -EINVAL;
286	}
287}
288
289static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m,
290				   unsigned int lookback_distance)
291{
292	struct super_block *sb = m->inode->i_sb;
293	struct erofs_inode *const vi = EROFS_I(m->inode);
294	const unsigned int lclusterbits = vi->z_logical_clusterbits;
295
296	while (m->lcn >= lookback_distance) {
297		unsigned long lcn = m->lcn - lookback_distance;
298		int err;
299
300		err = z_erofs_load_lcluster_from_disk(m, lcn, false);
301		if (err)
302			return err;
303
304		switch (m->type) {
305		case Z_EROFS_LCLUSTER_TYPE_NONHEAD:
306			lookback_distance = m->delta[0];
307			if (!lookback_distance)
308				goto err_bogus;
309			continue;
310		case Z_EROFS_LCLUSTER_TYPE_PLAIN:
311		case Z_EROFS_LCLUSTER_TYPE_HEAD1:
312		case Z_EROFS_LCLUSTER_TYPE_HEAD2:
313			m->headtype = m->type;
314			m->map->m_la = (lcn << lclusterbits) | m->clusterofs;
315			return 0;
316		default:
317			erofs_err(sb, "unknown type %u @ lcn %lu of nid %llu",
318				  m->type, lcn, vi->nid);
319			DBG_BUGON(1);
320			return -EOPNOTSUPP;
321		}
322	}
323err_bogus:
324	erofs_err(sb, "bogus lookback distance %u @ lcn %lu of nid %llu",
325		  lookback_distance, m->lcn, vi->nid);
326	DBG_BUGON(1);
327	return -EFSCORRUPTED;
328}
329
330static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
331					    unsigned int initial_lcn)
332{
333	struct super_block *sb = m->inode->i_sb;
334	struct erofs_inode *const vi = EROFS_I(m->inode);
335	struct erofs_map_blocks *const map = m->map;
336	const unsigned int lclusterbits = vi->z_logical_clusterbits;
337	unsigned long lcn;
338	int err;
339
340	DBG_BUGON(m->type != Z_EROFS_LCLUSTER_TYPE_PLAIN &&
341		  m->type != Z_EROFS_LCLUSTER_TYPE_HEAD1 &&
342		  m->type != Z_EROFS_LCLUSTER_TYPE_HEAD2);
343	DBG_BUGON(m->type != m->headtype);
344
345	if (m->headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN ||
346	    ((m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD1) &&
347	     !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) ||
348	    ((m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2) &&
349	     !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2))) {
350		map->m_plen = 1ULL << lclusterbits;
351		return 0;
352	}
353	lcn = m->lcn + 1;
354	if (m->compressedblks)
355		goto out;
356
357	err = z_erofs_load_lcluster_from_disk(m, lcn, false);
358	if (err)
359		return err;
360
361	/*
362	 * If the 1st NONHEAD lcluster has already been handled initially w/o
363	 * valid compressedblks, which means at least it mustn't be CBLKCNT, or
364	 * an internal implemenatation error is detected.
365	 *
366	 * The following code can also handle it properly anyway, but let's
367	 * BUG_ON in the debugging mode only for developers to notice that.
368	 */
369	DBG_BUGON(lcn == initial_lcn &&
370		  m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD);
371
372	switch (m->type) {
373	case Z_EROFS_LCLUSTER_TYPE_PLAIN:
374	case Z_EROFS_LCLUSTER_TYPE_HEAD1:
375	case Z_EROFS_LCLUSTER_TYPE_HEAD2:
376		/*
377		 * if the 1st NONHEAD lcluster is actually PLAIN or HEAD type
378		 * rather than CBLKCNT, it's a 1 lcluster-sized pcluster.
379		 */
380		m->compressedblks = 1 << (lclusterbits - sb->s_blocksize_bits);
381		break;
382	case Z_EROFS_LCLUSTER_TYPE_NONHEAD:
383		if (m->delta[0] != 1)
384			goto err_bonus_cblkcnt;
385		if (m->compressedblks)
386			break;
387		fallthrough;
388	default:
389		erofs_err(sb, "cannot found CBLKCNT @ lcn %lu of nid %llu", lcn,
390			  vi->nid);
391		DBG_BUGON(1);
392		return -EFSCORRUPTED;
393	}
394out:
395	map->m_plen = erofs_pos(sb, m->compressedblks);
396	return 0;
397err_bonus_cblkcnt:
398	erofs_err(sb, "bogus CBLKCNT @ lcn %lu of nid %llu", lcn, vi->nid);
399	DBG_BUGON(1);
400	return -EFSCORRUPTED;
401}
402
403static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m)
404{
405	struct inode *inode = m->inode;
406	struct erofs_inode *vi = EROFS_I(inode);
407	struct erofs_map_blocks *map = m->map;
408	unsigned int lclusterbits = vi->z_logical_clusterbits;
409	u64 lcn = m->lcn, headlcn = map->m_la >> lclusterbits;
410	int err;
411
412	do {
413		/* handle the last EOF pcluster (no next HEAD lcluster) */
414		if ((lcn << lclusterbits) >= inode->i_size) {
415			map->m_llen = inode->i_size - map->m_la;
416			return 0;
417		}
418
419		err = z_erofs_load_lcluster_from_disk(m, lcn, true);
420		if (err)
421			return err;
422
423		if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
424			DBG_BUGON(!m->delta[1] &&
425				  m->clusterofs != 1 << lclusterbits);
426		} else if (m->type == Z_EROFS_LCLUSTER_TYPE_PLAIN ||
427			   m->type == Z_EROFS_LCLUSTER_TYPE_HEAD1 ||
428			   m->type == Z_EROFS_LCLUSTER_TYPE_HEAD2) {
429			/* go on until the next HEAD lcluster */
430			if (lcn != headlcn)
431				break;
432			m->delta[1] = 1;
433		} else {
434			erofs_err(inode->i_sb, "unknown type %u @ lcn %llu of nid %llu",
435				  m->type, lcn, vi->nid);
436			DBG_BUGON(1);
437			return -EOPNOTSUPP;
438		}
439		lcn += m->delta[1];
440	} while (m->delta[1]);
441
442	map->m_llen = (lcn << lclusterbits) + m->clusterofs - map->m_la;
443	return 0;
444}
445
/*
 * Resolve the compressed extent covering map->m_la, or the one covering the
 * last byte of the file when EROFS_GET_BLOCKS_FINDTAIL is set in @flags, and
 * fill in @map (m_la, m_llen, m_pa, m_plen, m_flags, m_algorithmformat).
 *
 * With EROFS_GET_BLOCKS_FINDTAIL the tail extent information is also stored
 * in the inode (z_idataoff for tail-packing, z_tailextent_headlcn, and the
 * upper 32 bits of z_fragmentoff for full-index fragment inodes).
 *
 * erofs_unmap_metabuf() is called on map->buf on every exit path.
 *
 * Returns 0 on success or a negative errno.
 */
static int z_erofs_do_map_blocks(struct inode *inode,
				 struct erofs_map_blocks *map, int flags)
{
	struct erofs_inode *const vi = EROFS_I(inode);
	bool ztailpacking = vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER;
	bool fragment = vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
	struct z_erofs_maprecorder m = {
		.inode = inode,
		.map = map,
	};
	int err = 0;
	unsigned int lclusterbits, endoff, afmt;
	unsigned long initial_lcn;
	unsigned long long ofs, end;

	lclusterbits = vi->z_logical_clusterbits;
	ofs = flags & EROFS_GET_BLOCKS_FINDTAIL ? inode->i_size - 1 : map->m_la;
	/* split the offset into lcluster number and in-lcluster offset */
	initial_lcn = ofs >> lclusterbits;
	endoff = ofs & ((1 << lclusterbits) - 1);

	err = z_erofs_load_lcluster_from_disk(&m, initial_lcn, false);
	if (err)
		goto unmap_out;

	/* the offset right after the last index is recorded as z_idataoff */
	if (ztailpacking && (flags & EROFS_GET_BLOCKS_FINDTAIL))
		vi->z_idataoff = m.nextpackoff;

	map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_ENCODED;
	/* by default the reported range ends at the end of this lcluster */
	end = (m.lcn + 1ULL) << lclusterbits;

	switch (m.type) {
	case Z_EROFS_LCLUSTER_TYPE_PLAIN:
	case Z_EROFS_LCLUSTER_TYPE_HEAD1:
	case Z_EROFS_LCLUSTER_TYPE_HEAD2:
		/* the offset lies in the extent that starts in this lcluster */
		if (endoff >= m.clusterofs) {
			m.headtype = m.type;
			map->m_la = (m.lcn << lclusterbits) | m.clusterofs;
			/*
			 * For ztailpacking files, in order to inline data more
			 * effectively, special EOF lclusters are now supported
			 * which can have three parts at most.
			 */
			if (ztailpacking && end > inode->i_size)
				end = inode->i_size;
			break;
		}
		/* m.lcn should be >= 1 if endoff < m.clusterofs */
		if (!m.lcn) {
			erofs_err(inode->i_sb,
				  "invalid logical cluster 0 at nid %llu",
				  vi->nid);
			err = -EFSCORRUPTED;
			goto unmap_out;
		}
		/*
		 * The offset belongs to the previous extent, which ends where
		 * this head starts; look back from the preceding lcluster.
		 */
		end = (m.lcn << lclusterbits) | m.clusterofs;
		map->m_flags |= EROFS_MAP_FULL_MAPPED;
		m.delta[0] = 1;
		fallthrough;
	case Z_EROFS_LCLUSTER_TYPE_NONHEAD:
		/* get the corresponding first chunk */
		err = z_erofs_extent_lookback(&m, m.delta[0]);
		if (err)
			goto unmap_out;
		break;
	default:
		erofs_err(inode->i_sb,
			  "unknown type %u @ offset %llu of nid %llu",
			  m.type, ofs, vi->nid);
		err = -EOPNOTSUPP;
		goto unmap_out;
	}
	if (m.partialref)
		map->m_flags |= EROFS_MAP_PARTIAL_REF;
	map->m_llen = end - map->m_la;

	if (flags & EROFS_GET_BLOCKS_FINDTAIL) {
		/* m.lcn is now the head lcluster of the tail extent */
		vi->z_tailextent_headlcn = m.lcn;
		/* for non-compact indexes, fragmentoff is 64 bits */
		if (fragment && vi->datalayout == EROFS_INODE_COMPRESSED_FULL)
			vi->z_fragmentoff |= (u64)m.pblk << 32;
	}
	if (ztailpacking && m.lcn == vi->z_tailextent_headlcn) {
		/* tail extent of a tail-packing inode: data sits at z_idataoff */
		map->m_flags |= EROFS_MAP_META;
		map->m_pa = vi->z_idataoff;
		map->m_plen = vi->z_idata_size;
	} else if (fragment && m.lcn == vi->z_tailextent_headlcn) {
		/* tail extent of a fragment inode: no m_pa/m_plen is set here */
		map->m_flags |= EROFS_MAP_FRAGMENT;
	} else {
		map->m_pa = erofs_pos(inode->i_sb, m.pblk);
		err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
		if (err)
			goto unmap_out;
	}

	if (m.headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN) {
		/* a PLAIN extent cannot be logically longer than its storage */
		if (map->m_llen > map->m_plen) {
			DBG_BUGON(1);
			err = -EFSCORRUPTED;
			goto unmap_out;
		}
		afmt = vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER ?
			Z_EROFS_COMPRESSION_INTERLACED :
			Z_EROFS_COMPRESSION_SHIFTED;
	} else {
		/* HEAD2 selects z_algorithmtype[1], anything else [0] */
		afmt = m.headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2 ?
			vi->z_algorithmtype[1] : vi->z_algorithmtype[0];
		/* reject algorithms not listed in the sb's available_compr_algs */
		if (!(EROFS_I_SB(inode)->available_compr_algs & (1 << afmt))) {
			erofs_err(inode->i_sb, "inconsistent algorithmtype %u for nid %llu",
				  afmt, vi->nid);
			err = -EFSCORRUPTED;
			goto unmap_out;
		}
	}
	map->m_algorithmformat = afmt;

	/*
	 * The full decompressed length is computed for FIEMAP, and for
	 * READMORE on LZMA/DEFLATE extents that already span at least one
	 * block.
	 */
	if ((flags & EROFS_GET_BLOCKS_FIEMAP) ||
	    ((flags & EROFS_GET_BLOCKS_READMORE) &&
	     (map->m_algorithmformat == Z_EROFS_COMPRESSION_LZMA ||
	      map->m_algorithmformat == Z_EROFS_COMPRESSION_DEFLATE) &&
	      map->m_llen >= i_blocksize(inode))) {
		err = z_erofs_get_extent_decompressedlen(&m);
		if (!err)
			map->m_flags |= EROFS_MAP_FULL_MAPPED;
	}

unmap_out:
	erofs_unmap_metabuf(&m.map->buf);
	return err;
}
575
576static int z_erofs_fill_inode_lazy(struct inode *inode)
577{
578	struct erofs_inode *const vi = EROFS_I(inode);
579	struct super_block *const sb = inode->i_sb;
580	int err, headnr;
581	erofs_off_t pos;
582	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
583	void *kaddr;
584	struct z_erofs_map_header *h;
585
586	if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags)) {
587		/*
588		 * paired with smp_mb() at the end of the function to ensure
589		 * fields will only be observed after the bit is set.
590		 */
591		smp_mb();
592		return 0;
593	}
594
595	if (wait_on_bit_lock(&vi->flags, EROFS_I_BL_Z_BIT, TASK_KILLABLE))
596		return -ERESTARTSYS;
597
598	err = 0;
599	if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags))
600		goto out_unlock;
601
602	pos = ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8);
603	kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(sb, pos), EROFS_KMAP);
604	if (IS_ERR(kaddr)) {
605		err = PTR_ERR(kaddr);
606		goto out_unlock;
607	}
608
609	h = kaddr + erofs_blkoff(sb, pos);
610	/*
611	 * if the highest bit of the 8-byte map header is set, the whole file
612	 * is stored in the packed inode. The rest bits keeps z_fragmentoff.
613	 */
614	if (h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT) {
615		vi->z_advise = Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
616		vi->z_fragmentoff = le64_to_cpu(*(__le64 *)h) ^ (1ULL << 63);
617		vi->z_tailextent_headlcn = 0;
618		goto done;
619	}
620	vi->z_advise = le16_to_cpu(h->h_advise);
621	vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
622	vi->z_algorithmtype[1] = h->h_algorithmtype >> 4;
623
624	headnr = 0;
625	if (vi->z_algorithmtype[0] >= Z_EROFS_COMPRESSION_MAX ||
626	    vi->z_algorithmtype[++headnr] >= Z_EROFS_COMPRESSION_MAX) {
627		erofs_err(sb, "unknown HEAD%u format %u for nid %llu, please upgrade kernel",
628			  headnr + 1, vi->z_algorithmtype[headnr], vi->nid);
629		err = -EOPNOTSUPP;
630		goto out_put_metabuf;
631	}
632
633	vi->z_logical_clusterbits = sb->s_blocksize_bits + (h->h_clusterbits & 7);
634	if (!erofs_sb_has_big_pcluster(EROFS_SB(sb)) &&
635	    vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 |
636			    Z_EROFS_ADVISE_BIG_PCLUSTER_2)) {
637		erofs_err(sb, "per-inode big pcluster without sb feature for nid %llu",
638			  vi->nid);
639		err = -EFSCORRUPTED;
640		goto out_put_metabuf;
641	}
642	if (vi->datalayout == EROFS_INODE_COMPRESSED_COMPACT &&
643	    !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1) ^
644	    !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2)) {
645		erofs_err(sb, "big pcluster head1/2 of compact indexes should be consistent for nid %llu",
646			  vi->nid);
647		err = -EFSCORRUPTED;
648		goto out_put_metabuf;
649	}
650
651	if (vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER) {
652		struct erofs_map_blocks map = {
653			.buf = __EROFS_BUF_INITIALIZER
654		};
655
656		vi->z_idata_size = le16_to_cpu(h->h_idata_size);
657		err = z_erofs_do_map_blocks(inode, &map,
658					    EROFS_GET_BLOCKS_FINDTAIL);
659		erofs_put_metabuf(&map.buf);
660
661		if (!map.m_plen ||
662		    erofs_blkoff(sb, map.m_pa) + map.m_plen > sb->s_blocksize) {
663			erofs_err(sb, "invalid tail-packing pclustersize %llu",
664				  map.m_plen);
665			err = -EFSCORRUPTED;
666		}
667		if (err < 0)
668			goto out_put_metabuf;
669	}
670
671	if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER &&
672	    !(h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT)) {
673		struct erofs_map_blocks map = {
674			.buf = __EROFS_BUF_INITIALIZER
675		};
676
677		vi->z_fragmentoff = le32_to_cpu(h->h_fragmentoff);
678		err = z_erofs_do_map_blocks(inode, &map,
679					    EROFS_GET_BLOCKS_FINDTAIL);
680		erofs_put_metabuf(&map.buf);
681		if (err < 0)
682			goto out_put_metabuf;
683	}
684done:
685	/* paired with smp_mb() at the beginning of the function */
686	smp_mb();
687	set_bit(EROFS_I_Z_INITED_BIT, &vi->flags);
688out_put_metabuf:
689	erofs_put_metabuf(&buf);
690out_unlock:
691	clear_and_wake_up_bit(EROFS_I_BL_Z_BIT, &vi->flags);
692	return err;
693}
694
695int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map,
696			    int flags)
697{
698	struct erofs_inode *const vi = EROFS_I(inode);
699	int err = 0;
700
701	trace_z_erofs_map_blocks_iter_enter(inode, map, flags);
702
703	/* when trying to read beyond EOF, leave it unmapped */
704	if (map->m_la >= inode->i_size) {
705		map->m_llen = map->m_la + 1 - inode->i_size;
706		map->m_la = inode->i_size;
707		map->m_flags = 0;
708		goto out;
709	}
710
711	err = z_erofs_fill_inode_lazy(inode);
712	if (err)
713		goto out;
714
715	if ((vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER) &&
716	    !vi->z_tailextent_headlcn) {
717		map->m_la = 0;
718		map->m_llen = inode->i_size;
719		map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_FULL_MAPPED |
720				EROFS_MAP_FRAGMENT;
721		goto out;
722	}
723
724	err = z_erofs_do_map_blocks(inode, map, flags);
725out:
726	trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err);
727	return err;
728}
729
730static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset,
731				loff_t length, unsigned int flags,
732				struct iomap *iomap, struct iomap *srcmap)
733{
734	int ret;
735	struct erofs_map_blocks map = { .m_la = offset };
736
737	ret = z_erofs_map_blocks_iter(inode, &map, EROFS_GET_BLOCKS_FIEMAP);
738	erofs_put_metabuf(&map.buf);
739	if (ret < 0)
740		return ret;
741
742	iomap->bdev = inode->i_sb->s_bdev;
743	iomap->offset = map.m_la;
744	iomap->length = map.m_llen;
745	if (map.m_flags & EROFS_MAP_MAPPED) {
746		iomap->type = IOMAP_MAPPED;
747		iomap->addr = map.m_flags & EROFS_MAP_FRAGMENT ?
748			      IOMAP_NULL_ADDR : map.m_pa;
749	} else {
750		iomap->type = IOMAP_HOLE;
751		iomap->addr = IOMAP_NULL_ADDR;
752		/*
753		 * No strict rule on how to describe extents for post EOF, yet
754		 * we need to do like below. Otherwise, iomap itself will get
755		 * into an endless loop on post EOF.
756		 *
757		 * Calculate the effective offset by subtracting extent start
758		 * (map.m_la) from the requested offset, and add it to length.
759		 * (NB: offset >= map.m_la always)
760		 */
761		if (iomap->offset >= inode->i_size)
762			iomap->length = length + offset - map.m_la;
763	}
764	iomap->flags = 0;
765	return 0;
766}
767
/* iomap operations whose ->iomap_begin is z_erofs_iomap_begin_report() */
const struct iomap_ops z_erofs_iomap_report_ops = {
	.iomap_begin = z_erofs_iomap_begin_report,
};
771