/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013 Juniper Networks, Inc.
 * Copyright (c) 2022-2023 Klara, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
28
29#include "opt_tarfs.h"
30#include "opt_zstdio.h"
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/counter.h>
35#include <sys/bio.h>
36#include <sys/buf.h>
37#include <sys/malloc.h>
38#include <sys/mount.h>
39#include <sys/sysctl.h>
40#include <sys/uio.h>
41#include <sys/vnode.h>
42
43#if defined(ZSTDIO)
44#define TARFS_ZIO 1
45#else
46#undef TARFS_ZIO
47#endif
48
49#ifdef ZSTDIO
50#define ZSTD_STATIC_LINKING_ONLY
51#include <contrib/zstd/lib/zstd.h>
52#endif
53
54#include <fs/tarfs/tarfs.h>
55#include <fs/tarfs/tarfs_dbg.h>
56
57#ifdef TARFS_DEBUG
58SYSCTL_NODE(_vfs_tarfs, OID_AUTO, zio, CTLFLAG_RD, 0,
59    "Tar filesystem decompression layer");
60COUNTER_U64_DEFINE_EARLY(tarfs_zio_inflated);
61SYSCTL_COUNTER_U64(_vfs_tarfs_zio, OID_AUTO, inflated, CTLFLAG_RD,
62    &tarfs_zio_inflated, "Amount of compressed data inflated.");
63COUNTER_U64_DEFINE_EARLY(tarfs_zio_consumed);
64SYSCTL_COUNTER_U64(_vfs_tarfs_zio, OID_AUTO, consumed, CTLFLAG_RD,
65    &tarfs_zio_consumed, "Amount of compressed data consumed.");
66COUNTER_U64_DEFINE_EARLY(tarfs_zio_bounced);
67SYSCTL_COUNTER_U64(_vfs_tarfs_zio, OID_AUTO, bounced, CTLFLAG_RD,
68    &tarfs_zio_bounced, "Amount of decompressed data bounced.");
69
70static int
71tarfs_sysctl_handle_zio_reset(SYSCTL_HANDLER_ARGS)
72{
73	unsigned int tmp;
74	int error;
75
76	tmp = 0;
77	if ((error = SYSCTL_OUT(req, &tmp, sizeof(tmp))) != 0)
78		return (error);
79	if (req->newptr != NULL) {
80		if ((error = SYSCTL_IN(req, &tmp, sizeof(tmp))) != 0)
81			return (error);
82		counter_u64_zero(tarfs_zio_inflated);
83		counter_u64_zero(tarfs_zio_consumed);
84		counter_u64_zero(tarfs_zio_bounced);
85	}
86	return (0);
87}
88
89SYSCTL_PROC(_vfs_tarfs_zio, OID_AUTO, reset,
90    CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW,
91    NULL, 0, tarfs_sysctl_handle_zio_reset, "IU",
92    "Reset compression counters.");
93#endif
94
/* malloc(9) tags for decompression bookkeeping and data buffers */
MALLOC_DEFINE(M_TARFSZSTATE, "tarfs zstate", "tarfs decompression state");
MALLOC_DEFINE(M_TARFSZBUF, "tarfs zbuf", "tarfs decompression buffers");

/* Leading magic bytes used to sniff supported compression formats. */
#define XZ_MAGIC		(uint8_t[]){ 0xfd, 0x37, 0x7a, 0x58, 0x5a }
#define ZLIB_MAGIC		(uint8_t[]){ 0x1f, 0x8b, 0x08 }
#define ZSTD_MAGIC		(uint8_t[]){ 0x28, 0xb5, 0x2f, 0xfd }

#ifdef ZSTDIO
/* Per-mount zstd decompression state: a single streaming decoder. */
struct tarfs_zstd {
	ZSTD_DStream *zds;
};
#endif
107
108/* XXX review use of curthread / uio_td / td_cred */
109
/*
 * Reads from the tar file according to the provided uio.  If the archive
 * is compressed and raw is false, reads the decompressed stream;
 * otherwise, reads directly from the original file.  Returns 0 on success
 * and a positive errno value on failure.
 */
int
tarfs_io_read(struct tarfs_mount *tmp, bool raw, struct uio *uiop)
{
	void *rl = NULL;
	off_t off = uiop->uio_offset;
	size_t len = uiop->uio_resid;
	int error;

	if (raw || tmp->znode == NULL) {
		/*
		 * Raw request, or the archive is uncompressed (no zio
		 * node): read straight from the tarball's vnode.  Take
		 * the range lock before the shared vnode lock.
		 */
		rl = vn_rangelock_rlock(tmp->vp, off, off + len);
		error = vn_lock(tmp->vp, LK_SHARED);
		if (error == 0) {
			error = VOP_READ(tmp->vp, uiop, IO_NODELOCKED,
			    uiop->uio_td->td_ucred);
			VOP_UNLOCK(tmp->vp);
		}
		vn_rangelock_unlock(tmp->vp, rl);
	} else {
		/*
		 * Read through the zio node, which decompresses on the
		 * fly.  Exclusive lock because the read mutates shared
		 * decompression state (stream position, frame index);
		 * IO_DIRECT to bypass the buffer cache.
		 */
		error = vn_lock(tmp->znode, LK_EXCLUSIVE);
		if (error == 0) {
			error = VOP_READ(tmp->znode, uiop,
			    IO_DIRECT | IO_NODELOCKED,
			    uiop->uio_td->td_ucred);
			VOP_UNLOCK(tmp->znode);
		}
	}
	TARFS_DPF(IO, "%s(%zu, %zu) = %d (resid %zd)\n", __func__,
	    (size_t)off, len, error, uiop->uio_resid);
	return (error);
}
146
147/*
148 * Reads from the tar file into the provided buffer.  If the archive is
149 * compressed and raw is false, reads the decompressed stream; otherwise,
150 * reads directly from the original file.  Returns the number of bytes
151 * read on success, 0 on EOF, and a negative errno value on failure.
152 */
153ssize_t
154tarfs_io_read_buf(struct tarfs_mount *tmp, bool raw,
155    void *buf, off_t off, size_t len)
156{
157	struct uio auio;
158	struct iovec aiov;
159	ssize_t res;
160	int error;
161
162	if (len == 0) {
163		TARFS_DPF(IO, "%s(%zu, %zu) null\n", __func__,
164		    (size_t)off, len);
165		return (0);
166	}
167	aiov.iov_base = buf;
168	aiov.iov_len = len;
169	auio.uio_iov = &aiov;
170	auio.uio_iovcnt = 1;
171	auio.uio_offset = off;
172	auio.uio_segflg = UIO_SYSSPACE;
173	auio.uio_rw = UIO_READ;
174	auio.uio_resid = len;
175	auio.uio_td = curthread;
176	error = tarfs_io_read(tmp, raw, &auio);
177	if (error != 0) {
178		TARFS_DPF(IO, "%s(%zu, %zu) error %d\n", __func__,
179		    (size_t)off, len, error);
180		return (-error);
181	}
182	res = len - auio.uio_resid;
183	if (res == 0 && len != 0) {
184		TARFS_DPF(IO, "%s(%zu, %zu) eof\n", __func__,
185		    (size_t)off, len);
186	} else {
187		TARFS_DPF(IO, "%s(%zu, %zu) read %zd | %*D\n", __func__,
188		    (size_t)off, len, res,
189		    (int)(res > 8 ? 8 : res), (uint8_t *)buf, " ");
190	}
191	return (res);
192}
193
#ifdef ZSTDIO
/*
 * Allocation callback handed to zstd: route allocations through
 * kernel malloc(9) under a dedicated tag.  M_WAITOK means this
 * sleeps rather than returning NULL.
 */
static void *
tarfs_zstate_alloc(void *opaque, size_t size)
{
	void *p;

	(void)opaque;
	p = malloc(size, M_TARFSZSTATE, M_WAITOK);
	return (p);
}
#endif
203
#ifdef ZSTDIO
/*
 * Deallocation callback handed to zstd, paired with
 * tarfs_zstate_alloc().
 */
static void
tarfs_zstate_free(void *opaque, void *address)
{
	(void)opaque;
	free(address, M_TARFSZSTATE);
}
#endif
213
#ifdef ZSTDIO
/* Custom allocator table so zstd uses kernel malloc(9). */
static ZSTD_customMem tarfs_zstd_mem = {
	tarfs_zstate_alloc,
	tarfs_zstate_free,
	NULL,	/* opaque cookie, unused by our callbacks */
};
#endif
221
#ifdef TARFS_ZIO
/*
 * Updates the decompression frame index, recording the current input and
 * output offsets in a new index entry, and growing the index if
 * necessary.  The index lets tarfs_zread_zstd() resume decompression at
 * the closest recorded frame boundary instead of from the beginning of
 * the stream.
 */
static void
tarfs_zio_update_index(struct tarfs_zio *zio, off_t i, off_t o)
{

	if (++zio->curidx >= zio->nidx) {
		/* appending a new entry; grow the array if it is full */
		if (++zio->nidx > zio->szidx) {
			zio->szidx *= 2;
			zio->idx = realloc(zio->idx,
			    zio->szidx * sizeof(*zio->idx),
			    M_TARFSZSTATE, M_ZERO | M_WAITOK);
			TARFS_DPF(ALLOC, "%s: resized zio index\n", __func__);
		}
		zio->idx[zio->curidx].i = i;
		zio->idx[zio->curidx].o = o;
		TARFS_DPF(ZIDX, "%s: index %u = i %zu o %zu\n", __func__,
		    zio->curidx, (size_t)zio->idx[zio->curidx].i,
		    (size_t)zio->idx[zio->curidx].o);
	}
	/* revisiting an entry: offsets must match what was recorded */
	MPASS(zio->idx[zio->curidx].i == i);
	MPASS(zio->idx[zio->curidx].o == o);
}
#endif
250
251/*
252 * VOP_ACCESS for zio node.
253 */
254static int
255tarfs_zaccess(struct vop_access_args *ap)
256{
257	struct vnode *vp = ap->a_vp;
258	struct tarfs_zio *zio = vp->v_data;
259	struct tarfs_mount *tmp = zio->tmp;
260	accmode_t accmode = ap->a_accmode;
261	int error = EPERM;
262
263	if (accmode == VREAD) {
264		error = vn_lock(tmp->vp, LK_SHARED);
265		if (error == 0) {
266			error = VOP_ACCESS(tmp->vp, accmode, ap->a_cred, ap->a_td);
267			VOP_UNLOCK(tmp->vp);
268		}
269	}
270	TARFS_DPF(ZIO, "%s(%d) = %d\n", __func__, accmode, error);
271	return (error);
272}
273
274/*
275 * VOP_GETATTR for zio node.
276 */
277static int
278tarfs_zgetattr(struct vop_getattr_args *ap)
279{
280	struct vattr va;
281	struct vnode *vp = ap->a_vp;
282	struct tarfs_zio *zio = vp->v_data;
283	struct tarfs_mount *tmp = zio->tmp;
284	struct vattr *vap = ap->a_vap;
285	int error = 0;
286
287	VATTR_NULL(vap);
288	error = vn_lock(tmp->vp, LK_SHARED);
289	if (error == 0) {
290		error = VOP_GETATTR(tmp->vp, &va, ap->a_cred);
291		VOP_UNLOCK(tmp->vp);
292		if (error == 0) {
293			vap->va_type = VREG;
294			vap->va_mode = va.va_mode;
295			vap->va_nlink = 1;
296			vap->va_gid = va.va_gid;
297			vap->va_uid = va.va_uid;
298			vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
299			vap->va_fileid = TARFS_ZIOINO;
300			vap->va_size = zio->idx[zio->nidx - 1].o;
301			vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
302			vap->va_atime = va.va_atime;
303			vap->va_ctime = va.va_ctime;
304			vap->va_mtime = va.va_mtime;
305			vap->va_birthtime = tmp->root->birthtime;
306			vap->va_bytes = va.va_bytes;
307		}
308	}
309	TARFS_DPF(ZIO, "%s() = %d\n", __func__, error);
310	return (error);
311}
312
#ifdef ZSTDIO
/*
 * VOP_READ for zio node, zstd edition.
 *
 * Decompresses the requested range of the tarball into the caller's uio.
 * Decompression can only resume at a frame boundary, so we first
 * reposition to the closest recorded frame at or before the requested
 * offset (using the index maintained by tarfs_zio_update_index()) and
 * discard output until we reach it.  If the destination is not in kernel
 * space, the data is decompressed into a bounce buffer and copied out at
 * the end.  On any error, the stream state is rewound to the beginning
 * so the next read starts from a known-good position.
 */
static int
tarfs_zread_zstd(struct tarfs_zio *zio, struct uio *uiop)
{
	void *ibuf = NULL, *obuf = NULL, *rl = NULL;
	struct uio auio;
	struct iovec aiov;
	struct tarfs_mount *tmp = zio->tmp;
	struct tarfs_zstd *zstd = zio->zstd;
	struct thread *td = curthread;
	ZSTD_inBuffer zib;
	ZSTD_outBuffer zob;
	off_t zsize;
	off_t ipos, opos;
	size_t ilen, olen;
	size_t zerror;
	off_t off = uiop->uio_offset;
	size_t len = uiop->uio_resid;
	size_t resid = uiop->uio_resid;
	size_t bsize;
	int error;
	bool reset = false;

	/* do we have to rewind? */
	if (off < zio->opos) {
		while (zio->curidx > 0 && off < zio->idx[zio->curidx].o)
			zio->curidx--;
		reset = true;
	}
	/* advance to the nearest index entry */
	if (off > zio->opos) {
		// XXX maybe do a binary search instead
		while (zio->curidx < zio->nidx - 1 &&
		    off >= zio->idx[zio->curidx + 1].o) {
			zio->curidx++;
			reset = true;
		}
	}
	/* reset the decompression stream if needed */
	if (reset) {
		zio->ipos = zio->idx[zio->curidx].i;
		zio->opos = zio->idx[zio->curidx].o;
		ZSTD_resetDStream(zstd->zds);
		TARFS_DPF(ZIDX, "%s: skipping to index %u = i %zu o %zu\n", __func__,
		    zio->curidx, (size_t)zio->ipos, (size_t)zio->opos);
	} else {
		TARFS_DPF(ZIDX, "%s: continuing at i %zu o %zu\n", __func__,
		    (size_t)zio->ipos, (size_t)zio->opos);
	}

	/*
	 * Set up a temporary buffer for compressed data.  Use the size
	 * recommended by the zstd library; this is usually 128 kB, but
	 * just in case, make sure it's a multiple of the page size and no
	 * larger than MAXBSIZE.
	 */
	bsize = roundup(ZSTD_CStreamOutSize(), PAGE_SIZE);
	if (bsize > MAXBSIZE)
		bsize = MAXBSIZE;
	ibuf = malloc(bsize, M_TEMP, M_WAITOK);
	/* empty input buffer: the loop below fills it on first pass */
	zib.src = NULL;
	zib.size = 0;
	zib.pos = 0;

	/*
	 * Set up the decompression buffer.  If the target is not in
	 * kernel space, we will have to set up a bounce buffer.
	 *
	 * TODO: to avoid using a bounce buffer, map destination pages
	 * using vm_fault_quick_hold_pages().
	 */
	MPASS(zio->opos <= off);
	MPASS(uiop->uio_iovcnt == 1);
	MPASS(uiop->uio_iov->iov_len >= len);
	if (uiop->uio_segflg == UIO_SYSSPACE) {
		zob.dst = uiop->uio_iov->iov_base;
	} else {
		TARFS_DPF(BOUNCE, "%s: allocating %zu-byte bounce buffer\n",
		    __func__, len);
		zob.dst = obuf = malloc(len, M_TEMP, M_WAITOK);
	}
	zob.size = len;
	zob.pos = 0;

	/* lock tarball */
	rl = vn_rangelock_rlock(tmp->vp, zio->ipos, OFF_MAX);
	error = vn_lock(tmp->vp, LK_SHARED);
	if (error != 0) {
		goto fail_unlocked;
	}
	/* check size */
	error = vn_getsize_locked(tmp->vp, &zsize, td->td_ucred);
	if (error != 0) {
		goto fail;
	}
	if (zio->ipos >= zsize) {
		/* beyond EOF */
		goto fail;
	}

	while (resid > 0) {
		if (zib.pos == zib.size) {
			/* request data from the underlying file */
			aiov.iov_base = ibuf;
			aiov.iov_len = bsize;
			auio.uio_iov = &aiov;
			auio.uio_iovcnt = 1;
			auio.uio_offset = zio->ipos;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_resid = aiov.iov_len;
			auio.uio_td = td;
			error = VOP_READ(tmp->vp, &auio, IO_NODELOCKED,
			    td->td_ucred);
			if (error != 0)
				goto fail;
			TARFS_DPF(ZIO, "%s: req %zu+%zu got %zu+%zu\n", __func__,
			    (size_t)zio->ipos, bsize,
			    (size_t)zio->ipos, bsize - auio.uio_resid);
			zib.src = ibuf;
			zib.size = bsize - auio.uio_resid;
			zib.pos = 0;
		}
		MPASS(zib.pos <= zib.size);
		if (zib.pos == zib.size) {
			/* read returned nothing: end of the tarball */
			TARFS_DPF(ZIO, "%s: end of file after i %zu o %zu\n", __func__,
			    (size_t)zio->ipos, (size_t)zio->opos);
			goto fail;
		}
		if (zio->opos < off) {
			/* to be discarded */
			zob.size = min(off - zio->opos, len);
			zob.pos = 0;
		} else {
			/* inflate directly into the remaining window */
			zob.size = len;
			zob.pos = zio->opos - off;
		}
		ipos = zib.pos;
		opos = zob.pos;
		/* decompress as much as possible */
		zerror = ZSTD_decompressStream(zstd->zds, &zob, &zib);
		zio->ipos += ilen = zib.pos - ipos;
		zio->opos += olen = zob.pos - opos;
		/* only count output past the requested offset toward resid */
		if (zio->opos > off)
			resid -= olen;
		if (ZSTD_isError(zerror)) {
			TARFS_DPF(ZIO, "%s: inflate failed after i %zu o %zu: %s\n", __func__,
			    (size_t)zio->ipos, (size_t)zio->opos, ZSTD_getErrorName(zerror));
			error = EIO;
			goto fail;
		}
		if (zerror == 0 && olen == 0) {
			/* frame ended and produced nothing: end of stream */
			TARFS_DPF(ZIO, "%s: end of stream after i %zu o %zu\n", __func__,
			    (size_t)zio->ipos, (size_t)zio->opos);
			break;
		}
		if (zerror == 0) {
			/* frame boundary: record it in the index */
			TARFS_DPF(ZIO, "%s: end of frame after i %zu o %zu\n", __func__,
			    (size_t)zio->ipos, (size_t)zio->opos);
			tarfs_zio_update_index(zio, zio->ipos, zio->opos);
		}
		TARFS_DPF(ZIO, "%s: inflated %zu\n", __func__, olen);
#ifdef TARFS_DEBUG
		counter_u64_add(tarfs_zio_inflated, olen);
#endif
	}
fail:
	VOP_UNLOCK(tmp->vp);
fail_unlocked:
	/* note: reaching here with error == 0 includes the EOF cases above */
	if (error == 0) {
		if (uiop->uio_segflg == UIO_SYSSPACE) {
			uiop->uio_resid = resid;
		} else if (len > resid) {
			/* copy the bounce buffer out to the caller */
			TARFS_DPF(BOUNCE, "%s: bounced %zu bytes\n", __func__,
			    len - resid);
			error = uiomove(obuf, len - resid, uiop);
#ifdef TARFS_DEBUG
			counter_u64_add(tarfs_zio_bounced, len - resid);
#endif
		}
	}
	if (obuf != NULL) {
		TARFS_DPF(BOUNCE, "%s: freeing bounce buffer\n", __func__);
		free(obuf, M_TEMP);
	}
	if (rl != NULL)
		vn_rangelock_unlock(tmp->vp, rl);
	if (ibuf != NULL)
		free(ibuf, M_TEMP);
	TARFS_DPF(ZIO, "%s(%zu, %zu) = %d (resid %zd)\n", __func__,
	    (size_t)off, len, error, uiop->uio_resid);
#ifdef TARFS_DEBUG
	counter_u64_add(tarfs_zio_consumed, len - uiop->uio_resid);
#endif
	if (error != 0) {
		/* rewind the stream to a known-good state for the next read */
		zio->curidx = 0;
		zio->ipos = zio->idx[0].i;
		zio->opos = zio->idx[0].o;
		ZSTD_resetDStream(zstd->zds);
	}
	return (error);
}
#endif
519
/*
 * VOP_READ for zio node.  Dispatches to the format-specific
 * decompression routine; only zstd is currently implemented.  Returns
 * EFTYPE if no decompression state is attached to this node.
 */
static int
tarfs_zread(struct vop_read_args *ap)
{
#if defined(TARFS_DEBUG) || defined(ZSTDIO)
	struct vnode *vp = ap->a_vp;
	struct tarfs_zio *zio = vp->v_data;
	struct uio *uiop = ap->a_uio;
#endif
#ifdef TARFS_DEBUG
	off_t off = uiop->uio_offset;
	size_t len = uiop->uio_resid;
#endif
	int error;

	TARFS_DPF(ZIO, "%s(%zu, %zu)\n", __func__,
	    (size_t)off, len);
#ifdef ZSTDIO
	if (zio->zstd != NULL) {
		error = tarfs_zread_zstd(zio, uiop);
	} else
#endif
		/* no decompression state: unsupported archive format */
		error = EFTYPE;
	TARFS_DPF(ZIO, "%s(%zu, %zu) = %d (resid %zd)\n", __func__,
	    (size_t)off, len, error, uiop->uio_resid);
	return (error);
}
549
550/*
551 * VOP_RECLAIM for zio node.
552 */
553static int
554tarfs_zreclaim(struct vop_reclaim_args *ap)
555{
556	struct vnode *vp = ap->a_vp;
557
558	TARFS_DPF(ZIO, "%s(%p)\n", __func__, vp);
559	vp->v_data = NULL;
560	return (0);
561}
562
563/*
564 * VOP_STRATEGY for zio node.
565 */
566static int
567tarfs_zstrategy(struct vop_strategy_args *ap)
568{
569	struct uio auio;
570	struct iovec iov;
571	struct vnode *vp = ap->a_vp;
572	struct buf *bp = ap->a_bp;
573	off_t off;
574	size_t len;
575	int error;
576
577	iov.iov_base = bp->b_data;
578	iov.iov_len = bp->b_bcount;
579	off = bp->b_iooffset;
580	len = bp->b_bcount;
581	bp->b_resid = len;
582	auio.uio_iov = &iov;
583	auio.uio_iovcnt = 1;
584	auio.uio_offset = off;
585	auio.uio_resid = len;
586	auio.uio_segflg = UIO_SYSSPACE;
587	auio.uio_rw = UIO_READ;
588	auio.uio_td = curthread;
589	error = VOP_READ(vp, &auio, IO_DIRECT | IO_NODELOCKED, bp->b_rcred);
590	bp->b_flags |= B_DONE;
591	if (error != 0) {
592		bp->b_ioflags |= BIO_ERROR;
593		bp->b_error = error;
594	}
595	return (0);
596}
597
/* Vnode operations for the synthetic decompressed-view (zio) node. */
static struct vop_vector tarfs_znodeops = {
	.vop_default =		&default_vnodeops,

	.vop_access =		tarfs_zaccess,
	.vop_getattr =		tarfs_zgetattr,
	.vop_read =		tarfs_zread,
	.vop_reclaim =		tarfs_zreclaim,
	.vop_strategy =		tarfs_zstrategy,
};
VFS_VOP_VECTOR_REGISTER(tarfs_znodeops);
608
#ifdef TARFS_ZIO
/*
 * Initializes the decompression layer: allocates the zio state and frame
 * index, seeds the first index entry with the given input / output
 * offsets, and creates the synthetic vnode through which decompressed
 * data is read.  All allocations sleep, so this does not fail.
 */
static struct tarfs_zio *
tarfs_zio_init(struct tarfs_mount *tmp, off_t i, off_t o)
{
	struct tarfs_zio *zio;
	struct vnode *zvp;

	zio = malloc(sizeof(*zio), M_TARFSZSTATE, M_ZERO | M_WAITOK);
	TARFS_DPF(ALLOC, "%s: allocated zio\n", __func__);
	zio->tmp = tmp;
	/* start with room for 128 index entries; grown on demand */
	zio->szidx = 128;
	zio->idx = malloc(zio->szidx * sizeof(*zio->idx), M_TARFSZSTATE,
	    M_ZERO | M_WAITOK);
	/* entry 0 records the start of the stream */
	zio->curidx = 0;
	zio->nidx = 1;
	zio->idx[zio->curidx].i = zio->ipos = i;
	zio->idx[zio->curidx].o = zio->opos = o;
	tmp->zio = zio;
	TARFS_DPF(ALLOC, "%s: allocated zio index\n", __func__);
	(void)getnewvnode("tarfsz", tmp->vfs, &tarfs_znodeops, &zvp);
	zvp->v_data = zio;
	zvp->v_type = VREG;
	zvp->v_mount = tmp->vfs;
	vn_set_state(zvp, VSTATE_CONSTRUCTED);
	tmp->znode = zvp;
	TARFS_DPF(ZIO, "%s: created zio node\n", __func__);
	return (zio);
}
#endif
641
642/*
643 * Initializes the I/O layer, including decompression if the signature of
644 * a supported compression format is detected.  Returns 0 on success and a
645 * positive errno value on failure.
646 */
647int
648tarfs_io_init(struct tarfs_mount *tmp)
649{
650	uint8_t *block;
651#ifdef TARFS_ZIO
652	struct tarfs_zio *zio = NULL;
653#endif
654	ssize_t res;
655	int error = 0;
656
657	block = malloc(tmp->iosize, M_TEMP, M_ZERO | M_WAITOK);
658	res = tarfs_io_read_buf(tmp, true, block, 0, tmp->iosize);
659	if (res < 0) {
660		return (-res);
661	}
662	if (memcmp(block, XZ_MAGIC, sizeof(XZ_MAGIC)) == 0) {
663		printf("xz compression not supported\n");
664		error = EOPNOTSUPP;
665		goto bad;
666	} else if (memcmp(block, ZLIB_MAGIC, sizeof(ZLIB_MAGIC)) == 0) {
667		printf("zlib compression not supported\n");
668		error = EOPNOTSUPP;
669		goto bad;
670	} else if (memcmp(block, ZSTD_MAGIC, sizeof(ZSTD_MAGIC)) == 0) {
671#ifdef ZSTDIO
672		zio = tarfs_zio_init(tmp, 0, 0);
673		zio->zstd = malloc(sizeof(*zio->zstd), M_TARFSZSTATE, M_WAITOK);
674		zio->zstd->zds = ZSTD_createDStream_advanced(tarfs_zstd_mem);
675		(void)ZSTD_initDStream(zio->zstd->zds);
676#else
677		printf("zstd compression not supported\n");
678		error = EOPNOTSUPP;
679		goto bad;
680#endif
681	}
682bad:
683	free(block, M_TEMP);
684	return (error);
685}
686
#ifdef TARFS_ZIO
/*
 * Tears down the decompression layer: destroys the synthetic zio vnode,
 * then frees the zstd state, the frame index, and the zio structure
 * itself.  Returns 0 on success, or a positive errno value if the zio
 * vnode could not be locked, in which case nothing is freed and the
 * caller may retry.
 */
static int
tarfs_zio_fini(struct tarfs_mount *tmp)
{
	struct tarfs_zio *zio = tmp->zio;
	int error = 0;

	if (tmp->znode != NULL) {
		error = vn_lock(tmp->znode, LK_EXCLUSIVE);
		if (error != 0) {
			TARFS_DPF(ALLOC, "%s: failed to lock znode", __func__);
			return (error);
		}
		/* detach from the mount before destroying the vnode */
		tmp->znode->v_mount = NULL;
		vgone(tmp->znode);
		vput(tmp->znode);
		tmp->znode = NULL;
	}
#ifdef ZSTDIO
	if (zio->zstd != NULL) {
		TARFS_DPF(ALLOC, "%s: freeing zstd state\n", __func__);
		ZSTD_freeDStream(zio->zstd->zds);
		free(zio->zstd, M_TARFSZSTATE);
	}
#endif
	if (zio->idx != NULL) {
		TARFS_DPF(ALLOC, "%s: freeing index\n", __func__);
		free(zio->idx, M_TARFSZSTATE);
	}
	TARFS_DPF(ALLOC, "%s: freeing zio\n", __func__);
	free(zio, M_TARFSZSTATE);
	tmp->zio = NULL;
	return (error);
}
#endif
725
/*
 * Tears down the I/O layer, including the decompression layer if
 * applicable.  Returns 0, or the error from tarfs_zio_fini().
 */
int
tarfs_io_fini(struct tarfs_mount *tmp)
{
#ifdef TARFS_ZIO
	/* tear down the decompression state if one was set up */
	if (tmp->zio != NULL)
		return (tarfs_zio_fini(tmp));
#endif
	return (0);
}
742