1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2004 Max Khon
5 * Copyright (c) 2014 Juniper Networks, Inc.
6 * Copyright (c) 2006-2016 Maxim Sobolev <sobomax@FreeBSD.org>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include <sys/cdefs.h>
32__FBSDID("$FreeBSD$");
33
34#include <sys/param.h>
35#include <sys/bio.h>
36#include <sys/endian.h>
37#include <sys/errno.h>
38#include <sys/kernel.h>
39#include <sys/lock.h>
40#include <sys/mutex.h>
41#include <sys/malloc.h>
42#include <sys/sysctl.h>
43#include <sys/systm.h>
44#include <sys/kthread.h>
45
46#include <geom/geom.h>
47
48#include <geom/uzip/g_uzip.h>
49#include <geom/uzip/g_uzip_cloop.h>
50#include <geom/uzip/g_uzip_softc.h>
51#include <geom/uzip/g_uzip_dapi.h>
52#include <geom/uzip/g_uzip_zlib.h>
53#include <geom/uzip/g_uzip_lzma.h>
54#include <geom/uzip/g_uzip_wrkthr.h>
55
56#include "opt_geom.h"
57
58MALLOC_DEFINE(M_GEOM_UZIP, "geom_uzip", "GEOM UZIP data structures");
59
60FEATURE(geom_uzip, "GEOM read-only compressed disks support");
61
/*
 * One entry of the in-core table of contents (TOC); there is one entry per
 * cluster plus a trailing sentinel entry.
 */
struct g_uzip_blk {
	uint64_t offset;	/* byte offset of the cluster on the media */
	uint32_t blen;		/* compressed length, 0 for an all-zero cluster */
	unsigned char last:1;	/* set on the last non-backref'ed cluster */
	unsigned char padded:1;	/* blen was trimmed down to max_blen */
};

/* "Length not known yet" marker; also the "disabled" value of debug_block. */
#define	BLEN_UNDEF	UINT32_MAX

#ifndef ABS
#define	ABS(a)			((a) < 0 ? -(a) : (a))
#endif

/*
 * True when cluster number `mcn' lies within `ilen' clusters of `bcn':
 * in [bcn, bcn + ilen] for a non-negative `ilen', or in [bcn + ilen, bcn]
 * for a negative one.  Always false when `bcn' is BLEN_UNDEF.
 *
 * Every argument is parenthesized so that callers may pass arbitrary
 * expressions without running into operator-precedence surprises.
 */
#define	BLK_IN_RANGE(mcn, bcn, ilen)	\
    (((bcn) != BLEN_UNDEF) && ( \
	((ilen) >= 0 && ((mcn) >= (bcn)) && \
	    ((mcn) <= ((intmax_t)(bcn) + (ilen)))) || \
	((ilen) < 0 && ((mcn) <= (bcn)) && \
	    ((mcn) >= ((intmax_t)(bcn) + (ilen)))) \
    ))
79
/*
 * Initial value of the debug level: GUZ_DBG_IO (3) when the file is built
 * with GEOM_UZIP_DEBUG defined, silent (0) otherwise.
 */
#if defined(GEOM_UZIP_DEBUG)
#define	GEOM_UZIP_DBG_DEFAULT	3
#else
#define	GEOM_UZIP_DBG_DEFAULT	0
#endif

/* Debug levels; a message is printed when its level is <= the debug level. */
#define	GUZ_DBG_ERR	1	/* errors */
#define	GUZ_DBG_INFO	2	/* informational messages */
#define	GUZ_DBG_IO	3	/* per-request I/O tracing */
#define	GUZ_DBG_TOC	4	/* table-of-contents parsing details */

/*
 * Suffix appended to a provider name to form the geom name.
 * GUZ_DEV_NAME() relies on string literal concatenation, so `p' must be
 * a string literal.
 */
#define	GUZ_DEV_SUFX	".uzip"
#define	GUZ_DEV_NAME(p)	(p GUZ_DEV_SUFX)
93
94static char g_uzip_attach_to[MAXPATHLEN] = {"*"};
95static char g_uzip_noattach_to[MAXPATHLEN] = {GUZ_DEV_NAME("*")};
96TUNABLE_STR("kern.geom.uzip.attach_to", g_uzip_attach_to,
97    sizeof(g_uzip_attach_to));
98TUNABLE_STR("kern.geom.uzip.noattach_to", g_uzip_noattach_to,
99    sizeof(g_uzip_noattach_to));
100
101SYSCTL_DECL(_kern_geom);
102SYSCTL_NODE(_kern_geom, OID_AUTO, uzip, CTLFLAG_RW, 0, "GEOM_UZIP stuff");
103static u_int g_uzip_debug = GEOM_UZIP_DBG_DEFAULT;
104SYSCTL_UINT(_kern_geom_uzip, OID_AUTO, debug, CTLFLAG_RWTUN, &g_uzip_debug, 0,
105    "Debug level (0-4)");
106static u_int g_uzip_debug_block = BLEN_UNDEF;
107SYSCTL_UINT(_kern_geom_uzip, OID_AUTO, debug_block, CTLFLAG_RWTUN,
108    &g_uzip_debug_block, 0, "Debug operations around specific cluster#");
109
/*
 * Debug printing helpers.  `a' is a complete, parenthesized printf()
 * argument list, e.g. DPRINTF(GUZ_DBG_ERR, ("%s: oops\n", name));
 *
 * Each macro expands to a single statement (do { } while (0)) so that it
 * is safe to use as the body of an unbraced if/else.
 */

/* Print when the debug level is at least `lvl'. */
#define	DPRINTF(lvl, a)	do {					\
	if ((lvl) <= g_uzip_debug) {				\
		printf a;					\
	}							\
} while (0)

/*
 * Like DPRINTF(), but also print when cluster `cn' is within 8 clusters
 * of the cluster selected by g_uzip_debug_block.
 */
#define	DPRINTF_BLK(lvl, cn, a)	do {				\
	if ((lvl) <= g_uzip_debug ||				\
	    BLK_IN_RANGE((cn), g_uzip_debug_block, 8) ||	\
	    BLK_IN_RANGE((cn), g_uzip_debug_block, -8)) {	\
		printf a;					\
	}							\
} while (0)

/*
 * Like DPRINTF(), but also print when g_uzip_debug_block falls into the
 * cluster range starting at `bcn' and ending at `ecn'.  The range must be
 * non-empty (bcn < ecn).
 */
#define	DPRINTF_BRNG(lvl, bcn, ecn, a)	do {			\
	KASSERT((bcn) < (ecn),					\
	    ("DPRINTF_BRNG: invalid range (%ju, %ju)",		\
	    (uintmax_t)(bcn), (uintmax_t)(ecn)));		\
	if (((lvl) <= g_uzip_debug) ||				\
	    BLK_IN_RANGE(g_uzip_debug_block, (bcn),		\
	     (intmax_t)(ecn) - (intmax_t)(bcn))) {		\
		printf a;					\
	}							\
} while (0)
128
/* Name under which the GEOM class is registered. */
#define	UZIP_CLASS_NAME	"UZIP"

/*
 * Maximum allowed valid block size (to prevent foot-shooting)
 */
#define	MAX_BLKSZ	(MAXPHYS)

/*
 * Prefix every cloop image header must start with.  The string is only
 * ever read, hence const.
 */
static const char CLOOP_MAGIC_START[] = "#!/bin/sh\n";

static void g_uzip_read_done(struct bio *bp);
static void g_uzip_do(struct g_uzip_softc *, struct bio *bp);
140
141static void
142g_uzip_softc_free(struct g_geom *gp)
143{
144	struct g_uzip_softc *sc = gp->softc;
145
146	DPRINTF(GUZ_DBG_INFO, ("%s: %d requests, %d cached\n",
147	    gp->name, sc->req_total, sc->req_cached));
148
149	mtx_lock(&sc->queue_mtx);
150	sc->wrkthr_flags |= GUZ_SHUTDOWN;
151	wakeup(sc);
152	while (!(sc->wrkthr_flags & GUZ_EXITING)) {
153		msleep(sc->procp, &sc->queue_mtx, PRIBIO, "guzfree",
154		    hz / 10);
155	}
156	mtx_unlock(&sc->queue_mtx);
157
158	sc->dcp->free(sc->dcp);
159	free(sc->toc, M_GEOM_UZIP);
160	mtx_destroy(&sc->queue_mtx);
161	mtx_destroy(&sc->last_mtx);
162	free(sc->last_buf, M_GEOM_UZIP);
163	free(sc, M_GEOM_UZIP);
164	gp->softc = NULL;
165}
166
167static int
168g_uzip_cached(struct g_geom *gp, struct bio *bp)
169{
170	struct g_uzip_softc *sc;
171	off_t ofs;
172	size_t blk, blkofs, usz;
173
174	sc = gp->softc;
175	ofs = bp->bio_offset + bp->bio_completed;
176	blk = ofs / sc->blksz;
177	mtx_lock(&sc->last_mtx);
178	if (blk == sc->last_blk) {
179		blkofs = ofs % sc->blksz;
180		usz = sc->blksz - blkofs;
181		if (bp->bio_resid < usz)
182			usz = bp->bio_resid;
183		memcpy(bp->bio_data + bp->bio_completed, sc->last_buf + blkofs,
184		    usz);
185		sc->req_cached++;
186		mtx_unlock(&sc->last_mtx);
187
188		DPRINTF(GUZ_DBG_IO, ("%s/%s: %p: offset=%jd: got %jd bytes "
189		    "from cache\n", __func__, gp->name, bp, (intmax_t)ofs,
190		    (intmax_t)usz));
191
192		bp->bio_completed += usz;
193		bp->bio_resid -= usz;
194
195		if (bp->bio_resid == 0) {
196			g_io_deliver(bp, 0);
197			return (1);
198		}
199	} else
200		mtx_unlock(&sc->last_mtx);
201
202	return (0);
203}
204
/* Media offset of the first byte past compressed cluster `bi'. */
#define	BLK_ENDS(sc, bi)	\
    ((sc)->toc[(bi)].offset + (sc)->toc[(bi)].blen)

/* True when cluster `bi' starts exactly where cluster `bi - 1' ends. */
#define	BLK_IS_CONT(sc, bi)	\
    ((sc)->toc[(bi)].offset == BLK_ENDS((sc), (bi) - 1))

/* True when cluster `bi' takes no space on the media (reads as zeros). */
#define	BLK_IS_NIL(sc, bi)	((sc)->toc[(bi)].blen == 0)

/* Offset of cluster `bi' rounded down to the sector size of `pp'. */
#define	TOFF_2_BOFF(sc, pp, bi)	\
    ((sc)->toc[(bi)].offset - (sc)->toc[(bi)].offset % (pp)->sectorsize)

/*
 * Number of bytes to read so that a request starting at (bp)->bio_offset
 * covers everything up to the end of cluster `ei', rounded up to the
 * sector size of `pp'.
 */
#define	TLEN_2_BLEN(sc, pp, bp, ei)	\
    roundup(BLK_ENDS((sc), (ei)) - (bp)->bio_offset, (pp)->sectorsize)
216
217static int
218g_uzip_request(struct g_geom *gp, struct bio *bp)
219{
220	struct g_uzip_softc *sc;
221	struct bio *bp2;
222	struct g_consumer *cp;
223	struct g_provider *pp;
224	off_t ofs, start_blk_ofs;
225	size_t i, start_blk, end_blk, zsize;
226
227	if (g_uzip_cached(gp, bp) != 0)
228		return (1);
229
230	sc = gp->softc;
231
232	cp = LIST_FIRST(&gp->consumer);
233	pp = cp->provider;
234
235	ofs = bp->bio_offset + bp->bio_completed;
236	start_blk = ofs / sc->blksz;
237	KASSERT(start_blk < sc->nblocks, ("start_blk out of range"));
238	end_blk = howmany(ofs + bp->bio_resid, sc->blksz);
239	KASSERT(end_blk <= sc->nblocks, ("end_blk out of range"));
240
241	for (; BLK_IS_NIL(sc, start_blk) && start_blk < end_blk; start_blk++) {
242		/* Fill in any leading Nil blocks */
243		start_blk_ofs = ofs % sc->blksz;
244		zsize = MIN(sc->blksz - start_blk_ofs, bp->bio_resid);
245		DPRINTF_BLK(GUZ_DBG_IO, start_blk, ("%s/%s: %p/%ju: "
246		    "filling %ju zero bytes\n", __func__, gp->name, gp,
247		    (uintmax_t)bp->bio_completed, (uintmax_t)zsize));
248		bzero(bp->bio_data + bp->bio_completed, zsize);
249		bp->bio_completed += zsize;
250		bp->bio_resid -= zsize;
251		ofs += zsize;
252	}
253
254	if (start_blk == end_blk) {
255		KASSERT(bp->bio_resid == 0, ("bp->bio_resid is invalid"));
256		/*
257		 * No non-Nil data is left, complete request immediately.
258		 */
259		DPRINTF(GUZ_DBG_IO, ("%s/%s: %p: all done returning %ju "
260		    "bytes\n", __func__, gp->name, gp,
261		    (uintmax_t)bp->bio_completed));
262		g_io_deliver(bp, 0);
263		return (1);
264	}
265
266	for (i = start_blk + 1; i < end_blk; i++) {
267		/* Trim discontinuous areas if any */
268		if (!BLK_IS_CONT(sc, i)) {
269			end_blk = i;
270			break;
271		}
272	}
273
274	DPRINTF_BRNG(GUZ_DBG_IO, start_blk, end_blk, ("%s/%s: %p: "
275	    "start=%u (%ju[%jd]), end=%u (%ju)\n", __func__, gp->name, bp,
276	    (u_int)start_blk, (uintmax_t)sc->toc[start_blk].offset,
277	    (intmax_t)sc->toc[start_blk].blen,
278	    (u_int)end_blk, (uintmax_t)BLK_ENDS(sc, end_blk - 1)));
279
280	bp2 = g_clone_bio(bp);
281	if (bp2 == NULL) {
282		g_io_deliver(bp, ENOMEM);
283		return (1);
284	}
285	bp2->bio_done = g_uzip_read_done;
286
287	bp2->bio_offset = TOFF_2_BOFF(sc, pp, start_blk);
288	while (1) {
289		bp2->bio_length = TLEN_2_BLEN(sc, pp, bp2, end_blk - 1);
290		if (bp2->bio_length <= MAXPHYS) {
291			break;
292		}
293		if (end_blk == (start_blk + 1)) {
294			break;
295		}
296		end_blk--;
297	}
298
299	DPRINTF(GUZ_DBG_IO, ("%s/%s: bp2->bio_length = %jd, "
300	    "bp2->bio_offset = %jd\n", __func__, gp->name,
301	    (intmax_t)bp2->bio_length, (intmax_t)bp2->bio_offset));
302
303	bp2->bio_data = malloc(bp2->bio_length, M_GEOM_UZIP, M_NOWAIT);
304	if (bp2->bio_data == NULL) {
305		g_destroy_bio(bp2);
306		g_io_deliver(bp, ENOMEM);
307		return (1);
308	}
309
310	DPRINTF_BRNG(GUZ_DBG_IO, start_blk, end_blk, ("%s/%s: %p: "
311	    "reading %jd bytes from offset %jd\n", __func__, gp->name, bp,
312	    (intmax_t)bp2->bio_length, (intmax_t)bp2->bio_offset));
313
314	g_io_request(bp2, cp);
315	return (0);
316}
317
318static void
319g_uzip_read_done(struct bio *bp)
320{
321	struct bio *bp2;
322	struct g_geom *gp;
323	struct g_uzip_softc *sc;
324
325	bp2 = bp->bio_parent;
326	gp = bp2->bio_to->geom;
327	sc = gp->softc;
328
329	mtx_lock(&sc->queue_mtx);
330	bioq_disksort(&sc->bio_queue, bp);
331	mtx_unlock(&sc->queue_mtx);
332	wakeup(sc);
333}
334
/*
 * Check whether all `size' bytes at `memory' are equal to `val'.
 *
 * Returns 1 when every byte matches and 0 otherwise.  Note that this is
 * the opposite of the memcmp() convention.  An empty region trivially
 * matches; it is handled up front because the check below reads the
 * first byte and compares the remaining size - 1 bytes.
 */
static int
g_uzip_memvcmp(const void *memory, unsigned char val, size_t size)
{
	const unsigned char *mm;

	if (size == 0)
		return (1);

	mm = memory;
	/*
	 * The first byte equals `val' and every byte equals its successor,
	 * hence all bytes equal `val'.
	 */
	return (mm[0] == val && memcmp(mm, mm + 1, size - 1) == 0);
}
343
344static void
345g_uzip_do(struct g_uzip_softc *sc, struct bio *bp)
346{
347	struct bio *bp2;
348	struct g_provider *pp;
349	struct g_consumer *cp;
350	struct g_geom *gp;
351	char *data, *data2;
352	off_t ofs;
353	size_t blk, blkofs, len, ulen, firstblk;
354	int err;
355
356	bp2 = bp->bio_parent;
357	gp = bp2->bio_to->geom;
358
359	cp = LIST_FIRST(&gp->consumer);
360	pp = cp->provider;
361
362	bp2->bio_error = bp->bio_error;
363	if (bp2->bio_error != 0)
364		goto done;
365
366	/* Make sure there's forward progress. */
367	if (bp->bio_completed == 0) {
368		bp2->bio_error = ECANCELED;
369		goto done;
370	}
371
372	ofs = bp2->bio_offset + bp2->bio_completed;
373	firstblk = blk = ofs / sc->blksz;
374	blkofs = ofs % sc->blksz;
375	data = bp->bio_data + sc->toc[blk].offset % pp->sectorsize;
376	data2 = bp2->bio_data + bp2->bio_completed;
377	while (bp->bio_completed && bp2->bio_resid) {
378		if (blk > firstblk && !BLK_IS_CONT(sc, blk)) {
379			DPRINTF_BLK(GUZ_DBG_IO, blk, ("%s/%s: %p: backref'ed "
380			    "cluster #%u requested, looping around\n",
381			    __func__, gp->name, bp2, (u_int)blk));
382			goto done;
383		}
384		ulen = MIN(sc->blksz - blkofs, bp2->bio_resid);
385		len = sc->toc[blk].blen;
386		DPRINTF(GUZ_DBG_IO, ("%s/%s: %p/%ju: data2=%p, ulen=%u, "
387		    "data=%p, len=%u\n", __func__, gp->name, gp,
388		    bp->bio_completed, data2, (u_int)ulen, data, (u_int)len));
389		if (len == 0) {
390			/* All zero block: no cache update */
391zero_block:
392			bzero(data2, ulen);
393		} else if (len <= bp->bio_completed) {
394			mtx_lock(&sc->last_mtx);
395			err = sc->dcp->decompress(sc->dcp, gp->name, data,
396			    len, sc->last_buf);
397			if (err != 0 && sc->toc[blk].last != 0) {
398				/*
399				 * Last block decompression has failed, check
400				 * if it's just zero padding.
401				 */
402				if (g_uzip_memvcmp(data, '\0', len) == 0) {
403					sc->toc[blk].blen = 0;
404					sc->last_blk = -1;
405					mtx_unlock(&sc->last_mtx);
406					len = 0;
407					goto zero_block;
408				}
409			}
410			if (err != 0) {
411				sc->last_blk = -1;
412				mtx_unlock(&sc->last_mtx);
413				bp2->bio_error = EILSEQ;
414				DPRINTF(GUZ_DBG_ERR, ("%s/%s: decompress"
415				    "(%p, %ju, %ju) failed\n", __func__,
416				    gp->name, sc->dcp, (uintmax_t)blk,
417				    (uintmax_t)len));
418				goto done;
419			}
420			sc->last_blk = blk;
421			memcpy(data2, sc->last_buf + blkofs, ulen);
422			mtx_unlock(&sc->last_mtx);
423			err = sc->dcp->rewind(sc->dcp, gp->name);
424			if (err != 0) {
425				bp2->bio_error = EILSEQ;
426				DPRINTF(GUZ_DBG_ERR, ("%s/%s: rewind(%p) "
427				    "failed\n", __func__, gp->name, sc->dcp));
428				goto done;
429			}
430			data += len;
431		} else
432			break;
433
434		data2 += ulen;
435		bp2->bio_completed += ulen;
436		bp2->bio_resid -= ulen;
437		bp->bio_completed -= len;
438		blkofs = 0;
439		blk++;
440	}
441
442done:
443	/* Finish processing the request. */
444	free(bp->bio_data, M_GEOM_UZIP);
445	g_destroy_bio(bp);
446	if (bp2->bio_error != 0 || bp2->bio_resid == 0)
447		g_io_deliver(bp2, bp2->bio_error);
448	else
449		g_uzip_request(gp, bp2);
450}
451
452static void
453g_uzip_start(struct bio *bp)
454{
455	struct g_provider *pp;
456	struct g_geom *gp;
457	struct g_uzip_softc *sc;
458
459	pp = bp->bio_to;
460	gp = pp->geom;
461
462	DPRINTF(GUZ_DBG_IO, ("%s/%s: %p: cmd=%d, offset=%jd, length=%jd, "
463	    "buffer=%p\n", __func__, gp->name, bp, bp->bio_cmd,
464	    (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length, bp->bio_data));
465
466	sc = gp->softc;
467	sc->req_total++;
468
469	if (bp->bio_cmd == BIO_GETATTR) {
470		struct bio *bp2;
471		struct g_consumer *cp;
472		struct g_geom *gp;
473		struct g_provider *pp;
474
475		/* pass on MNT:* requests and ignore others */
476		if (strncmp(bp->bio_attribute, "MNT:", 4) == 0) {
477			bp2 = g_clone_bio(bp);
478			if (bp2 == NULL) {
479				g_io_deliver(bp, ENOMEM);
480				return;
481			}
482			bp2->bio_done = g_std_done;
483			pp = bp->bio_to;
484			gp = pp->geom;
485			cp = LIST_FIRST(&gp->consumer);
486			g_io_request(bp2, cp);
487			return;
488		}
489	}
490	if (bp->bio_cmd != BIO_READ) {
491		g_io_deliver(bp, EOPNOTSUPP);
492		return;
493	}
494
495	bp->bio_resid = bp->bio_length;
496	bp->bio_completed = 0;
497
498	g_uzip_request(gp, bp);
499}
500
501static void
502g_uzip_orphan(struct g_consumer *cp)
503{
504	struct g_geom *gp;
505
506	g_topology_assert();
507	G_VALID_CONSUMER(cp);
508	gp = cp->geom;
509	g_trace(G_T_TOPOLOGY, "%s(%p/%s)", __func__, cp, gp->name);
510	g_wither_geom(gp, ENXIO);
511
512	/*
513	 * We can safely free the softc now if there are no accesses,
514	 * otherwise g_uzip_access() will do that after the last close.
515	 */
516	if ((cp->acr + cp->acw + cp->ace) == 0)
517		g_uzip_softc_free(gp);
518}
519
520static void
521g_uzip_spoiled(struct g_consumer *cp)
522{
523
524	g_trace(G_T_TOPOLOGY, "%s(%p/%s)", __func__, cp, cp->geom->name);
525	cp->flags |= G_CF_ORPHAN;
526	g_uzip_orphan(cp);
527}
528
529static int
530g_uzip_access(struct g_provider *pp, int dr, int dw, int de)
531{
532	struct g_geom *gp;
533	struct g_consumer *cp;
534	int error;
535
536	gp = pp->geom;
537	cp = LIST_FIRST(&gp->consumer);
538	KASSERT (cp != NULL, ("g_uzip_access but no consumer"));
539
540	if (cp->acw + dw > 0)
541		return (EROFS);
542
543	error = g_access(cp, dr, dw, de);
544
545	/*
546	 * Free the softc if all providers have been closed and this geom
547	 * is being removed.
548	 */
549	if (error == 0 && (gp->flags & G_GEOM_WITHER) != 0 &&
550	    (cp->acr + cp->acw + cp->ace) == 0)
551		g_uzip_softc_free(gp);
552
553	return (error);
554}
555
556static int
557g_uzip_parse_toc(struct g_uzip_softc *sc, struct g_provider *pp,
558    struct g_geom *gp)
559{
560	uint32_t i, j, backref_to;
561	uint64_t max_offset, min_offset;
562	struct g_uzip_blk *last_blk;
563
564	min_offset = sizeof(struct cloop_header) +
565	    (sc->nblocks + 1) * sizeof(uint64_t);
566	max_offset = sc->toc[0].offset - 1;
567	last_blk = &sc->toc[0];
568	for (i = 0; i < sc->nblocks; i++) {
569		/* First do some bounds checking */
570		if ((sc->toc[i].offset < min_offset) ||
571		    (sc->toc[i].offset > pp->mediasize)) {
572			goto error_offset;
573		}
574		DPRINTF_BLK(GUZ_DBG_IO, i, ("%s: cluster #%u "
575		    "offset=%ju max_offset=%ju\n", gp->name,
576		    (u_int)i, (uintmax_t)sc->toc[i].offset,
577		    (uintmax_t)max_offset));
578		backref_to = BLEN_UNDEF;
579		if (sc->toc[i].offset < max_offset) {
580			/*
581			 * For the backref'ed blocks search already parsed
582			 * TOC entries for the matching offset and copy the
583			 * size from matched entry.
584			 */
585			for (j = 0; j <= i; j++) {
586                                if (sc->toc[j].offset == sc->toc[i].offset &&
587				    !BLK_IS_NIL(sc, j)) {
588                                        break;
589                                }
590                                if (j != i) {
591					continue;
592				}
593				DPRINTF(GUZ_DBG_ERR, ("%s: cannot match "
594				    "backref'ed offset at cluster #%u\n",
595				    gp->name, i));
596				return (-1);
597			}
598			sc->toc[i].blen = sc->toc[j].blen;
599			backref_to = j;
600		} else {
601			last_blk = &sc->toc[i];
602			/*
603			 * For the "normal blocks" seek forward until we hit
604			 * block whose offset is larger than ours and assume
605			 * it's going to be the next one.
606			 */
607			for (j = i + 1; j < sc->nblocks; j++) {
608				if (sc->toc[j].offset > max_offset) {
609					break;
610				}
611			}
612			sc->toc[i].blen = sc->toc[j].offset -
613			    sc->toc[i].offset;
614			if (BLK_ENDS(sc, i) > pp->mediasize) {
615				DPRINTF(GUZ_DBG_ERR, ("%s: cluster #%u "
616				    "extends past media boundary (%ju > %ju)\n",
617				    gp->name, (u_int)i,
618				    (uintmax_t)BLK_ENDS(sc, i),
619				    (intmax_t)pp->mediasize));
620				return (-1);
621			}
622			KASSERT(max_offset <= sc->toc[i].offset, (
623			    "%s: max_offset is incorrect: %ju",
624			    gp->name, (uintmax_t)max_offset));
625			max_offset = BLK_ENDS(sc, i) - 1;
626		}
627		DPRINTF_BLK(GUZ_DBG_TOC, i, ("%s: cluster #%u, original %u "
628		    "bytes, in %u bytes", gp->name, i, sc->blksz,
629		    sc->toc[i].blen));
630		if (backref_to != BLEN_UNDEF) {
631			DPRINTF_BLK(GUZ_DBG_TOC, i, (" (->#%u)",
632			    (u_int)backref_to));
633		}
634		DPRINTF_BLK(GUZ_DBG_TOC, i, ("\n"));
635	}
636	last_blk->last = 1;
637	/* Do a second pass to validate block lengths */
638	for (i = 0; i < sc->nblocks; i++) {
639		if (sc->toc[i].blen > sc->dcp->max_blen) {
640			if (sc->toc[i].last == 0) {
641				DPRINTF(GUZ_DBG_ERR, ("%s: cluster #%u "
642				    "length (%ju) exceeds "
643				    "max_blen (%ju)\n", gp->name, i,
644				    (uintmax_t)sc->toc[i].blen,
645				    (uintmax_t)sc->dcp->max_blen));
646				return (-1);
647			}
648			DPRINTF(GUZ_DBG_INFO, ("%s: cluster #%u extra "
649			    "padding is detected, trimmed to %ju\n",
650			    gp->name, i, (uintmax_t)sc->dcp->max_blen));
651			    sc->toc[i].blen = sc->dcp->max_blen;
652			sc->toc[i].padded = 1;
653		}
654	}
655	return (0);
656
657error_offset:
658	DPRINTF(GUZ_DBG_ERR, ("%s: cluster #%u: invalid offset %ju, "
659	    "min_offset=%ju mediasize=%jd\n", gp->name, (u_int)i,
660	    sc->toc[i].offset, min_offset, pp->mediasize));
661	return (-1);
662}
663
664static struct g_geom *
665g_uzip_taste(struct g_class *mp, struct g_provider *pp, int flags)
666{
667	int error;
668	uint32_t i, total_offsets, offsets_read, blk;
669	void *buf;
670	struct cloop_header *header;
671	struct g_consumer *cp;
672	struct g_geom *gp;
673	struct g_provider *pp2;
674	struct g_uzip_softc *sc;
675	enum {
676		G_UZIP = 1,
677		G_ULZMA
678	} type;
679
680	g_trace(G_T_TOPOLOGY, "%s(%s,%s)", __func__, mp->name, pp->name);
681	g_topology_assert();
682
683	/* Skip providers that are already open for writing. */
684	if (pp->acw > 0)
685		return (NULL);
686
687	if ((fnmatch(g_uzip_attach_to, pp->name, 0) != 0) ||
688	    (fnmatch(g_uzip_noattach_to, pp->name, 0) == 0)) {
689		DPRINTF(GUZ_DBG_INFO, ("%s(%s,%s), ignoring\n", __func__,
690		    mp->name, pp->name));
691		return (NULL);
692	}
693
694	buf = NULL;
695
696	/*
697	 * Create geom instance.
698	 */
699	gp = g_new_geomf(mp, GUZ_DEV_NAME("%s"), pp->name);
700	cp = g_new_consumer(gp);
701	error = g_attach(cp, pp);
702	if (error == 0)
703		error = g_access(cp, 1, 0, 0);
704	if (error) {
705		goto e1;
706	}
707	g_topology_unlock();
708
709	/*
710	 * Read cloop header, look for CLOOP magic, perform
711	 * other validity checks.
712	 */
713	DPRINTF(GUZ_DBG_INFO, ("%s: media sectorsize %u, mediasize %jd\n",
714	    gp->name, pp->sectorsize, (intmax_t)pp->mediasize));
715	buf = g_read_data(cp, 0, pp->sectorsize, NULL);
716	if (buf == NULL)
717		goto e2;
718	header = (struct cloop_header *) buf;
719	if (strncmp(header->magic, CLOOP_MAGIC_START,
720	    sizeof(CLOOP_MAGIC_START) - 1) != 0) {
721		DPRINTF(GUZ_DBG_ERR, ("%s: no CLOOP magic\n", gp->name));
722		goto e3;
723	}
724
725	switch (header->magic[CLOOP_OFS_COMPR]) {
726	case CLOOP_COMP_LZMA:
727	case CLOOP_COMP_LZMA_DDP:
728		type = G_ULZMA;
729		if (header->magic[CLOOP_OFS_VERSN] < CLOOP_MINVER_LZMA) {
730			DPRINTF(GUZ_DBG_ERR, ("%s: image version too old\n",
731			    gp->name));
732			goto e3;
733		}
734		DPRINTF(GUZ_DBG_INFO, ("%s: GEOM_UZIP_LZMA image found\n",
735		    gp->name));
736		break;
737	case CLOOP_COMP_LIBZ:
738	case CLOOP_COMP_LIBZ_DDP:
739		type = G_UZIP;
740		if (header->magic[CLOOP_OFS_VERSN] < CLOOP_MINVER_ZLIB) {
741			DPRINTF(GUZ_DBG_ERR, ("%s: image version too old\n",
742			    gp->name));
743			goto e3;
744		}
745		DPRINTF(GUZ_DBG_INFO, ("%s: GEOM_UZIP_ZLIB image found\n",
746		    gp->name));
747		break;
748	default:
749		DPRINTF(GUZ_DBG_ERR, ("%s: unsupported image type\n",
750		    gp->name));
751                goto e3;
752        }
753
754	/*
755	 * Initialize softc and read offsets.
756	 */
757	sc = malloc(sizeof(*sc), M_GEOM_UZIP, M_WAITOK | M_ZERO);
758	gp->softc = sc;
759	sc->blksz = ntohl(header->blksz);
760	sc->nblocks = ntohl(header->nblocks);
761	if (sc->blksz % 512 != 0) {
762		printf("%s: block size (%u) should be multiple of 512.\n",
763		    gp->name, sc->blksz);
764		goto e4;
765	}
766	if (sc->blksz > MAX_BLKSZ) {
767		printf("%s: block size (%u) should not be larger than %d.\n",
768		    gp->name, sc->blksz, MAX_BLKSZ);
769	}
770	total_offsets = sc->nblocks + 1;
771	if (sizeof(struct cloop_header) +
772	    total_offsets * sizeof(uint64_t) > pp->mediasize) {
773		printf("%s: media too small for %u blocks\n",
774		    gp->name, sc->nblocks);
775		goto e4;
776	}
777	sc->toc = malloc(total_offsets * sizeof(struct g_uzip_blk),
778	    M_GEOM_UZIP, M_WAITOK | M_ZERO);
779	offsets_read = MIN(total_offsets,
780	    (pp->sectorsize - sizeof(*header)) / sizeof(uint64_t));
781	for (i = 0; i < offsets_read; i++) {
782		sc->toc[i].offset = be64toh(((uint64_t *) (header + 1))[i]);
783		sc->toc[i].blen = BLEN_UNDEF;
784	}
785	DPRINTF(GUZ_DBG_INFO, ("%s: %u offsets in the first sector\n",
786	       gp->name, offsets_read));
787	for (blk = 1; offsets_read < total_offsets; blk++) {
788		uint32_t nread;
789
790		free(buf, M_GEOM);
791		buf = g_read_data(
792		    cp, blk * pp->sectorsize, pp->sectorsize, NULL);
793		if (buf == NULL)
794			goto e5;
795		nread = MIN(total_offsets - offsets_read,
796		     pp->sectorsize / sizeof(uint64_t));
797		DPRINTF(GUZ_DBG_TOC, ("%s: %u offsets read from sector %d\n",
798		    gp->name, nread, blk));
799		for (i = 0; i < nread; i++) {
800			sc->toc[offsets_read + i].offset =
801			    be64toh(((uint64_t *) buf)[i]);
802			sc->toc[offsets_read + i].blen = BLEN_UNDEF;
803		}
804		offsets_read += nread;
805	}
806	free(buf, M_GEOM);
807	buf = NULL;
808	offsets_read -= 1;
809	DPRINTF(GUZ_DBG_INFO, ("%s: done reading %u block offsets from %u "
810	    "sectors\n", gp->name, offsets_read, blk));
811	if (sc->nblocks != offsets_read) {
812		DPRINTF(GUZ_DBG_ERR, ("%s: read %s offsets than expected "
813		    "blocks\n", gp->name,
814		    sc->nblocks < offsets_read ? "more" : "less"));
815		goto e5;
816	}
817
818	if (type == G_UZIP) {
819		sc->dcp = g_uzip_zlib_ctor(sc->blksz);
820	} else {
821		sc->dcp = g_uzip_lzma_ctor(sc->blksz);
822	}
823	if (sc->dcp == NULL) {
824		goto e5;
825	}
826
827	/*
828	 * "Fake" last+1 block, to make it easier for the TOC parser to
829	 * iterate without making the last element a special case.
830	 */
831	sc->toc[sc->nblocks].offset = pp->mediasize;
832	/* Massage TOC (table of contents), make sure it is sound */
833	if (g_uzip_parse_toc(sc, pp, gp) != 0) {
834		DPRINTF(GUZ_DBG_ERR, ("%s: TOC error\n", gp->name));
835		goto e6;
836	}
837	mtx_init(&sc->last_mtx, "geom_uzip cache", NULL, MTX_DEF);
838	mtx_init(&sc->queue_mtx, "geom_uzip wrkthread", NULL, MTX_DEF);
839	bioq_init(&sc->bio_queue);
840	sc->last_blk = -1;
841	sc->last_buf = malloc(sc->blksz, M_GEOM_UZIP, M_WAITOK);
842	sc->req_total = 0;
843	sc->req_cached = 0;
844
845	sc->uzip_do = &g_uzip_do;
846
847	error = kproc_create(g_uzip_wrkthr, sc, &sc->procp, 0, 0, "%s",
848	    gp->name);
849	if (error != 0) {
850		goto e7;
851	}
852
853	g_topology_lock();
854	pp2 = g_new_providerf(gp, "%s", gp->name);
855	pp2->sectorsize = 512;
856	pp2->mediasize = (off_t)sc->nblocks * sc->blksz;
857	pp2->stripesize = pp->stripesize;
858	pp2->stripeoffset = pp->stripeoffset;
859	g_error_provider(pp2, 0);
860	g_access(cp, -1, 0, 0);
861
862	DPRINTF(GUZ_DBG_INFO, ("%s: taste ok (%d, %jd), (%d, %d), %x\n",
863	    gp->name, pp2->sectorsize, (intmax_t)pp2->mediasize,
864	    pp2->stripeoffset, pp2->stripesize, pp2->flags));
865	DPRINTF(GUZ_DBG_INFO, ("%s: %u x %u blocks\n", gp->name, sc->nblocks,
866	    sc->blksz));
867	return (gp);
868
869e7:
870	free(sc->last_buf, M_GEOM);
871	mtx_destroy(&sc->queue_mtx);
872	mtx_destroy(&sc->last_mtx);
873e6:
874	sc->dcp->free(sc->dcp);
875e5:
876	free(sc->toc, M_GEOM);
877e4:
878	free(gp->softc, M_GEOM_UZIP);
879e3:
880	if (buf != NULL) {
881		free(buf, M_GEOM);
882	}
883e2:
884	g_topology_lock();
885	g_access(cp, -1, 0, 0);
886e1:
887	g_detach(cp);
888	g_destroy_consumer(cp);
889	g_destroy_geom(gp);
890
891	return (NULL);
892}
893
894static int
895g_uzip_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp)
896{
897	struct g_provider *pp;
898
899	KASSERT(gp != NULL, ("NULL geom"));
900	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, gp->name);
901	g_topology_assert();
902
903	if (gp->softc == NULL) {
904		DPRINTF(GUZ_DBG_ERR, ("%s(%s): gp->softc == NULL\n", __func__,
905		    gp->name));
906		return (ENXIO);
907	}
908
909	pp = LIST_FIRST(&gp->provider);
910	KASSERT(pp != NULL, ("NULL provider"));
911	if (pp->acr > 0 || pp->acw > 0 || pp->ace > 0)
912		return (EBUSY);
913
914	g_wither_geom(gp, ENXIO);
915	g_uzip_softc_free(gp);
916	return (0);
917}
918
919static struct g_class g_uzip_class = {
920	.name = UZIP_CLASS_NAME,
921	.version = G_VERSION,
922	.taste = g_uzip_taste,
923	.destroy_geom = g_uzip_destroy_geom,
924
925	.start = g_uzip_start,
926	.orphan = g_uzip_orphan,
927	.access = g_uzip_access,
928	.spoiled = g_uzip_spoiled,
929};
930
931DECLARE_GEOM_CLASS(g_uzip_class, g_uzip);
932MODULE_DEPEND(g_uzip, xz, 1, 1, 1);
933MODULE_DEPEND(g_uzip, zlib, 1, 1, 1);
934MODULE_VERSION(geom_uzip, 0);
935