1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2005-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#ifndef	_G_JOURNAL_H_
30#define	_G_JOURNAL_H_
31
32#include <sys/endian.h>
33#include <sys/md5.h>
34#ifdef _KERNEL
35#include <sys/bio.h>
36#endif
37
/* Class name string of the journal GEOM class. */
#define	G_JOURNAL_CLASS_NAME	"JOURNAL"

/*
 * Magic string; presumably the value kept in md_magic (with its NUL it
 * fits that field's 16 bytes) -- confirm against the code that writes it.
 */
#define	G_JOURNAL_MAGIC		"GEOM::JOURNAL"
/*
 * Version history:
 * 0 - Initial version number.
 *
 * journal_metadata_decode() accepts only version 0 and returns EINVAL
 * for anything else.
 */
#define	G_JOURNAL_VERSION	0
46
47#ifdef _KERNEL
/* Debug level consulted by the logging and timing macros in this header. */
extern int g_journal_debug;

/*
 * Logging helpers.  Both forward to _GEOM_DEBUG() with the "GEOM_JOURNAL"
 * tag, the g_journal_debug level and the message level `lvl'; GJ_DEBUG()
 * passes NULL in the position where GJ_LOGREQ() passes `bp'.
 */
#define	GJ_DEBUG(lvl, ...) \
    _GEOM_DEBUG("GEOM_JOURNAL", g_journal_debug, (lvl), NULL, __VA_ARGS__)
#define	GJ_LOGREQ(lvl, bp, ...) \
    _GEOM_DEBUG("GEOM_JOURNAL", g_journal_debug, (lvl), (bp), __VA_ARGS__)
54
/*
 * True when sc_journal_offset lies exactly one journal-provider sector
 * past the active journal's start offset and sc_current_count is zero.
 * `sc' is evaluated several times, so it must be free of side effects.
 */
#define	JEMPTY(sc)	((sc)->sc_journal_offset -			\
			 (sc)->sc_jprovider->sectorsize ==		\
			 (sc)->sc_active.jj_offset &&			\
			 (sc)->sc_current_count == 0)
59
/*
 * Request type codes.  GJ_BIO_MASK covers the low four bits, which is
 * wide enough for every code defined here.
 */
#define	GJ_BIO_REGULAR		0x00
#define	GJ_BIO_READ		0x01
#define	GJ_BIO_JOURNAL		0x02
#define	GJ_BIO_COPY		0x03
#define	GJ_BIO_MASK		0x0f

/* Compiled out: flag bits that would occupy the upper nibble. */
#if 0
#define	GJF_BIO_DONT_FREE	0x10
#define	GJF_BIO_MASK		0xf0
#endif

/*
 * Device flag bits -- presumably stored in sc_flags (TODO confirm).
 * Values 0x0002 through 0x0008 are not assigned here.
 */
#define	GJF_DEVICE_HARDCODED		0x0001
#define	GJF_DEVICE_DESTROY		0x0010
#define	GJF_DEVICE_SWITCH		0x0020
#define	GJF_DEVICE_BEFORE_SWITCH	0x0040
#define	GJF_DEVICE_CLEAN		0x0080
#define	GJF_DEVICE_CHECKSUM		0x0100

/* NOTE(review): what this limit bounds is not visible in this header. */
#define	GJ_HARD_LIMIT		64
79
/*
 * Pointers to journaled data are kept in bio structures.  Two off_t values
 * are needed per request (the offset in the data provider and the offset
 * in the journal), so the bio_completed field is borrowed for this.
 */
#define	bio_joffset	bio_completed
/*
 * The bio_caller1 field is used as the "next" pointer of the GJQ_* queues.
 */
#define	bio_next	bio_caller1
90
/*
 * There are two such structures maintained inside each journaled device.
 * One describes the active part of the journal, where recent requests are
 * stored.  The second describes the last consistent part of the journal,
 * with requests that are copied to the destination provider.
 */
struct g_journal_journal {
	struct bio	*jj_queue;	/* Cached journal entries. */
	off_t		 jj_offset;	/* Journal's start offset. */
};
101
/*
 * State kept for one journaled device.
 */
struct g_journal_softc {
	uint32_t	 sc_id;
	uint8_t		 sc_type;
	uint8_t		 sc_orig_type;
	struct g_geom	*sc_geom;
	u_int		 sc_flags;
	struct mtx	 sc_mtx;
	off_t		 sc_mediasize;
	u_int		 sc_sectorsize;
#define	GJ_FLUSH_DATA		0x01
#define	GJ_FLUSH_JOURNAL	0x02
	u_int		 sc_bio_flush;	/* Presumably GJ_FLUSH_* bits. */

	uint32_t	 sc_journal_id;
	uint32_t	 sc_journal_next_id;
	int		 sc_journal_copying;
	off_t		 sc_journal_offset;
	/*
	 * NOTE(review): declared off_t although the other journal IDs
	 * above are uint32_t -- confirm this is intended.
	 */
	off_t		 sc_journal_previous_id;

	struct bio_queue_head sc_back_queue;
	struct bio_queue_head sc_regular_queue;

	struct bio_queue_head sc_delayed_queue;
	int		 sc_delayed_count;

	/* The queues below are plain bio pointers, as the GJQ_* macros use. */
	struct bio	*sc_current_queue;
	int		 sc_current_count;

	struct bio	*sc_flush_queue;
	int		 sc_flush_count;
	int		 sc_flush_in_progress;

	struct bio	*sc_copy_queue;
	int		 sc_copy_in_progress;

	struct g_consumer *sc_dconsumer; /* Its provider is sc_dprovider. */
	struct g_consumer *sc_jconsumer; /* Its provider is sc_jprovider. */

	struct g_journal_journal sc_inactive;
	struct g_journal_journal sc_active;

	off_t		 sc_jstart;	/* Journal space start offset. */
	off_t		 sc_jend;	/* Journal space end offset. */

	struct callout	 sc_callout;
	struct proc	*sc_worker;

	struct root_hold_token *sc_rootmount;
};
/*
 * Member-chain shorthands, meant to follow `sc->': sc_dprovider and
 * sc_jprovider name the providers behind the two consumers, and sc_name
 * is the name of the provider behind sc_dconsumer.
 */
#define	sc_dprovider	sc_dconsumer->provider
#define	sc_jprovider	sc_jconsumer->provider
#define	sc_name		sc_dprovider->name
154
/*
 * Minimal singly linked queue of bios chained through bio_next.  `head' is
 * a struct bio pointer lvalue; a NULL head means an empty queue.  Macro
 * arguments may be evaluated more than once.
 */

/* Push `bp' onto the front of the queue. */
#define	GJQ_INSERT_HEAD(head, bp)	do {				\
	(bp)->bio_next = (head);					\
	(head) = (bp);							\
} while (0)

/* Link `bp' right behind `pbp'; a NULL `pbp' means "insert at the head". */
#define	GJQ_INSERT_AFTER(head, bp, pbp)	do {				\
	if ((pbp) != NULL) {						\
		(bp)->bio_next = (pbp)->bio_next;			\
		(pbp)->bio_next = (bp);					\
	} else								\
		GJQ_INSERT_HEAD(head, bp);				\
} while (0)

/* Store the final element of the queue in `bp' (NULL when it is empty). */
#define	GJQ_LAST(head, bp)	do {					\
	struct bio *_tail = (head);					\
									\
	if (_tail != NULL) {						\
		while (_tail->bio_next != NULL)				\
			_tail = _tail->bio_next;			\
	}								\
	(bp) = _tail;							\
} while (0)

/* First element of the queue, or NULL. */
#define	GJQ_FIRST(head)	(head)

/*
 * Unlink `bp' and clear its bio_next.  `bp' has to be on the queue; when
 * it is not the head, its predecessor is looked up by walking the list and
 * the result is checked with KASSERT().
 */
#define	GJQ_REMOVE(head, bp)	do {					\
	struct bio *_prev;						\
									\
	if ((head) != (bp)) {						\
		_prev = (head);						\
		while (_prev->bio_next != NULL &&			\
		    _prev->bio_next != (bp))				\
			_prev = _prev->bio_next;			\
		KASSERT(_prev->bio_next != NULL, ("NULL bio_next"));	\
		KASSERT(_prev->bio_next == (bp), ("bio_next != bp"));	\
		_prev->bio_next = (bp)->bio_next;			\
	} else								\
		(head) = (bp)->bio_next;				\
	(bp)->bio_next = NULL;						\
} while (0)

/* Visit every element in order, with `bp' as the iteration variable. */
#define	GJQ_FOREACH(head, bp)						\
	for ((bp) = (head); (bp) != NULL; (bp) = (bp)->bio_next)
198
/* Magic string that sizes jh_magic (five characters plus the NUL). */
#define	GJ_HEADER_MAGIC	"GJHDR"

/*
 * Journal header.  The structure is packed: the magic buffer is followed
 * directly by two 32-bit journal IDs (presumably this journal's ID and the
 * ID of the next one -- confirm against the code that fills them in).
 */
struct g_journal_header {
	char		jh_magic[sizeof(GJ_HEADER_MAGIC)];
	uint32_t	jh_journal_id;
	uint32_t	jh_journal_next_id;
} __packed;
206
/*
 * One journal entry: three packed 64-bit values.  Judging by the names
 * they are the offset in the journal, the offset in the data provider and
 * the length -- TODO confirm units and byte order with the record code.
 */
struct g_journal_entry {
	uint64_t	je_joffset;
	uint64_t	je_offset;
	uint64_t	je_length;
} __packed;
212
/* Magic string that sizes jrh_magic in struct g_journal_record_header. */
#define	GJ_RECORD_HEADER_MAGIC		"GJRHDR"
/* Number of entry slots (jrh_entries) in one record header. */
#define	GJ_RECORD_HEADER_NENTRIES	(20)
/*
 * Size bound used by GJ_VALIDATE_OFFSET(): one journal-provider sector
 * plus GJ_RECORD_HEADER_NENTRIES * maxphys bytes.
 */
#define	GJ_RECORD_MAX_SIZE(sc)	\
	((sc)->sc_jprovider->sectorsize + GJ_RECORD_HEADER_NENTRIES * maxphys)
/*
 * Reset `offset' to sc_jstart when a record of GJ_RECORD_MAX_SIZE(sc)
 * bytes starting at `offset' would reach or cross sc_jend; the reset is
 * logged at debug level 2.  `offset' must be an lvalue.
 */
#define	GJ_VALIDATE_OFFSET(offset, sc)	do {				\
	if ((offset) + GJ_RECORD_MAX_SIZE(sc) >= (sc)->sc_jend) {	\
		(offset) = (sc)->sc_jstart;				\
		GJ_DEBUG(2, "Starting from the beginning (%s).",		\
		    (sc)->sc_name);					\
	}								\
} while (0)
224
/*
 * Record header (packed): magic, journal ID, entry count, an 8-byte sum
 * field and a fixed array of GJ_RECORD_HEADER_NENTRIES entries.
 * NOTE(review): jrh_sum presumably holds a checksum used with
 * GJF_DEVICE_CHECKSUM -- confirm how it is computed.
 */
struct g_journal_record_header {
	char		jrh_magic[sizeof(GJ_RECORD_HEADER_MAGIC)];
	uint32_t	jrh_journal_id;
	uint16_t	jrh_nentries;
	u_char		jrh_sum[8];
	struct g_journal_entry jrh_entries[GJ_RECORD_HEADER_NENTRIES];
} __packed;
232
/*
 * Per-file-system callbacks: a "clean" callback takes a mount point and
 * returns an int, a "dirty" callback takes a GEOM consumer and returns
 * nothing.
 */
typedef int (g_journal_clean_t)(struct mount *mp);
typedef void (g_journal_dirty_t)(struct g_consumer *cp);

/* Descriptor tying a file system type name to its two callbacks. */
struct g_journal_desc {
	const char		*jd_fstype;
	g_journal_clean_t	*jd_clean;
	g_journal_dirty_t	*jd_dirty;
};

/* Supported file systems. */
extern const struct g_journal_desc g_journal_ufs;
244
/*
 * Timing helpers; both do nothing unless g_journal_debug is at least `lvl'.
 *
 * GJ_TIMER_START() calls binuptime(bt).  GJ_TIMER_STOP() takes a second
 * binuptime() reading, subtracts *bt from it and prints
 * "GEOM_JOURNAL[lvl]: <formatted message>: <sec>.<usec>s"; the "[lvl]"
 * part is left out when g_journal_debug is not positive.  `bt' is a
 * pointer to a struct bintime.
 */
#define	GJ_TIMER_START(lvl, bt)	do {					\
	if (g_journal_debug >= (lvl))					\
		binuptime(bt);						\
} while (0)
#define	GJ_TIMER_STOP(lvl, bt, ...)	do {				\
	if (g_journal_debug >= (lvl)) {					\
		struct bintime _bt2;					\
		struct timeval _tv;					\
									\
		binuptime(&_bt2);					\
		bintime_sub(&_bt2, bt);					\
		bintime2timeval(&_bt2, &_tv);				\
		printf("GEOM_JOURNAL");					\
		if (g_journal_debug > 0)				\
			printf("[%u]", lvl);				\
		printf(": ");						\
		printf(__VA_ARGS__);					\
		printf(": %jd.%06jds\n", (intmax_t)_tv.tv_sec,		\
		    (intmax_t)_tv.tv_usec);				\
	}								\
} while (0)
266#endif	/* _KERNEL */
267
/*
 * Provider type bits; GJ_TYPE_COMPLETE is both bits set.  Presumably the
 * values carried in md_type ("Provider type") -- TODO confirm.
 */
#define	GJ_TYPE_DATA		0x01
#define	GJ_TYPE_JOURNAL		0x02
#define	GJ_TYPE_COMPLETE	(GJ_TYPE_DATA|GJ_TYPE_JOURNAL)

/* Flag bits; presumably the ones carried in md_flags -- TODO confirm. */
#define	GJ_FLAG_CLEAN		0x01
#define	GJ_FLAG_CHECKSUM	0x02
274
/*
 * Journal metadata, in-memory form.
 *
 * The structure is not packed.  journal_metadata_encode() and
 * journal_metadata_decode() convert it to and from a 101-byte buffer with
 * little-endian integers and no padding, laid out as (offset: field):
 *
 *	 0: md_magic (16)	41: md_joffset (8)
 *	16: md_version (4)	49: md_jid (4)
 *	20: md_id (4)		53: md_flags (8)
 *	24: md_type (1)		61: md_provider (16)
 *	25: md_jstart (8)	77: md_provsize (8)
 *	33: md_jend (8)		85: md_hash (16)
 *
 * md_hash is the MD5 digest of bytes 0-84 of that buffer.
 */
struct g_journal_metadata {
	char		md_magic[16];	/* Magic value. */
	uint32_t	md_version;	/* Version number. */
	uint32_t	md_id;		/* Journal unique ID. */
	uint8_t		md_type;	/* Provider type. */
	uint64_t	md_jstart;	/* Journal space start offset. */
	uint64_t	md_jend;	/* Journal space end offset. */
	uint64_t	md_joffset;	/* Last known consistent journal offset. */
	uint32_t	md_jid;		/* Last known consistent journal ID. */
	uint64_t	md_flags;	/* Journal flags. */
	char		md_provider[16]; /* Hardcoded provider. */
	uint64_t	md_provsize;	/* Provider's size. */
	u_char		md_hash[16];	/* MD5 hash. */
};
289static __inline void
290journal_metadata_encode(struct g_journal_metadata *md, u_char *data)
291{
292	MD5_CTX ctx;
293
294	bcopy(md->md_magic, data, 16);
295	le32enc(data + 16, md->md_version);
296	le32enc(data + 20, md->md_id);
297	*(data + 24) = md->md_type;
298	le64enc(data + 25, md->md_jstart);
299	le64enc(data + 33, md->md_jend);
300	le64enc(data + 41, md->md_joffset);
301	le32enc(data + 49, md->md_jid);
302	le64enc(data + 53, md->md_flags);
303	bcopy(md->md_provider, data + 61, 16);
304	le64enc(data + 77, md->md_provsize);
305	MD5Init(&ctx);
306	MD5Update(&ctx, data, 85);
307	MD5Final(md->md_hash, &ctx);
308	bcopy(md->md_hash, data + 85, 16);
309}
310static __inline int
311journal_metadata_decode_v0(const u_char *data, struct g_journal_metadata *md)
312{
313	MD5_CTX ctx;
314
315	md->md_id = le32dec(data + 20);
316	md->md_type = *(data + 24);
317	md->md_jstart = le64dec(data + 25);
318	md->md_jend = le64dec(data + 33);
319	md->md_joffset = le64dec(data + 41);
320	md->md_jid = le32dec(data + 49);
321	md->md_flags = le64dec(data + 53);
322	bcopy(data + 61, md->md_provider, 16);
323	md->md_provsize = le64dec(data + 77);
324	MD5Init(&ctx);
325	MD5Update(&ctx, data, 85);
326	MD5Final(md->md_hash, &ctx);
327	if (bcmp(md->md_hash, data + 85, 16) != 0)
328		return (EINVAL);
329	return (0);
330}
331static __inline int
332journal_metadata_decode(const u_char *data, struct g_journal_metadata *md)
333{
334	int error;
335
336	bcopy(data, md->md_magic, 16);
337	md->md_version = le32dec(data + 16);
338	switch (md->md_version) {
339	case 0:
340		error = journal_metadata_decode_v0(data, md);
341		break;
342	default:
343		error = EINVAL;
344		break;
345	}
346	return (error);
347}
348
349static __inline void
350journal_metadata_dump(const struct g_journal_metadata *md)
351{
352	static const char hex[] = "0123456789abcdef";
353	char hash[16 * 2 + 1];
354	u_int i;
355
356	printf("     magic: %s\n", md->md_magic);
357	printf("   version: %u\n", (u_int)md->md_version);
358	printf("        id: %u\n", (u_int)md->md_id);
359	printf("      type: %u\n", (u_int)md->md_type);
360	printf("     start: %ju\n", (uintmax_t)md->md_jstart);
361	printf("       end: %ju\n", (uintmax_t)md->md_jend);
362	printf("   joffset: %ju\n", (uintmax_t)md->md_joffset);
363	printf("       jid: %u\n", (u_int)md->md_jid);
364	printf("     flags: %u\n", (u_int)md->md_flags);
365	printf("hcprovider: %s\n", md->md_provider);
366	printf("  provsize: %ju\n", (uintmax_t)md->md_provsize);
367	bzero(hash, sizeof(hash));
368	for (i = 0; i < 16; i++) {
369		hash[i * 2] = hex[md->md_hash[i] >> 4];
370		hash[i * 2 + 1] = hex[md->md_hash[i] & 0x0f];
371	}
372	printf("  MD5 hash: %s\n", hash);
373}
374#endif	/* !_G_JOURNAL_H_ */
375