1163837Spjd/*- 2163837Spjd * Copyright (c) 2005-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org> 3163837Spjd * All rights reserved. 4163837Spjd * 5163837Spjd * Redistribution and use in source and binary forms, with or without 6163837Spjd * modification, are permitted provided that the following conditions 7163837Spjd * are met: 8163837Spjd * 1. Redistributions of source code must retain the above copyright 9163837Spjd * notice, this list of conditions and the following disclaimer. 10163837Spjd * 2. Redistributions in binary form must reproduce the above copyright 11163837Spjd * notice, this list of conditions and the following disclaimer in the 12163837Spjd * documentation and/or other materials provided with the distribution. 13163837Spjd * 14163837Spjd * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 15163837Spjd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16163837Spjd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17163837Spjd * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 18163837Spjd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19163837Spjd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20163837Spjd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21163837Spjd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22163837Spjd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23163837Spjd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24163837Spjd * SUCH DAMAGE. 25163837Spjd * 26163837Spjd * $FreeBSD$ 27163837Spjd */ 28163837Spjd 29163837Spjd#ifndef _G_JOURNAL_H_ 30163837Spjd#define _G_JOURNAL_H_ 31163837Spjd 32163837Spjd#include <sys/endian.h> 33163837Spjd#include <sys/md5.h> 34163837Spjd#ifdef _KERNEL 35163837Spjd#include <sys/bio.h> 36163837Spjd#endif 37163837Spjd 38163837Spjd#define G_JOURNAL_CLASS_NAME "JOURNAL" 39163837Spjd 40163837Spjd#define G_JOURNAL_MAGIC "GEOM::JOURNAL" 41163837Spjd/* 42163837Spjd * Version history: 43163837Spjd * 0 - Initial version number. 44163837Spjd */ 45163837Spjd#define G_JOURNAL_VERSION 0 46163837Spjd 47163837Spjd#ifdef _KERNEL 48163837Spjdextern int g_journal_debug; 49163837Spjd 50163837Spjd#define GJ_DEBUG(lvl, ...) do { \ 51163837Spjd if (g_journal_debug >= (lvl)) { \ 52163837Spjd printf("GEOM_JOURNAL"); \ 53163837Spjd if (g_journal_debug > 0) \ 54163837Spjd printf("[%u]", lvl); \ 55163837Spjd printf(": "); \ 56163837Spjd printf(__VA_ARGS__); \ 57163837Spjd printf("\n"); \ 58163837Spjd } \ 59163837Spjd} while (0) 60163837Spjd#define GJ_LOGREQ(lvl, bp, ...) do { \ 61163837Spjd if (g_journal_debug >= (lvl)) { \ 62163837Spjd printf("GEOM_JOURNAL"); \ 63163837Spjd if (g_journal_debug > 0) \ 64163837Spjd printf("[%u]", lvl); \ 65163837Spjd printf(": "); \ 66163837Spjd printf(__VA_ARGS__); \ 67163837Spjd printf(" "); \ 68163837Spjd g_print_bio(bp); \ 69163837Spjd printf("\n"); \ 70163837Spjd } \ 71163837Spjd} while (0) 72163837Spjd 73163837Spjd#define JEMPTY(sc) ((sc)->sc_journal_offset - \ 74163837Spjd (sc)->sc_jprovider->sectorsize == \ 75163837Spjd (sc)->sc_active.jj_offset && \ 76163837Spjd (sc)->sc_current_count == 0) 77163837Spjd 78163837Spjd#define GJ_BIO_REGULAR 0x00 79163837Spjd#define GJ_BIO_READ 0x01 80163837Spjd#define GJ_BIO_JOURNAL 0x02 81163837Spjd#define GJ_BIO_COPY 0x03 82163837Spjd#define GJ_BIO_MASK 0x0f 83163837Spjd 84163837Spjd#if 0 85163837Spjd#define GJF_BIO_DONT_FREE 0x10 86163837Spjd#define GJF_BIO_MASK 0xf0 87163837Spjd#endif 88163837Spjd 89163837Spjd#define GJF_DEVICE_HARDCODED 0x0001 90163837Spjd#define GJF_DEVICE_DESTROY 0x0010 91163837Spjd#define GJF_DEVICE_SWITCH 0x0020 92163837Spjd#define GJF_DEVICE_BEFORE_SWITCH 0x0040 93163837Spjd#define GJF_DEVICE_CLEAN 0x0080 94163837Spjd#define GJF_DEVICE_CHECKSUM 0x0100 95163837Spjd 96163837Spjd#define GJ_HARD_LIMIT 64 97163837Spjd 98163837Spjd/* 99163837Spjd * We keep pointers to journaled data in bio structure and because we 100163837Spjd * need to store two off_t values (offset in data provider and offset in 101163837Spjd * journal), we have to borrow bio_completed field for this. 102163837Spjd */ 103163837Spjd#define bio_joffset bio_completed 104163837Spjd/* 105163837Spjd * Use bio_caller1 field as a pointer in queue. 106163837Spjd */ 107163837Spjd#define bio_next bio_caller1 108163837Spjd 109163837Spjd/* 110163837Spjd * There are two such structures maintained inside each journaled device. 111163837Spjd * One describes active part of the journal, were recent requests are stored. 112163837Spjd * The second describes the last consistent part of the journal with requests 113163837Spjd * that are copied to the destination provider. 114163837Spjd */ 115163837Spjdstruct g_journal_journal { 116163837Spjd struct bio *jj_queue; /* Cached journal entries. */ 117163837Spjd off_t jj_offset; /* Journal's start offset. */ 118163837Spjd}; 119163837Spjd 120163837Spjdstruct g_journal_softc { 121163837Spjd uint32_t sc_id; 122163837Spjd uint8_t sc_type; 123163837Spjd uint8_t sc_orig_type; 124163837Spjd struct g_geom *sc_geom; 125163837Spjd u_int sc_flags; 126163837Spjd struct mtx sc_mtx; 127163837Spjd off_t sc_mediasize; 128163837Spjd u_int sc_sectorsize; 129163837Spjd#define GJ_FLUSH_DATA 0x01 130163837Spjd#define GJ_FLUSH_JOURNAL 0x02 131163837Spjd u_int sc_bio_flush; 132163837Spjd 133163837Spjd uint32_t sc_journal_id; 134163837Spjd uint32_t sc_journal_next_id; 135163837Spjd int sc_journal_copying; 136163837Spjd off_t sc_journal_offset; 137163837Spjd off_t sc_journal_previous_id; 138163837Spjd 139163837Spjd struct bio_queue_head sc_back_queue; 140163837Spjd struct bio_queue_head sc_regular_queue; 141163837Spjd 142163837Spjd struct bio_queue_head sc_delayed_queue; 143163837Spjd int sc_delayed_count; 144163837Spjd 145163837Spjd struct bio *sc_current_queue; 146163837Spjd int sc_current_count; 147163837Spjd 148163837Spjd struct bio *sc_flush_queue; 149163837Spjd int sc_flush_count; 150163837Spjd int sc_flush_in_progress; 151163837Spjd 152163837Spjd struct bio *sc_copy_queue; 153163837Spjd int sc_copy_in_progress; 154163837Spjd 155163837Spjd struct g_consumer *sc_dconsumer; 156163837Spjd struct g_consumer *sc_jconsumer; 157163837Spjd 158163837Spjd struct g_journal_journal sc_inactive; 159163837Spjd struct g_journal_journal sc_active; 160163837Spjd 161163837Spjd off_t sc_jstart; /* Journal space start offset. */ 162163837Spjd off_t sc_jend; /* Journal space end offset. */ 163163837Spjd 164163837Spjd struct callout sc_callout; 165163837Spjd struct proc *sc_worker; 166185693Strasz 167185693Strasz struct root_hold_token *sc_rootmount; 168163837Spjd}; 169163837Spjd#define sc_dprovider sc_dconsumer->provider 170163837Spjd#define sc_jprovider sc_jconsumer->provider 171163837Spjd#define sc_name sc_dprovider->name 172163837Spjd 173163837Spjd#define GJQ_INSERT_HEAD(head, bp) do { \ 174163837Spjd (bp)->bio_next = (head); \ 175163837Spjd (head) = (bp); \ 176163837Spjd} while (0) 177163837Spjd#define GJQ_INSERT_AFTER(head, bp, pbp) do { \ 178163837Spjd if ((pbp) == NULL) \ 179163837Spjd GJQ_INSERT_HEAD(head, bp); \ 180163837Spjd else { \ 181163837Spjd (bp)->bio_next = (pbp)->bio_next; \ 182163837Spjd (pbp)->bio_next = (bp); \ 183163837Spjd } \ 184163837Spjd} while (0) 185163837Spjd#define GJQ_FIRST(head) (head) 186163837Spjd#define GJQ_REMOVE(head, bp) do { \ 187163837Spjd struct bio *_bp; \ 188163837Spjd \ 189163837Spjd if ((head) == (bp)) { \ 190163837Spjd (head) = (bp)->bio_next; \ 191163837Spjd (bp)->bio_next = NULL; \ 192163837Spjd break; \ 193163837Spjd } \ 194163837Spjd for (_bp = (head); _bp->bio_next != NULL; _bp = _bp->bio_next) {\ 195163837Spjd if (_bp->bio_next == (bp)) \ 196163837Spjd break; \ 197163837Spjd } \ 198163837Spjd KASSERT(_bp->bio_next != NULL, ("NULL bio_next")); \ 199163837Spjd KASSERT(_bp->bio_next == (bp), ("bio_next != bp")); \ 200163837Spjd _bp->bio_next = (bp)->bio_next; \ 201163837Spjd (bp)->bio_next = NULL; \ 202163837Spjd} while (0) 203163837Spjd#define GJQ_FOREACH(head, bp) \ 204163837Spjd for ((bp) = (head); (bp) != NULL; (bp) = (bp)->bio_next) 205163837Spjd 206163837Spjd#define GJ_HEADER_MAGIC "GJHDR" 207163837Spjd 208163837Spjdstruct g_journal_header { 209163837Spjd char jh_magic[sizeof(GJ_HEADER_MAGIC)]; 210163837Spjd uint32_t jh_journal_id; 211163837Spjd uint32_t jh_journal_next_id; 212163837Spjd} __packed; 213163837Spjd 214163837Spjdstruct g_journal_entry { 215163837Spjd uint64_t je_joffset; 216163837Spjd uint64_t je_offset; 217163837Spjd uint64_t je_length; 218163837Spjd} __packed; 219163837Spjd 220163837Spjd#define GJ_RECORD_HEADER_MAGIC "GJRHDR" 221163837Spjd#define GJ_RECORD_HEADER_NENTRIES (20) 222163837Spjd#define GJ_RECORD_MAX_SIZE(sc) \ 223163837Spjd ((sc)->sc_jprovider->sectorsize + GJ_RECORD_HEADER_NENTRIES * MAXPHYS) 224163837Spjd#define GJ_VALIDATE_OFFSET(offset, sc) do { \ 225163837Spjd if ((offset) + GJ_RECORD_MAX_SIZE(sc) >= (sc)->sc_jend) { \ 226163837Spjd (offset) = (sc)->sc_jstart; \ 227163837Spjd GJ_DEBUG(2, "Starting from the begining (%s).", \ 228163837Spjd (sc)->sc_name); \ 229163837Spjd } \ 230163837Spjd} while (0) 231163837Spjd 232163837Spjdstruct g_journal_record_header { 233163837Spjd char jrh_magic[sizeof(GJ_RECORD_HEADER_MAGIC)]; 234163837Spjd uint32_t jrh_journal_id; 235163837Spjd uint16_t jrh_nentries; 236163837Spjd u_char jrh_sum[8]; 237163837Spjd struct g_journal_entry jrh_entries[GJ_RECORD_HEADER_NENTRIES]; 238163837Spjd} __packed; 239163837Spjd 240163837Spjdtypedef int (g_journal_clean_t)(struct mount *mp); 241163837Spjdtypedef void (g_journal_dirty_t)(struct g_consumer *cp); 242163837Spjd 243163837Spjdstruct g_journal_desc { 244163837Spjd const char *jd_fstype; 245163837Spjd g_journal_clean_t *jd_clean; 246163837Spjd g_journal_dirty_t *jd_dirty; 247163837Spjd}; 248163837Spjd 249163837Spjd/* Supported file systems. */ 250163837Spjdextern const struct g_journal_desc g_journal_ufs; 251163837Spjd 252163837Spjd#define GJ_TIMER_START(lvl, bt) do { \ 253163837Spjd if (g_journal_debug >= (lvl)) \ 254163837Spjd binuptime(bt); \ 255163837Spjd} while (0) 256163837Spjd#define GJ_TIMER_STOP(lvl, bt, ...) do { \ 257163837Spjd if (g_journal_debug >= (lvl)) { \ 258163837Spjd struct bintime _bt2; \ 259163837Spjd struct timeval _tv; \ 260163837Spjd \ 261163837Spjd binuptime(&_bt2); \ 262163837Spjd bintime_sub(&_bt2, bt); \ 263163837Spjd bintime2timeval(&_bt2, &_tv); \ 264163837Spjd printf("GEOM_JOURNAL"); \ 265163837Spjd if (g_journal_debug > 0) \ 266163837Spjd printf("[%u]", lvl); \ 267163837Spjd printf(": "); \ 268163837Spjd printf(__VA_ARGS__); \ 269163837Spjd printf(": %jd.%06jds\n", (intmax_t)_tv.tv_sec, \ 270163837Spjd (intmax_t)_tv.tv_usec); \ 271163837Spjd } \ 272163837Spjd} while (0) 273163837Spjd#endif /* _KERNEL */ 274163837Spjd 275163837Spjd#define GJ_TYPE_DATA 0x01 276163837Spjd#define GJ_TYPE_JOURNAL 0x02 277163837Spjd#define GJ_TYPE_COMPLETE (GJ_TYPE_DATA|GJ_TYPE_JOURNAL) 278163837Spjd 279163837Spjd#define GJ_FLAG_CLEAN 0x01 280163837Spjd#define GJ_FLAG_CHECKSUM 0x02 281163837Spjd 282163837Spjdstruct g_journal_metadata { 283163837Spjd char md_magic[16]; /* Magic value. */ 284163837Spjd uint32_t md_version; /* Version number. */ 285163837Spjd uint32_t md_id; /* Journal unique ID. */ 286163837Spjd uint8_t md_type; /* Provider type. */ 287163837Spjd uint64_t md_jstart; /* Journal space start offset. */ 288163837Spjd uint64_t md_jend; /* Journal space end offset. */ 289163837Spjd uint64_t md_joffset; /* Last known consistent journal offset. */ 290163837Spjd uint32_t md_jid; /* Last known consistent journal ID. */ 291163837Spjd uint64_t md_flags; /* Journal flags. */ 292163837Spjd char md_provider[16]; /* Hardcoded provider. */ 293163837Spjd uint64_t md_provsize; /* Provider's size. */ 294163837Spjd u_char md_hash[16]; /* MD5 hash. */ 295163837Spjd}; 296163837Spjdstatic __inline void 297163837Spjdjournal_metadata_encode(struct g_journal_metadata *md, u_char *data) 298163837Spjd{ 299163837Spjd MD5_CTX ctx; 300163837Spjd 301163837Spjd bcopy(md->md_magic, data, 16); 302163837Spjd le32enc(data + 16, md->md_version); 303163837Spjd le32enc(data + 20, md->md_id); 304163837Spjd *(data + 24) = md->md_type; 305163837Spjd le64enc(data + 25, md->md_jstart); 306163837Spjd le64enc(data + 33, md->md_jend); 307163837Spjd le64enc(data + 41, md->md_joffset); 308163837Spjd le32enc(data + 49, md->md_jid); 309163837Spjd le64enc(data + 53, md->md_flags); 310163837Spjd bcopy(md->md_provider, data + 61, 16); 311163837Spjd le64enc(data + 77, md->md_provsize); 312163837Spjd MD5Init(&ctx); 313163837Spjd MD5Update(&ctx, data, 85); 314163837Spjd MD5Final(md->md_hash, &ctx); 315163837Spjd bcopy(md->md_hash, data + 85, 16); 316163837Spjd} 317163837Spjdstatic __inline int 318163837Spjdjournal_metadata_decode_v0(const u_char *data, struct g_journal_metadata *md) 319163837Spjd{ 320163837Spjd MD5_CTX ctx; 321163837Spjd 322163837Spjd md->md_id = le32dec(data + 20); 323163837Spjd md->md_type = *(data + 24); 324163837Spjd md->md_jstart = le64dec(data + 25); 325163837Spjd md->md_jend = le64dec(data + 33); 326163837Spjd md->md_joffset = le64dec(data + 41); 327163837Spjd md->md_jid = le32dec(data + 49); 328163837Spjd md->md_flags = le64dec(data + 53); 329163837Spjd bcopy(data + 61, md->md_provider, 16); 330163837Spjd md->md_provsize = le64dec(data + 77); 331163837Spjd MD5Init(&ctx); 332163837Spjd MD5Update(&ctx, data, 85); 333163837Spjd MD5Final(md->md_hash, &ctx); 334163837Spjd if (bcmp(md->md_hash, data + 85, 16) != 0) 335163837Spjd return (EINVAL); 336163837Spjd return (0); 337163837Spjd} 338163837Spjdstatic __inline int 339163837Spjdjournal_metadata_decode(const u_char *data, struct g_journal_metadata *md) 340163837Spjd{ 341163837Spjd int error; 342163837Spjd 343163837Spjd bcopy(data, md->md_magic, 16); 344163837Spjd md->md_version = le32dec(data + 16); 345163837Spjd switch (md->md_version) { 346163837Spjd case 0: 347163837Spjd error = journal_metadata_decode_v0(data, md); 348163837Spjd break; 349163837Spjd default: 350163837Spjd error = EINVAL; 351163837Spjd break; 352163837Spjd } 353163837Spjd return (error); 354163837Spjd} 355163837Spjd 356163837Spjdstatic __inline void 357163837Spjdjournal_metadata_dump(const struct g_journal_metadata *md) 358163837Spjd{ 359163837Spjd static const char hex[] = "0123456789abcdef"; 360163837Spjd char hash[16 * 2 + 1]; 361163837Spjd u_int i; 362163837Spjd 363163837Spjd printf(" magic: %s\n", md->md_magic); 364163837Spjd printf(" version: %u\n", (u_int)md->md_version); 365163837Spjd printf(" id: %u\n", (u_int)md->md_id); 366163837Spjd printf(" type: %u\n", (u_int)md->md_type); 367163837Spjd printf(" start: %ju\n", (uintmax_t)md->md_jstart); 368163837Spjd printf(" end: %ju\n", (uintmax_t)md->md_jend); 369163837Spjd printf(" joffset: %ju\n", (uintmax_t)md->md_joffset); 370163837Spjd printf(" jid: %u\n", (u_int)md->md_jid); 371163837Spjd printf(" flags: %u\n", (u_int)md->md_flags); 372163837Spjd printf("hcprovider: %s\n", md->md_provider); 373163837Spjd printf(" provsize: %ju\n", (uintmax_t)md->md_provsize); 374163837Spjd bzero(hash, sizeof(hash)); 375163837Spjd for (i = 0; i < 16; i++) { 376163837Spjd hash[i * 2] = hex[md->md_hash[i] >> 4]; 377163837Spjd hash[i * 2 + 1] = hex[md->md_hash[i] & 0x0f]; 378163837Spjd } 379163837Spjd printf(" MD5 hash: %s\n", hash); 380163837Spjd} 381163837Spjd#endif /* !_G_JOURNAL_H_ */ 382