journal.c revision 143731
1/* 2 * Copyright (C) 2004 Internet Systems Consortium, Inc. ("ISC") 3 * Copyright (C) 1999-2002 Internet Software Consortium. 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH 10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, 12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE 14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 15 * PERFORMANCE OF THIS SOFTWARE. 16 */ 17 18/* $Id: journal.c,v 1.77.2.1.10.9 2004/09/16 04:57:02 marka Exp $ */ 19 20#include <config.h> 21 22#include <stdlib.h> 23 24#include <isc/file.h> 25#include <isc/mem.h> 26#include <isc/stdio.h> 27#include <isc/string.h> 28#include <isc/util.h> 29 30#include <dns/compress.h> 31#include <dns/db.h> 32#include <dns/dbiterator.h> 33#include <dns/diff.h> 34#include <dns/fixedname.h> 35#include <dns/journal.h> 36#include <dns/log.h> 37#include <dns/rdataset.h> 38#include <dns/rdatasetiter.h> 39#include <dns/result.h> 40#include <dns/soa.h> 41 42/* 43 * When true, accept IXFR difference sequences where the 44 * SOA serial number does not change (BIND 8 sends such 45 * sequences). 46 */ 47static isc_boolean_t bind8_compat = ISC_TRUE; /* XXX config */ 48 49/**************************************************************************/ 50/* 51 * Miscellaneous utilities. 52 */ 53 54#define JOURNAL_COMMON_LOGARGS \ 55 dns_lctx, DNS_LOGCATEGORY_GENERAL, DNS_LOGMODULE_JOURNAL 56 57#define JOURNAL_DEBUG_LOGARGS(n) \ 58 JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(n) 59 60/* 61 * It would be non-sensical (or at least obtuse) to use FAIL() with an 62 * ISC_R_SUCCESS code, but the test is there to keep the Solaris compiler 63 * from complaining about "end-of-loop code not reached". 64 */ 65#define FAIL(code) \ 66 do { result = (code); \ 67 if (result != ISC_R_SUCCESS) goto failure; \ 68 } while (0) 69 70#define CHECK(op) \ 71 do { result = (op); \ 72 if (result != ISC_R_SUCCESS) goto failure; \ 73 } while (0) 74 75static isc_result_t index_to_disk(dns_journal_t *); 76 77static inline isc_uint32_t 78decode_uint32(unsigned char *p) { 79 return ((p[0] << 24) + 80 (p[1] << 16) + 81 (p[2] << 8) + 82 (p[3] << 0)); 83} 84 85static inline void 86encode_uint32(isc_uint32_t val, unsigned char *p) { 87 p[0] = (isc_uint8_t)(val >> 24); 88 p[1] = (isc_uint8_t)(val >> 16); 89 p[2] = (isc_uint8_t)(val >> 8); 90 p[3] = (isc_uint8_t)(val >> 0); 91} 92 93isc_result_t 94dns_db_createsoatuple(dns_db_t *db, dns_dbversion_t *ver, isc_mem_t *mctx, 95 dns_diffop_t op, dns_difftuple_t **tp) 96{ 97 isc_result_t result; 98 dns_dbnode_t *node; 99 dns_rdataset_t rdataset; 100 dns_rdata_t rdata = DNS_RDATA_INIT; 101 dns_name_t *zonename; 102 103 zonename = dns_db_origin(db); 104 105 node = NULL; 106 result = dns_db_findnode(db, zonename, ISC_FALSE, &node); 107 if (result != ISC_R_SUCCESS) 108 goto nonode; 109 110 dns_rdataset_init(&rdataset); 111 result = dns_db_findrdataset(db, node, ver, dns_rdatatype_soa, 0, 112 (isc_stdtime_t)0, &rdataset, NULL); 113 if (result != ISC_R_SUCCESS) 114 goto freenode; 115 116 result = dns_rdataset_first(&rdataset); 117 if (result != ISC_R_SUCCESS) 118 goto freenode; 119 120 dns_rdataset_current(&rdataset, &rdata); 121 122 result = dns_difftuple_create(mctx, op, zonename, rdataset.ttl, 123 &rdata, tp); 124 125 dns_rdataset_disassociate(&rdataset); 126 dns_db_detachnode(db, &node); 127 return (ISC_R_SUCCESS); 128 129 freenode: 130 dns_db_detachnode(db, &node); 131 nonode: 132 UNEXPECTED_ERROR(__FILE__, __LINE__, "missing SOA"); 133 return (result); 134} 135 136/**************************************************************************/ 137/* 138 * Journalling. 139 */ 140 141/* 142 * A journal file consists of 143 * 144 * - A fixed-size header of type journal_rawheader_t. 145 * 146 * - The index. This is an unordered array of index entries 147 * of type journal_rawpos_t giving the locations 148 * of some arbitrary subset of the journal's addressable 149 * transactions. The index entries are used as hints to 150 * speed up the process of locating a transaction with a given 151 * serial number. Unused index entries have an "offset" 152 * field of zero. The size of the index can vary between 153 * journal files, but does not change during the lifetime 154 * of a file. The size can be zero. 155 * 156 * - The journal data. This consists of one or more transactions. 157 * Each transaction begins with a transaction header of type 158 * journal_rawxhdr_t. The transaction header is followed by a 159 * sequence of RRs, similar in structure to an IXFR difference 160 * sequence (RFC1995). That is, the pre-transaction SOA, 161 * zero or more other deleted RRs, the post-transaction SOA, 162 * and zero or more other added RRs. Unlike in IXFR, each RR 163 * is prefixed with a 32-bit length. 164 * 165 * The journal data part grows as new transactions are 166 * appended to the file. Only those transactions 167 * whose serial number is current-(2^31-1) to current 168 * are considered "addressable" and may be pointed 169 * to from the header or index. They may be preceded 170 * by old transactions that are no longer addressable, 171 * and they may be followed by transactions that were 172 * appended to the journal but never committed by updating 173 * the "end" position in the header. The latter will 174 * be overwritten when new transactions are added. 175 */ 176 177/* 178 * On-disk representation of a "pointer" to a journal entry. 179 * These are used in the journal header to locate the beginning 180 * and end of the journal, and in the journal index to locate 181 * other transactions. 182 */ 183typedef struct { 184 unsigned char serial[4]; /* SOA serial before update. */ 185 /* 186 * XXXRTH Should offset be 8 bytes? 187 * XXXDCL ... probably, since isc_offset_t is 8 bytes on many OSs. 188 * XXXAG ... but we will not be able to seek >2G anyway on many 189 * platforms as long as we are using fseek() rather 190 * than lseek(). 191 */ 192 unsigned char offset[4]; /* Offset from beginning of file. */ 193} journal_rawpos_t; 194 195/* 196 * The on-disk representation of the journal header. 197 * All numbers are stored in big-endian order. 198 */ 199 200/* 201 * The header is of a fixed size, with some spare room for future 202 * extensions. 203 */ 204#define JOURNAL_HEADER_SIZE 64 /* Bytes. */ 205 206typedef union { 207 struct { 208 /* File format version ID. */ 209 unsigned char format[16]; 210 /* Position of the first addressable transaction */ 211 journal_rawpos_t begin; 212 /* Position of the next (yet nonexistent) transaction. */ 213 journal_rawpos_t end; 214 /* Number of index entries following the header. */ 215 unsigned char index_size[4]; 216 } h; 217 /* Pad the header to a fixed size. */ 218 unsigned char pad[JOURNAL_HEADER_SIZE]; 219} journal_rawheader_t; 220 221/* 222 * The on-disk representation of the transaction header. 223 * There is one of these at the beginning of each transaction. 224 */ 225typedef struct { 226 unsigned char size[4]; /* In bytes, excluding header. */ 227 unsigned char serial0[4]; /* SOA serial before update. */ 228 unsigned char serial1[4]; /* SOA serial after update. */ 229} journal_rawxhdr_t; 230 231/* 232 * The on-disk representation of the RR header. 233 * There is one of these at the beginning of each RR. 234 */ 235typedef struct { 236 unsigned char size[4]; /* In bytes, excluding header. */ 237} journal_rawrrhdr_t; 238 239/* 240 * The in-core representation of the journal header. 241 */ 242typedef struct { 243 isc_uint32_t serial; 244 isc_offset_t offset; 245} journal_pos_t; 246 247#define POS_VALID(pos) ((pos).offset != 0) 248#define POS_INVALIDATE(pos) ((pos).offset = 0, (pos).serial = 0) 249 250typedef struct { 251 unsigned char format[16]; 252 journal_pos_t begin; 253 journal_pos_t end; 254 isc_uint32_t index_size; 255} journal_header_t; 256 257/* 258 * The in-core representation of the transaction header. 259 */ 260 261typedef struct { 262 isc_uint32_t size; 263 isc_uint32_t serial0; 264 isc_uint32_t serial1; 265} journal_xhdr_t; 266 267/* 268 * The in-core representation of the RR header. 269 */ 270typedef struct { 271 isc_uint32_t size; 272} journal_rrhdr_t; 273 274 275/* 276 * Initial contents to store in the header of a newly created 277 * journal file. 278 * 279 * The header starts with the magic string ";BIND LOG V9\n" 280 * to identify the file as a BIND 9 journal file. An ASCII 281 * identification string is used rather than a binary magic 282 * number to be consistent with BIND 8 (BIND 8 journal files 283 * are ASCII text files). 284 */ 285 286static journal_header_t 287initial_journal_header = { ";BIND LOG V9\n", { 0, 0 }, { 0, 0 }, 0 }; 288 289#define JOURNAL_EMPTY(h) ((h)->begin.offset == (h)->end.offset) 290 291typedef enum { 292 JOURNAL_STATE_INVALID, 293 JOURNAL_STATE_READ, 294 JOURNAL_STATE_WRITE, 295 JOURNAL_STATE_TRANSACTION 296} journal_state_t; 297 298struct dns_journal { 299 unsigned int magic; /* JOUR */ 300 isc_mem_t *mctx; /* Memory context */ 301 journal_state_t state; 302 const char *filename; /* Journal file name */ 303 FILE * fp; /* File handle */ 304 isc_offset_t offset; /* Current file offset */ 305 journal_header_t header; /* In-core journal header */ 306 unsigned char *rawindex; /* In-core buffer for journal 307 index in on-disk format */ 308 journal_pos_t *index; /* In-core journal index */ 309 310 /* Current transaction state (when writing). */ 311 struct { 312 unsigned int n_soa; /* Number of SOAs seen */ 313 journal_pos_t pos[2]; /* Begin/end position */ 314 } x; 315 316 /* Iteration state (when reading). */ 317 struct { 318 /* These define the part of the journal we iterate over. */ 319 journal_pos_t bpos; /* Position before first, */ 320 journal_pos_t epos; /* and after last 321 transaction */ 322 /* The rest is iterator state. */ 323 isc_uint32_t current_serial; /* Current SOA serial */ 324 isc_buffer_t source; /* Data from disk */ 325 isc_buffer_t target; /* Data from _fromwire check */ 326 dns_decompress_t dctx; /* Dummy decompression ctx */ 327 dns_name_t name; /* Current domain name */ 328 dns_rdata_t rdata; /* Current rdata */ 329 isc_uint32_t ttl; /* Current TTL */ 330 unsigned int xsize; /* Size of transaction data */ 331 unsigned int xpos; /* Current position in it */ 332 isc_result_t result; /* Result of last call */ 333 } it; 334}; 335 336#define DNS_JOURNAL_MAGIC ISC_MAGIC('J', 'O', 'U', 'R') 337#define DNS_JOURNAL_VALID(t) ISC_MAGIC_VALID(t, DNS_JOURNAL_MAGIC) 338 339static void 340journal_pos_decode(journal_rawpos_t *raw, journal_pos_t *cooked) { 341 cooked->serial = decode_uint32(raw->serial); 342 cooked->offset = decode_uint32(raw->offset); 343} 344 345static void 346journal_pos_encode(journal_rawpos_t *raw, journal_pos_t *cooked) { 347 encode_uint32(cooked->serial, raw->serial); 348 encode_uint32(cooked->offset, raw->offset); 349} 350 351static void 352journal_header_decode(journal_rawheader_t *raw, journal_header_t *cooked) { 353 INSIST(sizeof(cooked->format) == sizeof(raw->h.format)); 354 memcpy(cooked->format, raw->h.format, sizeof(cooked->format)); 355 journal_pos_decode(&raw->h.begin, &cooked->begin); 356 journal_pos_decode(&raw->h.end, &cooked->end); 357 cooked->index_size = decode_uint32(raw->h.index_size); 358} 359 360static void 361journal_header_encode(journal_header_t *cooked, journal_rawheader_t *raw) { 362 INSIST(sizeof(cooked->format) == sizeof(raw->h.format)); 363 memset(raw->pad, 0, sizeof(raw->pad)); 364 memcpy(raw->h.format, cooked->format, sizeof(raw->h.format)); 365 journal_pos_encode(&raw->h.begin, &cooked->begin); 366 journal_pos_encode(&raw->h.end, &cooked->end); 367 encode_uint32(cooked->index_size, raw->h.index_size); 368} 369 370/* 371 * Journal file I/O subroutines, with error checking and reporting. 372 */ 373static isc_result_t 374journal_seek(dns_journal_t *j, isc_uint32_t offset) { 375 isc_result_t result; 376 result = isc_stdio_seek(j->fp, (long)offset, SEEK_SET); 377 if (result != ISC_R_SUCCESS) { 378 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 379 "%s: seek: %s", j->filename, 380 isc_result_totext(result)); 381 return (ISC_R_UNEXPECTED); 382 } 383 j->offset = offset; 384 return (ISC_R_SUCCESS); 385} 386 387static isc_result_t 388journal_read(dns_journal_t *j, void *mem, size_t nbytes) { 389 isc_result_t result; 390 391 result = isc_stdio_read(mem, 1, nbytes, j->fp, NULL); 392 if (result != ISC_R_SUCCESS) { 393 if (result == ISC_R_EOF) 394 return (ISC_R_NOMORE); 395 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 396 "%s: read: %s", 397 j->filename, isc_result_totext(result)); 398 return (ISC_R_UNEXPECTED); 399 } 400 j->offset += nbytes; 401 return (ISC_R_SUCCESS); 402} 403 404static isc_result_t 405journal_write(dns_journal_t *j, void *mem, size_t nbytes) { 406 isc_result_t result; 407 408 result = isc_stdio_write(mem, 1, nbytes, j->fp, NULL); 409 if (result != ISC_R_SUCCESS) { 410 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 411 "%s: write: %s", 412 j->filename, isc_result_totext(result)); 413 return (ISC_R_UNEXPECTED); 414 } 415 j->offset += nbytes; 416 return (ISC_R_SUCCESS); 417} 418 419static isc_result_t 420journal_fsync(dns_journal_t *j) { 421 isc_result_t result; 422 result = isc_stdio_flush(j->fp); 423 if (result != ISC_R_SUCCESS) { 424 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 425 "%s: flush: %s", 426 j->filename, isc_result_totext(result)); 427 return (ISC_R_UNEXPECTED); 428 } 429 result = isc_stdio_sync(j->fp); 430 if (result != ISC_R_SUCCESS) { 431 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 432 "%s: fsync: %s", 433 j->filename, isc_result_totext(result)); 434 return (ISC_R_UNEXPECTED); 435 } 436 return (ISC_R_SUCCESS); 437} 438 439/* 440 * Read/write a transaction header at the current file position. 441 */ 442 443static isc_result_t 444journal_read_xhdr(dns_journal_t *j, journal_xhdr_t *xhdr) { 445 journal_rawxhdr_t raw; 446 isc_result_t result; 447 result = journal_read(j, &raw, sizeof(raw)); 448 if (result != ISC_R_SUCCESS) 449 return (result); 450 xhdr->size = decode_uint32(raw.size); 451 xhdr->serial0 = decode_uint32(raw.serial0); 452 xhdr->serial1 = decode_uint32(raw.serial1); 453 return (ISC_R_SUCCESS); 454} 455 456static isc_result_t 457journal_write_xhdr(dns_journal_t *j, isc_uint32_t size, 458 isc_uint32_t serial0, isc_uint32_t serial1) 459{ 460 journal_rawxhdr_t raw; 461 encode_uint32(size, raw.size); 462 encode_uint32(serial0, raw.serial0); 463 encode_uint32(serial1, raw.serial1); 464 return (journal_write(j, &raw, sizeof(raw))); 465} 466 467 468/* 469 * Read an RR header at the current file position. 470 */ 471 472static isc_result_t 473journal_read_rrhdr(dns_journal_t *j, journal_rrhdr_t *rrhdr) { 474 journal_rawrrhdr_t raw; 475 isc_result_t result; 476 result = journal_read(j, &raw, sizeof(raw)); 477 if (result != ISC_R_SUCCESS) 478 return (result); 479 rrhdr->size = decode_uint32(raw.size); 480 return (ISC_R_SUCCESS); 481} 482 483static isc_result_t 484journal_file_create(isc_mem_t *mctx, const char *filename) { 485 FILE *fp = NULL; 486 isc_result_t result; 487 journal_header_t header; 488 journal_rawheader_t rawheader; 489 int index_size = 56; /* XXX configurable */ 490 int size; 491 void *mem; /* Memory for temporary index image. */ 492 493 INSIST(sizeof(journal_rawheader_t) == JOURNAL_HEADER_SIZE); 494 495 result = isc_stdio_open(filename, "wb", &fp); 496 if (result != ISC_R_SUCCESS) { 497 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 498 "%s: create: %s", 499 filename, isc_result_totext(result)); 500 return (ISC_R_UNEXPECTED); 501 } 502 503 header = initial_journal_header; 504 header.index_size = index_size; 505 journal_header_encode(&header, &rawheader); 506 507 size = sizeof(journal_rawheader_t) + 508 index_size * sizeof(journal_rawpos_t); 509 510 mem = isc_mem_get(mctx, size); 511 if (mem == NULL) { 512 (void)isc_stdio_close(fp); 513 (void)isc_file_remove(filename); 514 return (ISC_R_NOMEMORY); 515 } 516 memset(mem, 0, size); 517 memcpy(mem, &rawheader, sizeof(rawheader)); 518 519 result = isc_stdio_write(mem, 1, (size_t) size, fp, NULL); 520 if (result != ISC_R_SUCCESS) { 521 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 522 "%s: write: %s", 523 filename, isc_result_totext(result)); 524 (void)isc_stdio_close(fp); 525 (void)isc_file_remove(filename); 526 isc_mem_put(mctx, mem, size); 527 return (ISC_R_UNEXPECTED); 528 } 529 isc_mem_put(mctx, mem, size); 530 531 result = isc_stdio_close(fp); 532 if (result != ISC_R_SUCCESS) { 533 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 534 "%s: close: %s", 535 filename, isc_result_totext(result)); 536 (void)isc_file_remove(filename); 537 return (ISC_R_UNEXPECTED); 538 } 539 540 return (ISC_R_SUCCESS); 541} 542 543static isc_result_t 544journal_open(isc_mem_t *mctx, const char *filename, isc_boolean_t write, 545 isc_boolean_t create, dns_journal_t **journalp) { 546 FILE *fp = NULL; 547 isc_result_t result; 548 journal_rawheader_t rawheader; 549 dns_journal_t *j; 550 551 INSIST(journalp != NULL && *journalp == NULL); 552 j = isc_mem_get(mctx, sizeof(*j)); 553 if (j == NULL) 554 return (ISC_R_NOMEMORY); 555 556 j->mctx = mctx; 557 j->state = JOURNAL_STATE_INVALID; 558 j->fp = NULL; 559 j->filename = filename; 560 j->index = NULL; 561 j->rawindex = NULL; 562 563 result = isc_stdio_open(j->filename, write ? "rb+" : "rb", &fp); 564 565 if (result == ISC_R_FILENOTFOUND) { 566 if (create) { 567 isc_log_write(JOURNAL_COMMON_LOGARGS, 568 ISC_LOG_INFO, 569 "journal file %s does not exist, " 570 "creating it", 571 j->filename); 572 CHECK(journal_file_create(mctx, filename)); 573 /* 574 * Retry. 575 */ 576 result = isc_stdio_open(j->filename, "rb+", &fp); 577 } else { 578 FAIL(ISC_R_NOTFOUND); 579 } 580 } 581 if (result != ISC_R_SUCCESS) { 582 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 583 "%s: open: %s", 584 j->filename, isc_result_totext(result)); 585 FAIL(ISC_R_UNEXPECTED); 586 } 587 588 j->fp = fp; 589 590 /* 591 * Set magic early so that seek/read can succeed. 592 */ 593 j->magic = DNS_JOURNAL_MAGIC; 594 595 CHECK(journal_seek(j, 0)); 596 CHECK(journal_read(j, &rawheader, sizeof(rawheader))); 597 598 if (memcmp(rawheader.h.format, initial_journal_header.format, 599 sizeof(initial_journal_header.format)) != 0) { 600 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 601 "%s: journal format not recognized", 602 j->filename); 603 FAIL(ISC_R_UNEXPECTED); 604 } 605 journal_header_decode(&rawheader, &j->header); 606 607 /* 608 * If there is an index, read the raw index into a dynamically 609 * allocated buffer and then convert it into a cooked index. 610 */ 611 if (j->header.index_size != 0) { 612 unsigned int i; 613 unsigned int rawbytes; 614 unsigned char *p; 615 616 rawbytes = j->header.index_size * sizeof(journal_rawpos_t); 617 j->rawindex = isc_mem_get(mctx, rawbytes); 618 if (j->rawindex == NULL) 619 FAIL(ISC_R_NOMEMORY); 620 621 CHECK(journal_read(j, j->rawindex, rawbytes)); 622 623 j->index = isc_mem_get(mctx, j->header.index_size * 624 sizeof(journal_pos_t)); 625 if (j->index == NULL) 626 FAIL(ISC_R_NOMEMORY); 627 628 p = j->rawindex; 629 for (i = 0; i < j->header.index_size; i++) { 630 j->index[i].serial = decode_uint32(p); 631 p += 4; 632 j->index[i].offset = decode_uint32(p); 633 p += 4; 634 } 635 INSIST(p == j->rawindex + rawbytes); 636 } 637 j->offset = -1; /* Invalid, must seek explicitly. */ 638 639 /* 640 * Initialize the iterator. 641 */ 642 dns_name_init(&j->it.name, NULL); 643 dns_rdata_init(&j->it.rdata); 644 645 /* 646 * Set up empty initial buffers for uncheched and checked 647 * wire format RR data. They will be reallocated 648 * later. 649 */ 650 isc_buffer_init(&j->it.source, NULL, 0); 651 isc_buffer_init(&j->it.target, NULL, 0); 652 dns_decompress_init(&j->it.dctx, -1, DNS_DECOMPRESS_NONE); 653 654 j->state = 655 write ? JOURNAL_STATE_WRITE : JOURNAL_STATE_READ; 656 657 *journalp = j; 658 return (ISC_R_SUCCESS); 659 660 failure: 661 j->magic = 0; 662 if (j->index != NULL) { 663 isc_mem_put(j->mctx, j->index, j->header.index_size * 664 sizeof(journal_rawpos_t)); 665 j->index = NULL; 666 } 667 if (j->fp != NULL) 668 (void)isc_stdio_close(j->fp); 669 isc_mem_put(j->mctx, j, sizeof(*j)); 670 return (result); 671} 672 673isc_result_t 674dns_journal_open(isc_mem_t *mctx, const char *filename, isc_boolean_t write, 675 dns_journal_t **journalp) { 676 return (journal_open(mctx, filename, write, write, journalp)); 677} 678 679/* 680 * A comparison function defining the sorting order for 681 * entries in the IXFR-style journal file. 682 * 683 * The IXFR format requires that deletions are sorted before 684 * additions, and within either one, SOA records are sorted 685 * before others. 686 * 687 * Also sort the non-SOA records by type as a courtesy to the 688 * server receiving the IXFR - it may help reduce the amount of 689 * rdataset merging it has to do. 690 */ 691static int 692ixfr_order(const void *av, const void *bv) { 693 dns_difftuple_t const * const *ap = av; 694 dns_difftuple_t const * const *bp = bv; 695 dns_difftuple_t const *a = *ap; 696 dns_difftuple_t const *b = *bp; 697 int r; 698 699 r = (b->op == DNS_DIFFOP_DEL) - (a->op == DNS_DIFFOP_DEL); 700 if (r != 0) 701 return (r); 702 703 r = (b->rdata.type == dns_rdatatype_soa) - 704 (a->rdata.type == dns_rdatatype_soa); 705 if (r != 0) 706 return (r); 707 708 r = (a->rdata.type - b->rdata.type); 709 return (r); 710} 711 712/* 713 * Advance '*pos' to the next journal transaction. 714 * 715 * Requires: 716 * *pos refers to a valid journal transaction. 717 * 718 * Ensures: 719 * When ISC_R_SUCCESS is returned, 720 * *pos refers to the next journal transaction. 721 * 722 * Returns one of: 723 * 724 * ISC_R_SUCCESS 725 * ISC_R_NOMORE *pos pointed at the last transaction 726 * Other results due to file errors are possible. 727 */ 728static isc_result_t 729journal_next(dns_journal_t *j, journal_pos_t *pos) { 730 isc_result_t result; 731 journal_xhdr_t xhdr; 732 REQUIRE(DNS_JOURNAL_VALID(j)); 733 734 result = journal_seek(j, pos->offset); 735 if (result != ISC_R_SUCCESS) 736 return (result); 737 738 if (pos->serial == j->header.end.serial) 739 return (ISC_R_NOMORE); 740 /* 741 * Read the header of the current transaction. 742 * This will return ISC_R_NOMORE if we are at EOF. 743 */ 744 result = journal_read_xhdr(j, &xhdr); 745 if (result != ISC_R_SUCCESS) 746 return (result); 747 748 /* 749 * Check serial number consistency. 750 */ 751 if (xhdr.serial0 != pos->serial) { 752 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 753 "%s: journal file corrupt: " 754 "expected serial %u, got %u", 755 j->filename, pos->serial, xhdr.serial0); 756 return (ISC_R_UNEXPECTED); 757 } 758 759 /* 760 * Check for offset wraparound. 761 */ 762 if ((isc_offset_t)(pos->offset + sizeof(journal_rawxhdr_t) + xhdr.size) 763 < pos->offset) { 764 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 765 "%s: offset too large", j->filename); 766 return (ISC_R_UNEXPECTED); 767 } 768 769 pos->offset += sizeof(journal_rawxhdr_t) + xhdr.size; 770 pos->serial = xhdr.serial1; 771 return (ISC_R_SUCCESS); 772} 773 774/* 775 * If the index of the journal 'j' contains an entry "better" 776 * than '*best_guess', replace '*best_guess' with it. 777 * 778 * "Better" means having a serial number closer to 'serial' 779 * but not greater than 'serial'. 780 */ 781static void 782index_find(dns_journal_t *j, isc_uint32_t serial, journal_pos_t *best_guess) { 783 unsigned int i; 784 if (j->index == NULL) 785 return; 786 for (i = 0; i < j->header.index_size; i++) { 787 if (POS_VALID(j->index[i]) && 788 DNS_SERIAL_GE(serial, j->index[i].serial) && 789 DNS_SERIAL_GT(j->index[i].serial, best_guess->serial)) 790 *best_guess = j->index[i]; 791 } 792} 793 794/* 795 * Add a new index entry. If there is no room, make room by removing 796 * the odd-numbered entries and compacting the others into the first 797 * half of the index. This decimates old index entries exponentially 798 * over time, so that the index always contains a much larger fraction 799 * of recent serial numbers than of old ones. This is deliberate - 800 * most index searches are for outgoing IXFR, and IXFR tends to request 801 * recent versions more often than old ones. 802 */ 803static void 804index_add(dns_journal_t *j, journal_pos_t *pos) { 805 unsigned int i; 806 if (j->index == NULL) 807 return; 808 /* 809 * Search for a vacant position. 810 */ 811 for (i = 0; i < j->header.index_size; i++) { 812 if (! POS_VALID(j->index[i])) 813 break; 814 } 815 if (i == j->header.index_size) { 816 unsigned int k = 0; 817 /* 818 * Found no vacant position. Make some room. 819 */ 820 for (i = 0; i < j->header.index_size; i += 2) { 821 j->index[k++] = j->index[i]; 822 } 823 i = k; /* 'i' identifies the first vacant position. */ 824 while (k < j->header.index_size) { 825 POS_INVALIDATE(j->index[k]); 826 k++; 827 } 828 } 829 INSIST(i < j->header.index_size); 830 INSIST(! POS_VALID(j->index[i])); 831 832 /* 833 * Store the new index entry. 834 */ 835 j->index[i] = *pos; 836} 837 838/* 839 * Invalidate any existing index entries that could become 840 * ambiguous when a new transaction with number 'serial' is added. 841 */ 842static void 843index_invalidate(dns_journal_t *j, isc_uint32_t serial) { 844 unsigned int i; 845 if (j->index == NULL) 846 return; 847 for (i = 0; i < j->header.index_size; i++) { 848 if (! DNS_SERIAL_GT(serial, j->index[i].serial)) 849 POS_INVALIDATE(j->index[i]); 850 } 851} 852 853/* 854 * Try to find a transaction with initial serial number 'serial' 855 * in the journal 'j'. 856 * 857 * If found, store its position at '*pos' and return ISC_R_SUCCESS. 858 * 859 * If 'serial' is current (= the ending serial number of the 860 * last transaction in the journal), set '*pos' to 861 * the position immediately following the last transaction and 862 * return ISC_R_SUCCESS. 863 * 864 * If 'serial' is within the range of addressable serial numbers 865 * covered by the journal but that particular serial number is missing 866 * (from the journal, not just from the index), return ISC_R_NOTFOUND. 867 * 868 * If 'serial' is outside the range of addressable serial numbers 869 * covered by the journal, return ISC_R_RANGE. 870 * 871 */ 872static isc_result_t 873journal_find(dns_journal_t *j, isc_uint32_t serial, journal_pos_t *pos) { 874 isc_result_t result; 875 journal_pos_t current_pos; 876 REQUIRE(DNS_JOURNAL_VALID(j)); 877 878 if (DNS_SERIAL_GT(j->header.begin.serial, serial)) 879 return (ISC_R_RANGE); 880 if (DNS_SERIAL_GT(serial, j->header.end.serial)) 881 return (ISC_R_RANGE); 882 if (serial == j->header.end.serial) { 883 *pos = j->header.end; 884 return (ISC_R_SUCCESS); 885 } 886 887 current_pos = j->header.begin; 888 index_find(j, serial, ¤t_pos); 889 890 while (current_pos.serial != serial) { 891 if (DNS_SERIAL_GT(current_pos.serial, serial)) 892 return (ISC_R_NOTFOUND); 893 result = journal_next(j, ¤t_pos); 894 if (result != ISC_R_SUCCESS) 895 return (result); 896 } 897 *pos = current_pos; 898 return (ISC_R_SUCCESS); 899} 900 901isc_result_t 902dns_journal_begin_transaction(dns_journal_t *j) { 903 isc_uint32_t offset; 904 isc_result_t result; 905 journal_rawxhdr_t hdr; 906 907 REQUIRE(DNS_JOURNAL_VALID(j)); 908 REQUIRE(j->state == JOURNAL_STATE_WRITE); 909 910 /* 911 * Find the file offset where the new transaction should 912 * be written, and seek there. 913 */ 914 if (JOURNAL_EMPTY(&j->header)) { 915 offset = sizeof(journal_rawheader_t) + 916 j->header.index_size * sizeof(journal_rawpos_t); 917 } else { 918 offset = j->header.end.offset; 919 } 920 j->x.pos[0].offset = offset; 921 j->x.pos[1].offset = offset; /* Initial value, will be incremented. */ 922 j->x.n_soa = 0; 923 924 CHECK(journal_seek(j, offset)); 925 926 /* 927 * Write a dummy transaction header of all zeroes to reserve 928 * space. It will be filled in when the transaction is 929 * finished. 930 */ 931 memset(&hdr, 0, sizeof(hdr)); 932 CHECK(journal_write(j, &hdr, sizeof(hdr))); 933 j->x.pos[1].offset = j->offset; 934 935 j->state = JOURNAL_STATE_TRANSACTION; 936 result = ISC_R_SUCCESS; 937 failure: 938 return (result); 939} 940 941isc_result_t 942dns_journal_writediff(dns_journal_t *j, dns_diff_t *diff) { 943 dns_difftuple_t *t; 944 isc_buffer_t buffer; 945 void *mem = NULL; 946 unsigned int size; 947 isc_result_t result; 948 isc_region_t used; 949 950 REQUIRE(DNS_DIFF_VALID(diff)); 951 REQUIRE(j->state == JOURNAL_STATE_TRANSACTION); 952 953 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "writing to journal"); 954 (void)dns_diff_print(diff, NULL); 955 956 /* 957 * Pass 1: determine the buffer size needed, and 958 * keep track of SOA serial numbers. 959 */ 960 size = 0; 961 for (t = ISC_LIST_HEAD(diff->tuples); t != NULL; 962 t = ISC_LIST_NEXT(t, link)) 963 { 964 if (t->rdata.type == dns_rdatatype_soa) { 965 if (j->x.n_soa < 2) 966 j->x.pos[j->x.n_soa].serial = 967 dns_soa_getserial(&t->rdata); 968 j->x.n_soa++; 969 } 970 size += sizeof(journal_rawrrhdr_t); 971 size += t->name.length; /* XXX should have access macro? */ 972 size += 10; 973 size += t->rdata.length; 974 } 975 976 mem = isc_mem_get(j->mctx, size); 977 if (mem == NULL) 978 return (ISC_R_NOMEMORY); 979 980 isc_buffer_init(&buffer, mem, size); 981 982 /* 983 * Pass 2. Write RRs to buffer. 984 */ 985 for (t = ISC_LIST_HEAD(diff->tuples); t != NULL; 986 t = ISC_LIST_NEXT(t, link)) 987 { 988 /* 989 * Write the RR header. 990 */ 991 isc_buffer_putuint32(&buffer, t->name.length + 10 + 992 t->rdata.length); 993 /* 994 * Write the owner name, RR header, and RR data. 995 */ 996 isc_buffer_putmem(&buffer, t->name.ndata, t->name.length); 997 isc_buffer_putuint16(&buffer, t->rdata.type); 998 isc_buffer_putuint16(&buffer, t->rdata.rdclass); 999 isc_buffer_putuint32(&buffer, t->ttl); 1000 INSIST(t->rdata.length < 65536); 1001 isc_buffer_putuint16(&buffer, (isc_uint16_t)t->rdata.length); 1002 INSIST(isc_buffer_availablelength(&buffer) >= t->rdata.length); 1003 isc_buffer_putmem(&buffer, t->rdata.data, t->rdata.length); 1004 } 1005 1006 isc_buffer_usedregion(&buffer, &used); 1007 INSIST(used.length == size); 1008 1009 j->x.pos[1].offset += used.length; 1010 1011 /* 1012 * Write the buffer contents to the journal file. 1013 */ 1014 CHECK(journal_write(j, used.base, used.length)); 1015 1016 result = ISC_R_SUCCESS; 1017 1018 failure: 1019 if (mem != NULL) 1020 isc_mem_put(j->mctx, mem, size); 1021 return (result); 1022 1023} 1024 1025isc_result_t 1026dns_journal_commit(dns_journal_t *j) { 1027 isc_result_t result; 1028 journal_rawheader_t rawheader; 1029 1030 REQUIRE(DNS_JOURNAL_VALID(j)); 1031 REQUIRE(j->state == JOURNAL_STATE_TRANSACTION); 1032 1033 /* 1034 * Perform some basic consistency checks. 1035 */ 1036 if (j->x.n_soa != 2) { 1037 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1038 "%s: malformed transaction: %d SOAs", 1039 j->filename, j->x.n_soa); 1040 return (ISC_R_UNEXPECTED); 1041 } 1042 if (! (DNS_SERIAL_GT(j->x.pos[1].serial, j->x.pos[0].serial) || 1043 (bind8_compat && 1044 j->x.pos[1].serial == j->x.pos[0].serial))) 1045 { 1046 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1047 "%s: malformed transaction: serial number " 1048 "would decrease", j->filename); 1049 return (ISC_R_UNEXPECTED); 1050 } 1051 if (! JOURNAL_EMPTY(&j->header)) { 1052 if (j->x.pos[0].serial != j->header.end.serial) { 1053 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1054 "malformed transaction: " 1055 "%s last serial %u != " 1056 "transaction first serial %u", 1057 j->filename, 1058 j->header.end.serial, 1059 j->x.pos[0].serial); 1060 return (ISC_R_UNEXPECTED); 1061 } 1062 } 1063 1064 /* 1065 * Some old journal entries may become non-addressable 1066 * when we increment the current serial number. Purge them 1067 * by stepping header.begin forward to the first addressable 1068 * transaction. Also purge them from the index. 1069 */ 1070 if (! JOURNAL_EMPTY(&j->header)) { 1071 while (! DNS_SERIAL_GT(j->x.pos[1].serial, 1072 j->header.begin.serial)) { 1073 CHECK(journal_next(j, &j->header.begin)); 1074 } 1075 index_invalidate(j, j->x.pos[1].serial); 1076 } 1077#ifdef notyet 1078 if (DNS_SERIAL_GT(last_dumped_serial, j->x.pos[1].serial)) { 1079 force_dump(...); 1080 } 1081#endif 1082 1083 /* 1084 * Commit the transaction data to stable storage. 1085 */ 1086 CHECK(journal_fsync(j)); 1087 1088 /* 1089 * Update the transaction header. 1090 */ 1091 CHECK(journal_seek(j, j->x.pos[0].offset)); 1092 CHECK(journal_write_xhdr(j, (j->x.pos[1].offset - j->x.pos[0].offset) - 1093 sizeof(journal_rawxhdr_t), 1094 j->x.pos[0].serial, j->x.pos[1].serial)); 1095 1096 /* 1097 * Update the journal header. 1098 */ 1099 if (JOURNAL_EMPTY(&j->header)) { 1100 j->header.begin = j->x.pos[0]; 1101 } 1102 j->header.end = j->x.pos[1]; 1103 journal_header_encode(&j->header, &rawheader); 1104 CHECK(journal_seek(j, 0)); 1105 CHECK(journal_write(j, &rawheader, sizeof(rawheader))); 1106 1107 /* 1108 * Update the index. 1109 */ 1110 index_add(j, &j->x.pos[0]); 1111 1112 /* 1113 * Convert the index into on-disk format and write 1114 * it to disk. 1115 */ 1116 CHECK(index_to_disk(j)); 1117 1118 /* 1119 * Commit the header to stable storage. 1120 */ 1121 CHECK(journal_fsync(j)); 1122 1123 /* 1124 * We no longer have a transaction open. 1125 */ 1126 j->state = JOURNAL_STATE_WRITE; 1127 1128 result = ISC_R_SUCCESS; 1129 1130 failure: 1131 return (result); 1132} 1133 1134isc_result_t 1135dns_journal_write_transaction(dns_journal_t *j, dns_diff_t *diff) { 1136 isc_result_t result; 1137 CHECK(dns_diff_sort(diff, ixfr_order)); 1138 CHECK(dns_journal_begin_transaction(j)); 1139 CHECK(dns_journal_writediff(j, diff)); 1140 CHECK(dns_journal_commit(j)); 1141 result = ISC_R_SUCCESS; 1142 failure: 1143 return (result); 1144} 1145 1146void 1147dns_journal_destroy(dns_journal_t **journalp) { 1148 dns_journal_t *j = *journalp; 1149 REQUIRE(DNS_JOURNAL_VALID(j)); 1150 1151 j->it.result = ISC_R_FAILURE; 1152 dns_name_invalidate(&j->it.name); 1153 dns_decompress_invalidate(&j->it.dctx); 1154 if (j->rawindex != NULL) 1155 isc_mem_put(j->mctx, j->rawindex, j->header.index_size * 1156 sizeof(journal_rawpos_t)); 1157 if (j->index != NULL) 1158 isc_mem_put(j->mctx, j->index, j->header.index_size * 1159 sizeof(journal_pos_t)); 1160 if (j->it.target.base != NULL) 1161 isc_mem_put(j->mctx, j->it.target.base, j->it.target.length); 1162 if (j->it.source.base != NULL) 1163 isc_mem_put(j->mctx, j->it.source.base, j->it.source.length); 1164 1165 if (j->fp != NULL) 1166 (void)isc_stdio_close(j->fp); 1167 j->magic = 0; 1168 isc_mem_put(j->mctx, j, sizeof(*j)); 1169 *journalp = NULL; 1170} 1171 1172/* 1173 * Roll the open journal 'j' into the database 'db'. 1174 * A new database version will be created. 1175 */ 1176 1177/* XXX Share code with incoming IXFR? */ 1178 1179static isc_result_t 1180roll_forward(dns_journal_t *j, dns_db_t *db) { 1181 isc_buffer_t source; /* Transaction data from disk */ 1182 isc_buffer_t target; /* Ditto after _fromwire check */ 1183 isc_uint32_t db_serial; /* Database SOA serial */ 1184 isc_uint32_t end_serial; /* Last journal SOA serial */ 1185 isc_result_t result; 1186 dns_dbversion_t *ver = NULL; 1187 journal_pos_t pos; 1188 dns_diff_t diff; 1189 unsigned int n_soa = 0; 1190 unsigned int n_put = 0; 1191 1192 REQUIRE(DNS_JOURNAL_VALID(j)); 1193 REQUIRE(DNS_DB_VALID(db)); 1194 1195 dns_diff_init(j->mctx, &diff); 1196 1197 /* 1198 * Set up empty initial buffers for uncheched and checked 1199 * wire format transaction data. They will be reallocated 1200 * later. 1201 */ 1202 isc_buffer_init(&source, NULL, 0); 1203 isc_buffer_init(&target, NULL, 0); 1204 1205 /* 1206 * Create the new database version. 1207 */ 1208 CHECK(dns_db_newversion(db, &ver)); 1209 1210 /* 1211 * Get the current database SOA serial number. 1212 */ 1213 CHECK(dns_db_getsoaserial(db, ver, &db_serial)); 1214 1215 /* 1216 * Locate a journal entry for the current database serial. 1217 */ 1218 CHECK(journal_find(j, db_serial, &pos)); 1219 /* 1220 * XXX do more drastic things, like marking zone stale, 1221 * if this fails? 1222 */ 1223 /* 1224 * XXXRTH The zone code should probably mark the zone as bad and 1225 * scream loudly into the log if this is a dynamic update 1226 * log reply that failed. 1227 */ 1228 1229 end_serial = dns_journal_last_serial(j); 1230 if (db_serial == end_serial) 1231 CHECK(DNS_R_UPTODATE); 1232 1233 CHECK(dns_journal_iter_init(j, db_serial, end_serial)); 1234 1235 for (result = dns_journal_first_rr(j); 1236 result == ISC_R_SUCCESS; 1237 result = dns_journal_next_rr(j)) 1238 { 1239 dns_name_t *name; 1240 isc_uint32_t ttl; 1241 dns_rdata_t *rdata; 1242 dns_difftuple_t *tuple = NULL; 1243 1244 name = NULL; 1245 rdata = NULL; 1246 dns_journal_current_rr(j, &name, &ttl, &rdata); 1247 1248 if (rdata->type == dns_rdatatype_soa) { 1249 n_soa++; 1250 if (n_soa == 2) 1251 db_serial = j->it.current_serial; 1252 } 1253 1254 if (n_soa == 3) 1255 n_soa = 1; 1256 if (n_soa == 0) { 1257 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1258 "%s: journal file corrupt: missing " 1259 "initial SOA", j->filename); 1260 FAIL(ISC_R_UNEXPECTED); 1261 } 1262 CHECK(dns_difftuple_create(diff.mctx, n_soa == 1 ? 1263 DNS_DIFFOP_DEL : DNS_DIFFOP_ADD, 1264 name, ttl, rdata, &tuple)); 1265 dns_diff_append(&diff, &tuple); 1266 1267 if (++n_put > 100) { 1268 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), 1269 "%s: applying diff to database (%u)", 1270 j->filename, db_serial); 1271 (void)dns_diff_print(&diff, NULL); 1272 CHECK(dns_diff_apply(&diff, db, ver)); 1273 dns_diff_clear(&diff); 1274 n_put = 0; 1275 } 1276 } 1277 if (result == ISC_R_NOMORE) 1278 result = ISC_R_SUCCESS; 1279 CHECK(result); 1280 1281 if (n_put != 0) { 1282 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), 1283 "%s: applying final diff to database (%u)", 1284 j->filename, db_serial); 1285 (void)dns_diff_print(&diff, NULL); 1286 CHECK(dns_diff_apply(&diff, db, ver)); 1287 dns_diff_clear(&diff); 1288 } 1289 1290 failure: 1291 if (ver != NULL) 1292 dns_db_closeversion(db, &ver, result == ISC_R_SUCCESS ? 1293 ISC_TRUE : ISC_FALSE); 1294 1295 if (source.base != NULL) 1296 isc_mem_put(j->mctx, source.base, source.length); 1297 if (target.base != NULL) 1298 isc_mem_put(j->mctx, target.base, target.length); 1299 1300 dns_diff_clear(&diff); 1301 1302 return (result); 1303} 1304 1305isc_result_t 1306dns_journal_rollforward(isc_mem_t *mctx, dns_db_t *db, const char *filename) { 1307 dns_journal_t *j; 1308 isc_result_t result; 1309 1310 REQUIRE(DNS_DB_VALID(db)); 1311 REQUIRE(filename != NULL); 1312 1313 j = NULL; 1314 result = dns_journal_open(mctx, filename, ISC_FALSE, &j); 1315 if (result == ISC_R_NOTFOUND) { 1316 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), 1317 "no journal file, but that's OK"); 1318 return (DNS_R_NOJOURNAL); 1319 } 1320 if (result != ISC_R_SUCCESS) 1321 return (result); 1322 if (JOURNAL_EMPTY(&j->header)) 1323 result = DNS_R_UPTODATE; 1324 else 1325 result = roll_forward(j, db); 1326 1327 dns_journal_destroy(&j); 1328 1329 return (result); 1330} 1331 1332isc_result_t 1333dns_journal_print(isc_mem_t *mctx, const char *filename, FILE *file) { 1334 dns_journal_t *j; 1335 isc_buffer_t source; /* Transaction data from disk */ 1336 isc_buffer_t target; /* Ditto after _fromwire check */ 1337 isc_uint32_t start_serial; /* Database SOA serial */ 1338 isc_uint32_t end_serial; /* Last journal SOA serial */ 1339 isc_result_t result; 1340 dns_diff_t diff; 1341 unsigned int n_soa = 0; 1342 unsigned int n_put = 0; 1343 1344 REQUIRE(filename != NULL); 1345 1346 j = NULL; 1347 result = dns_journal_open(mctx, filename, ISC_FALSE, &j); 1348 if (result == ISC_R_NOTFOUND) { 1349 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "no journal file"); 1350 return (DNS_R_NOJOURNAL); 1351 } 1352 1353 if (result != ISC_R_SUCCESS) { 1354 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1355 "journal open failure: %s: %s", 1356 isc_result_totext(result), j->filename); 1357 return (result); 1358 } 1359 1360 dns_diff_init(j->mctx, &diff); 1361 1362 /* 1363 * Set up empty initial buffers for uncheched and checked 1364 * wire format transaction data. They will be reallocated 1365 * later. 1366 */ 1367 isc_buffer_init(&source, NULL, 0); 1368 isc_buffer_init(&target, NULL, 0); 1369 1370 start_serial = dns_journal_first_serial(j); 1371 end_serial = dns_journal_last_serial(j); 1372 1373 CHECK(dns_journal_iter_init(j, start_serial, end_serial)); 1374 1375 for (result = dns_journal_first_rr(j); 1376 result == ISC_R_SUCCESS; 1377 result = dns_journal_next_rr(j)) 1378 { 1379 dns_name_t *name; 1380 isc_uint32_t ttl; 1381 dns_rdata_t *rdata; 1382 dns_difftuple_t *tuple = NULL; 1383 1384 name = NULL; 1385 rdata = NULL; 1386 dns_journal_current_rr(j, &name, &ttl, &rdata); 1387 1388 if (rdata->type == dns_rdatatype_soa) 1389 n_soa++; 1390 1391 if (n_soa == 3) 1392 n_soa = 1; 1393 if (n_soa == 0) { 1394 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1395 "%s: journal file corrupt: missing " 1396 "initial SOA", j->filename); 1397 FAIL(ISC_R_UNEXPECTED); 1398 } 1399 CHECK(dns_difftuple_create(diff.mctx, n_soa == 1 ? 1400 DNS_DIFFOP_DEL : DNS_DIFFOP_ADD, 1401 name, ttl, rdata, &tuple)); 1402 dns_diff_append(&diff, &tuple); 1403 1404 if (++n_put > 100) { 1405 result = dns_diff_print(&diff, file); 1406 dns_diff_clear(&diff); 1407 n_put = 0; 1408 if (result != ISC_R_SUCCESS) 1409 break; 1410 } 1411 } 1412 if (result == ISC_R_NOMORE) 1413 result = ISC_R_SUCCESS; 1414 CHECK(result); 1415 1416 if (n_put != 0) { 1417 result = dns_diff_print(&diff, file); 1418 dns_diff_clear(&diff); 1419 } 1420 goto cleanup; 1421 1422 failure: 1423 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1424 "%s: cannot print: journal file corrupt", j->filename); 1425 1426 cleanup: 1427 if (source.base != NULL) 1428 isc_mem_put(j->mctx, source.base, source.length); 1429 if (target.base != NULL) 1430 isc_mem_put(j->mctx, target.base, target.length); 1431 1432 dns_diff_clear(&diff); 1433 dns_journal_destroy(&j); 1434 1435 return (result); 1436} 1437 1438/**************************************************************************/ 1439/* 1440 * Miscellaneous accessors. 1441 */ 1442isc_uint32_t dns_journal_first_serial(dns_journal_t *j) { 1443 return (j->header.begin.serial); 1444} 1445 1446isc_uint32_t dns_journal_last_serial(dns_journal_t *j) { 1447 return (j->header.end.serial); 1448} 1449 1450/**************************************************************************/ 1451/* 1452 * Iteration support. 1453 * 1454 * When serving an outgoing IXFR, we transmit a part the journal starting 1455 * at the serial number in the IXFR request and ending at the serial 1456 * number that is current when the IXFR request arrives. The ending 1457 * serial number is not necessarily at the end of the journal: 1458 * the journal may grow while the IXFR is in progress, but we stop 1459 * when we reach the serial number that was current when the IXFR started. 1460 */ 1461 1462static isc_result_t read_one_rr(dns_journal_t *j); 1463 1464/* 1465 * Make sure the buffer 'b' is has at least 'size' bytes 1466 * allocated, and clear it. 1467 * 1468 * Requires: 1469 * Either b->base is NULL, or it points to b->length bytes of memory 1470 * previously allocated by isc_mem_get(). 1471 */ 1472 1473static isc_result_t 1474size_buffer(isc_mem_t *mctx, isc_buffer_t *b, unsigned size) { 1475 if (b->length < size) { 1476 void *mem = isc_mem_get(mctx, size); 1477 if (mem == NULL) 1478 return (ISC_R_NOMEMORY); 1479 if (b->base != NULL) 1480 isc_mem_put(mctx, b->base, b->length); 1481 b->base = mem; 1482 b->length = size; 1483 } 1484 isc_buffer_clear(b); 1485 return (ISC_R_SUCCESS); 1486} 1487 1488isc_result_t 1489dns_journal_iter_init(dns_journal_t *j, 1490 isc_uint32_t begin_serial, isc_uint32_t end_serial) 1491{ 1492 isc_result_t result; 1493 1494 CHECK(journal_find(j, begin_serial, &j->it.bpos)); 1495 INSIST(j->it.bpos.serial == begin_serial); 1496 1497 CHECK(journal_find(j, end_serial, &j->it.epos)); 1498 INSIST(j->it.epos.serial == end_serial); 1499 1500 result = ISC_R_SUCCESS; 1501 failure: 1502 j->it.result = result; 1503 return (j->it.result); 1504} 1505 1506 1507isc_result_t 1508dns_journal_first_rr(dns_journal_t *j) { 1509 isc_result_t result; 1510 1511 /* 1512 * Seek to the beginning of the first transaction we are 1513 * interested in. 1514 */ 1515 CHECK(journal_seek(j, j->it.bpos.offset)); 1516 j->it.current_serial = j->it.bpos.serial; 1517 1518 j->it.xsize = 0; /* We have no transaction data yet... */ 1519 j->it.xpos = 0; /* ...and haven't used any of it. */ 1520 1521 return (read_one_rr(j)); 1522 1523 failure: 1524 return (result); 1525} 1526 1527static isc_result_t 1528read_one_rr(dns_journal_t *j) { 1529 isc_result_t result; 1530 1531 dns_rdatatype_t rdtype; 1532 dns_rdataclass_t rdclass; 1533 unsigned int rdlen; 1534 isc_uint32_t ttl; 1535 journal_xhdr_t xhdr; 1536 journal_rrhdr_t rrhdr; 1537 1538 INSIST(j->offset <= j->it.epos.offset); 1539 if (j->offset == j->it.epos.offset) 1540 return (ISC_R_NOMORE); 1541 if (j->it.xpos == j->it.xsize) { 1542 /* 1543 * We are at a transaction boundary. 1544 * Read another transaction header. 1545 */ 1546 CHECK(journal_read_xhdr(j, &xhdr)); 1547 if (xhdr.size == 0) { 1548 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1549 "%s: journal corrupt: empty transaction", 1550 j->filename); 1551 FAIL(ISC_R_UNEXPECTED); 1552 } 1553 if (xhdr.serial0 != j->it.current_serial) { 1554 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1555 "%s: journal file corrupt: " 1556 "expected serial %u, got %u", 1557 j->filename, 1558 j->it.current_serial, xhdr.serial0); 1559 FAIL(ISC_R_UNEXPECTED); 1560 } 1561 j->it.xsize = xhdr.size; 1562 j->it.xpos = 0; 1563 } 1564 /* 1565 * Read an RR. 1566 */ 1567 result = journal_read_rrhdr(j, &rrhdr); 1568 /* 1569 * Perform a sanity check on the journal RR size. 1570 * The smallest possible RR has a 1-byte owner name 1571 * and a 10-byte header. The largest possible 1572 * RR has 65535 bytes of data, a header, and a maximum- 1573 * size owner name, well below 70 k total. 1574 */ 1575 if (rrhdr.size < 1+10 || rrhdr.size > 70000) { 1576 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1577 "%s: journal corrupt: impossible RR size " 1578 "(%d bytes)", j->filename, rrhdr.size); 1579 FAIL(ISC_R_UNEXPECTED); 1580 } 1581 1582 CHECK(size_buffer(j->mctx, &j->it.source, rrhdr.size)); 1583 CHECK(journal_read(j, j->it.source.base, rrhdr.size)); 1584 isc_buffer_add(&j->it.source, rrhdr.size); 1585 1586 /* 1587 * The target buffer is made the same size 1588 * as the source buffer, with the assumption that when 1589 * no compression in present, the output of dns_*_fromwire() 1590 * is no larger than the input. 1591 */ 1592 CHECK(size_buffer(j->mctx, &j->it.target, rrhdr.size)); 1593 1594 /* 1595 * Parse the owner name. We don't know where it 1596 * ends yet, so we make the entire "remaining" 1597 * part of the buffer "active". 1598 */ 1599 isc_buffer_setactive(&j->it.source, 1600 j->it.source.used - j->it.source.current); 1601 CHECK(dns_name_fromwire(&j->it.name, &j->it.source, 1602 &j->it.dctx, 0, &j->it.target)); 1603 1604 /* 1605 * Check that the RR header is there, and parse it. 1606 */ 1607 if (isc_buffer_remaininglength(&j->it.source) < 10) 1608 FAIL(DNS_R_FORMERR); 1609 1610 rdtype = isc_buffer_getuint16(&j->it.source); 1611 rdclass = isc_buffer_getuint16(&j->it.source); 1612 ttl = isc_buffer_getuint32(&j->it.source); 1613 rdlen = isc_buffer_getuint16(&j->it.source); 1614 1615 /* 1616 * Parse the rdata. 1617 */ 1618 isc_buffer_setactive(&j->it.source, rdlen); 1619 dns_rdata_reset(&j->it.rdata); 1620 CHECK(dns_rdata_fromwire(&j->it.rdata, rdclass, 1621 rdtype, &j->it.source, &j->it.dctx, 1622 0, &j->it.target)); 1623 j->it.ttl = ttl; 1624 1625 j->it.xpos += sizeof(journal_rawrrhdr_t) + rrhdr.size; 1626 if (rdtype == dns_rdatatype_soa) { 1627 /* XXX could do additional consistency checks here */ 1628 j->it.current_serial = dns_soa_getserial(&j->it.rdata); 1629 } 1630 1631 result = ISC_R_SUCCESS; 1632 1633 failure: 1634 j->it.result = result; 1635 return (result); 1636} 1637 1638isc_result_t 1639dns_journal_next_rr(dns_journal_t *j) { 1640 j->it.result = read_one_rr(j); 1641 return (j->it.result); 1642} 1643 1644void 1645dns_journal_current_rr(dns_journal_t *j, dns_name_t **name, isc_uint32_t *ttl, 1646 dns_rdata_t **rdata) 1647{ 1648 REQUIRE(j->it.result == ISC_R_SUCCESS); 1649 *name = &j->it.name; 1650 *ttl = j->it.ttl; 1651 *rdata = &j->it.rdata; 1652} 1653 1654/**************************************************************************/ 1655/* 1656 * Generating diffs from databases 1657 */ 1658 1659/* 1660 * Construct a diff containing all the RRs at the current name of the 1661 * database iterator 'dbit' in database 'db', version 'ver'. 1662 * Set '*name' to the current name, and append the diff to 'diff'. 1663 * All new tuples will have the operation 'op'. 1664 * 1665 * Requires: 'name' must have buffer large enough to hold the name. 1666 * Typically, a dns_fixedname_t would be used. 1667 */ 1668static isc_result_t 1669get_name_diff(dns_db_t *db, dns_dbversion_t *ver, isc_stdtime_t now, 1670 dns_dbiterator_t *dbit, dns_name_t *name, dns_diffop_t op, 1671 dns_diff_t *diff) 1672{ 1673 isc_result_t result; 1674 dns_dbnode_t *node = NULL; 1675 dns_rdatasetiter_t *rdsiter = NULL; 1676 dns_difftuple_t *tuple = NULL; 1677 1678 result = dns_dbiterator_current(dbit, &node, name); 1679 if (result != ISC_R_SUCCESS) 1680 return (result); 1681 1682 result = dns_db_allrdatasets(db, node, ver, now, &rdsiter); 1683 if (result != ISC_R_SUCCESS) 1684 goto cleanup_node; 1685 1686 for (result = dns_rdatasetiter_first(rdsiter); 1687 result == ISC_R_SUCCESS; 1688 result = dns_rdatasetiter_next(rdsiter)) 1689 { 1690 dns_rdataset_t rdataset; 1691 1692 dns_rdataset_init(&rdataset); 1693 dns_rdatasetiter_current(rdsiter, &rdataset); 1694 1695 for (result = dns_rdataset_first(&rdataset); 1696 result == ISC_R_SUCCESS; 1697 result = dns_rdataset_next(&rdataset)) 1698 { 1699 dns_rdata_t rdata = DNS_RDATA_INIT; 1700 dns_rdataset_current(&rdataset, &rdata); 1701 result = dns_difftuple_create(diff->mctx, op, name, 1702 rdataset.ttl, &rdata, 1703 &tuple); 1704 if (result != ISC_R_SUCCESS) { 1705 dns_rdataset_disassociate(&rdataset); 1706 goto cleanup_iterator; 1707 } 1708 dns_diff_append(diff, &tuple); 1709 } 1710 dns_rdataset_disassociate(&rdataset); 1711 if (result != ISC_R_NOMORE) 1712 goto cleanup_iterator; 1713 } 1714 if (result != ISC_R_NOMORE) 1715 goto cleanup_iterator; 1716 1717 result = ISC_R_SUCCESS; 1718 1719 cleanup_iterator: 1720 dns_rdatasetiter_destroy(&rdsiter); 1721 1722 cleanup_node: 1723 dns_db_detachnode(db, &node); 1724 1725 return (result); 1726} 1727 1728/* 1729 * Comparison function for use by dns_diff_subtract when sorting 1730 * the diffs to be subtracted. The sort keys are the rdata type 1731 * and the rdata itself. The owner name is ignored, because 1732 * it is known to be the same for all tuples. 1733 */ 1734static int 1735rdata_order(const void *av, const void *bv) { 1736 dns_difftuple_t const * const *ap = av; 1737 dns_difftuple_t const * const *bp = bv; 1738 dns_difftuple_t const *a = *ap; 1739 dns_difftuple_t const *b = *bp; 1740 int r; 1741 r = (b->rdata.type - a->rdata.type); 1742 if (r != 0) 1743 return (r); 1744 r = dns_rdata_compare(&a->rdata, &b->rdata); 1745 return (r); 1746} 1747 1748static isc_result_t 1749dns_diff_subtract(dns_diff_t diff[2], dns_diff_t *r) { 1750 isc_result_t result; 1751 dns_difftuple_t *p[2]; 1752 int i, t; 1753 CHECK(dns_diff_sort(&diff[0], rdata_order)); 1754 CHECK(dns_diff_sort(&diff[1], rdata_order)); 1755 1756 for (;;) { 1757 p[0] = ISC_LIST_HEAD(diff[0].tuples); 1758 p[1] = ISC_LIST_HEAD(diff[1].tuples); 1759 if (p[0] == NULL && p[1] == NULL) 1760 break; 1761 1762 for (i = 0; i < 2; i++) 1763 if (p[!i] == NULL) { 1764 ISC_LIST_UNLINK(diff[i].tuples, p[i], link); 1765 ISC_LIST_APPEND(r->tuples, p[i], link); 1766 goto next; 1767 } 1768 t = rdata_order(&p[0], &p[1]); 1769 if (t < 0) { 1770 ISC_LIST_UNLINK(diff[0].tuples, p[0], link); 1771 ISC_LIST_APPEND(r->tuples, p[0], link); 1772 goto next; 1773 } 1774 if (t > 0) { 1775 ISC_LIST_UNLINK(diff[1].tuples, p[1], link); 1776 ISC_LIST_APPEND(r->tuples, p[1], link); 1777 goto next; 1778 } 1779 INSIST(t == 0); 1780 /* 1781 * Identical RRs in both databases; skip them both. 1782 */ 1783 for (i = 0; i < 2; i++) { 1784 ISC_LIST_UNLINK(diff[i].tuples, p[i], link); 1785 dns_difftuple_free(&p[i]); 1786 } 1787 next: ; 1788 } 1789 result = ISC_R_SUCCESS; 1790 failure: 1791 return (result); 1792} 1793 1794/* 1795 * Compare the databases 'dba' and 'dbb' and generate a journal 1796 * entry containing the changes to make 'dba' from 'dbb' (note 1797 * the order). This journal entry will consist of a single, 1798 * possibly very large transaction. 1799 */ 1800 1801isc_result_t 1802dns_db_diff(isc_mem_t *mctx, 1803 dns_db_t *dba, dns_dbversion_t *dbvera, 1804 dns_db_t *dbb, dns_dbversion_t *dbverb, 1805 const char *journal_filename) 1806{ 1807 dns_db_t *db[2]; 1808 dns_dbversion_t *ver[2]; 1809 dns_dbiterator_t *dbit[2] = { NULL, NULL }; 1810 isc_boolean_t have[2] = { ISC_FALSE, ISC_FALSE }; 1811 dns_fixedname_t fixname[2]; 1812 isc_result_t result, itresult[2]; 1813 dns_diff_t diff[2], resultdiff; 1814 int i, t; 1815 dns_journal_t *journal = NULL; 1816 1817 db[0] = dba, db[1] = dbb; 1818 ver[0] = dbvera, ver[1] = dbverb; 1819 1820 dns_diff_init(mctx, &diff[0]); 1821 dns_diff_init(mctx, &diff[1]); 1822 dns_diff_init(mctx, &resultdiff); 1823 1824 dns_fixedname_init(&fixname[0]); 1825 dns_fixedname_init(&fixname[1]); 1826 1827 result = dns_journal_open(mctx, journal_filename, ISC_TRUE, &journal); 1828 if (result != ISC_R_SUCCESS) 1829 return (result); 1830 1831 result = dns_db_createiterator(db[0], ISC_FALSE, &dbit[0]); 1832 if (result != ISC_R_SUCCESS) 1833 goto cleanup_journal; 1834 result = dns_db_createiterator(db[1], ISC_FALSE, &dbit[1]); 1835 if (result != ISC_R_SUCCESS) 1836 goto cleanup_interator0; 1837 1838 itresult[0] = dns_dbiterator_first(dbit[0]); 1839 itresult[1] = dns_dbiterator_first(dbit[1]); 1840 1841 for (;;) { 1842 for (i = 0; i < 2; i++) { 1843 if (! have[i] && itresult[i] == ISC_R_SUCCESS) { 1844 CHECK(get_name_diff(db[i], ver[i], 0, dbit[i], 1845 dns_fixedname_name(&fixname[i]), 1846 i == 0 ? 1847 DNS_DIFFOP_ADD : 1848 DNS_DIFFOP_DEL, 1849 &diff[i])); 1850 itresult[i] = dns_dbiterator_next(dbit[i]); 1851 have[i] = ISC_TRUE; 1852 } 1853 } 1854 1855 if (! have[0] && ! have[1]) { 1856 INSIST(ISC_LIST_EMPTY(diff[0].tuples)); 1857 INSIST(ISC_LIST_EMPTY(diff[1].tuples)); 1858 break; 1859 } 1860 1861 for (i = 0; i < 2; i++) { 1862 if (! have[!i]) { 1863 ISC_LIST_APPENDLIST(resultdiff.tuples, 1864 diff[i].tuples, link); 1865 INSIST(ISC_LIST_EMPTY(diff[i].tuples)); 1866 have[i] = ISC_FALSE; 1867 goto next; 1868 } 1869 } 1870 1871 t = dns_name_compare(dns_fixedname_name(&fixname[0]), 1872 dns_fixedname_name(&fixname[1])); 1873 if (t < 0) { 1874 ISC_LIST_APPENDLIST(resultdiff.tuples, 1875 diff[0].tuples, link); 1876 INSIST(ISC_LIST_EMPTY(diff[0].tuples)); 1877 have[0] = ISC_FALSE; 1878 continue; 1879 } 1880 if (t > 0) { 1881 ISC_LIST_APPENDLIST(resultdiff.tuples, 1882 diff[1].tuples, link); 1883 INSIST(ISC_LIST_EMPTY(diff[1].tuples)); 1884 have[1] = ISC_FALSE; 1885 continue; 1886 } 1887 INSIST(t == 0); 1888 CHECK(dns_diff_subtract(diff, &resultdiff)); 1889 INSIST(ISC_LIST_EMPTY(diff[0].tuples)); 1890 INSIST(ISC_LIST_EMPTY(diff[1].tuples)); 1891 have[0] = have[1] = ISC_FALSE; 1892 next: ; 1893 } 1894 if (itresult[0] != ISC_R_NOMORE) 1895 FAIL(itresult[0]); 1896 if (itresult[1] != ISC_R_NOMORE) 1897 FAIL(itresult[1]); 1898 1899 if (ISC_LIST_EMPTY(resultdiff.tuples)) { 1900 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "no changes"); 1901 } else { 1902 CHECK(dns_journal_write_transaction(journal, &resultdiff)); 1903 } 1904 INSIST(ISC_LIST_EMPTY(diff[0].tuples)); 1905 INSIST(ISC_LIST_EMPTY(diff[1].tuples)); 1906 1907 failure: 1908 dns_diff_clear(&resultdiff); 1909 dns_dbiterator_destroy(&dbit[1]); 1910 cleanup_interator0: 1911 dns_dbiterator_destroy(&dbit[0]); 1912 cleanup_journal: 1913 dns_journal_destroy(&journal); 1914 return (result); 1915} 1916 1917isc_result_t 1918dns_journal_compact(isc_mem_t *mctx, char *filename, isc_uint32_t serial, 1919 isc_uint32_t target_size) 1920{ 1921 unsigned int i; 1922 journal_pos_t best_guess; 1923 journal_pos_t current_pos; 1924 dns_journal_t *j = NULL; 1925 journal_rawheader_t rawheader; 1926 unsigned int copy_length; 1927 unsigned int len; 1928 char *buf = NULL; 1929 unsigned int size = 0; 1930 isc_result_t result; 1931 unsigned int indexend; 1932 1933 CHECK(journal_open(mctx, filename, ISC_TRUE, ISC_FALSE, &j)); 1934 1935 if (JOURNAL_EMPTY(&j->header)) { 1936 dns_journal_destroy(&j); 1937 return (ISC_R_SUCCESS); 1938 } 1939 1940 if (DNS_SERIAL_GT(j->header.begin.serial, serial) || 1941 DNS_SERIAL_GT(serial, j->header.end.serial)) { 1942 dns_journal_destroy(&j); 1943 return (ISC_R_RANGE); 1944 } 1945 1946 /* 1947 * Cope with very small target sizes. 1948 */ 1949 indexend = sizeof(journal_rawheader_t) + 1950 j->header.index_size * sizeof(journal_rawpos_t); 1951 if (target_size < indexend * 2) 1952 target_size = target_size/2 + indexend; 1953 1954 /* 1955 * See if there is any work to do. 1956 */ 1957 if ((isc_uint32_t) j->header.end.offset < target_size) { 1958 dns_journal_destroy(&j); 1959 return (ISC_R_SUCCESS); 1960 } 1961 1962 /* 1963 * Remove overhead so space test below can succeed. 1964 */ 1965 if (target_size >= indexend) 1966 target_size -= indexend; 1967 1968 /* 1969 * Find if we can create enough free space. 1970 */ 1971 best_guess = j->header.begin; 1972 for (i = 0; i < j->header.index_size; i++) { 1973 if (POS_VALID(j->index[i]) && 1974 DNS_SERIAL_GE(serial, j->index[i].serial) && 1975 ((isc_uint32_t)(j->header.end.offset - j->index[i].offset) 1976 >= target_size / 2) && 1977 j->index[i].offset > best_guess.offset) 1978 best_guess = j->index[i]; 1979 } 1980 1981 current_pos = best_guess; 1982 while (current_pos.serial != serial) { 1983 CHECK(journal_next(j, ¤t_pos)); 1984 if (current_pos.serial == j->header.end.serial) 1985 break; 1986 1987 if (DNS_SERIAL_GE(serial, current_pos.serial) && 1988 ((isc_uint32_t)(j->header.end.offset - current_pos.offset) 1989 >= (target_size / 2)) && 1990 current_pos.offset > best_guess.offset) 1991 best_guess = current_pos; 1992 else 1993 break; 1994 } 1995 1996 INSIST(best_guess.serial != j->header.end.serial); 1997 if (best_guess.serial != serial) 1998 CHECK(journal_next(j, &best_guess)); 1999 2000 /* 2001 * Enough space to proceed? 2002 */ 2003 if ((isc_uint32_t) (j->header.end.offset - best_guess.offset) > 2004 (isc_uint32_t) (best_guess.offset - indexend)) { 2005 dns_journal_destroy(&j); 2006 return (ISC_R_NOSPACE); 2007 } 2008 2009 copy_length = j->header.end.offset - best_guess.offset; 2010 2011 /* 2012 * Invalidate entire index, will be rebuilt at end. 2013 */ 2014 for (i = 0; i < j->header.index_size; i++) { 2015 if (POS_VALID(j->index[i])) 2016 POS_INVALIDATE(j->index[i]); 2017 } 2018 2019 /* 2020 * Convert the index into on-disk format and write 2021 * it to disk. 2022 */ 2023 CHECK(index_to_disk(j)); 2024 CHECK(journal_fsync(j)); 2025 2026 /* 2027 * Update the journal header. 2028 */ 2029 if (copy_length == 0) { 2030 j->header.begin.serial = 0; 2031 j->header.end.serial = 0; 2032 j->header.begin.offset = 0; 2033 j->header.end.offset = 0; 2034 } else { 2035 j->header.begin = best_guess; 2036 } 2037 journal_header_encode(&j->header, &rawheader); 2038 CHECK(journal_seek(j, 0)); 2039 CHECK(journal_write(j, &rawheader, sizeof(rawheader))); 2040 CHECK(journal_fsync(j)); 2041 2042 if (copy_length != 0) { 2043 /* 2044 * Copy best_guess to end into space just freed. 2045 */ 2046 size = 64*1024; 2047 if (copy_length < size) 2048 size = copy_length; 2049 buf = isc_mem_get(mctx, size); 2050 if (buf == NULL) { 2051 result = ISC_R_NOMEMORY; 2052 goto failure; 2053 } 2054 2055 for (i = 0; i < copy_length; i += size) { 2056 len = (copy_length - i) > size ? size : 2057 (copy_length - i); 2058 CHECK(journal_seek(j, best_guess.offset + i)); 2059 CHECK(journal_read(j, buf, len)); 2060 CHECK(journal_seek(j, indexend + i)); 2061 CHECK(journal_write(j, buf, len)); 2062 } 2063 2064 CHECK(journal_fsync(j)); 2065 2066 /* 2067 * Compute new header. 2068 */ 2069 j->header.begin.offset = indexend; 2070 j->header.end.offset = indexend + copy_length; 2071 /* 2072 * Update the journal header. 2073 */ 2074 journal_header_encode(&j->header, &rawheader); 2075 CHECK(journal_seek(j, 0)); 2076 CHECK(journal_write(j, &rawheader, sizeof(rawheader))); 2077 CHECK(journal_fsync(j)); 2078 2079 /* 2080 * Build new index. 2081 */ 2082 current_pos = j->header.begin; 2083 while (current_pos.serial != j->header.end.serial) { 2084 index_add(j, ¤t_pos); 2085 CHECK(journal_next(j, ¤t_pos)); 2086 } 2087 2088 /* 2089 * Write index. 2090 */ 2091 CHECK(index_to_disk(j)); 2092 CHECK(journal_fsync(j)); 2093 2094 indexend = j->header.end.offset; 2095 } 2096 dns_journal_destroy(&j); 2097 (void)isc_file_truncate(filename, (isc_offset_t)indexend); 2098 result = ISC_R_SUCCESS; 2099 2100 failure: 2101 if (buf != NULL) 2102 isc_mem_put(mctx, buf, size); 2103 if (j != NULL) 2104 dns_journal_destroy(&j); 2105 return (result); 2106} 2107 2108static isc_result_t 2109index_to_disk(dns_journal_t *j) { 2110 isc_result_t result = ISC_R_SUCCESS; 2111 2112 if (j->header.index_size != 0) { 2113 unsigned int i; 2114 unsigned char *p; 2115 unsigned int rawbytes; 2116 2117 rawbytes = j->header.index_size * sizeof(journal_rawpos_t); 2118 2119 p = j->rawindex; 2120 for (i = 0; i < j->header.index_size; i++) { 2121 encode_uint32(j->index[i].serial, p); 2122 p += 4; 2123 encode_uint32(j->index[i].offset, p); 2124 p += 4; 2125 } 2126 INSIST(p == j->rawindex + rawbytes); 2127 2128 CHECK(journal_seek(j, sizeof(journal_rawheader_t))); 2129 CHECK(journal_write(j, j->rawindex, rawbytes)); 2130 } 2131failure: 2132 return (result); 2133} 2134