journal.c revision 254897
1/* 2 * Copyright (C) 2004, 2005, 2007-2011, 2013 Internet Systems Consortium, Inc. ("ISC") 3 * Copyright (C) 1999-2002 Internet Software Consortium. 4 * 5 * Permission to use, copy, modify, and/or distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH 10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, 12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE 14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 15 * PERFORMANCE OF THIS SOFTWARE. 16 */ 17 18/* $Id: journal.c,v 1.120 2011/12/22 07:32:41 each Exp $ */ 19 20#include <config.h> 21 22#include <stdlib.h> 23#include <unistd.h> 24#include <errno.h> 25 26#include <isc/file.h> 27#include <isc/mem.h> 28#include <isc/stdio.h> 29#include <isc/string.h> 30#include <isc/util.h> 31 32#include <dns/compress.h> 33#include <dns/db.h> 34#include <dns/dbiterator.h> 35#include <dns/diff.h> 36#include <dns/fixedname.h> 37#include <dns/journal.h> 38#include <dns/log.h> 39#include <dns/rdataset.h> 40#include <dns/rdatasetiter.h> 41#include <dns/result.h> 42#include <dns/soa.h> 43 44/*! \file 45 * \brief Journaling. 46 * 47 * A journal file consists of 48 * 49 * \li A fixed-size header of type journal_rawheader_t. 50 * 51 * \li The index. This is an unordered array of index entries 52 * of type journal_rawpos_t giving the locations 53 * of some arbitrary subset of the journal's addressable 54 * transactions. The index entries are used as hints to 55 * speed up the process of locating a transaction with a given 56 * serial number. Unused index entries have an "offset" 57 * field of zero. 
The size of the index can vary between
 *	journal files, but does not change during the lifetime
 *	of a file.  The size can be zero.
 *
 * \li	The journal data.  This consists of one or more transactions.
 *	Each transaction begins with a transaction header of type
 *	journal_rawxhdr_t.  The transaction header is followed by a
 *	sequence of RRs, similar in structure to an IXFR difference
 *	sequence (RFC1995).  That is, the pre-transaction SOA,
 *	zero or more other deleted RRs, the post-transaction SOA,
 *	and zero or more other added RRs.  Unlike in IXFR, each RR
 *	is prefixed with a 32-bit length.
 *
 *	The journal data part grows as new transactions are
 *	appended to the file.  Only those transactions
 *	whose serial number is current-(2^31-1) to current
 *	are considered "addressable" and may be pointed
 *	to from the header or index.  They may be preceded
 *	by old transactions that are no longer addressable,
 *	and they may be followed by transactions that were
 *	appended to the journal but never committed by updating
 *	the "end" position in the header.  The latter will
 *	be overwritten when new transactions are added.
 */
/*%
 * When true, accept IXFR difference sequences where the
 * SOA serial number does not change (BIND 8 sends such
 * sequences).
 */
static isc_boolean_t bind8_compat = ISC_TRUE; /* XXX config */

/**************************************************************************/
/*
 * Miscellaneous utilities.
 */

/*%
 * The logging context, category and module arguments passed to
 * isc_log_write() by every message logged from this file.
 */
#define JOURNAL_COMMON_LOGARGS \
	dns_lctx, DNS_LOGCATEGORY_GENERAL, DNS_LOGMODULE_JOURNAL

/*%
 * JOURNAL_COMMON_LOGARGS followed by a debug log level of 'n'.
 */
#define JOURNAL_DEBUG_LOGARGS(n) \
	JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(n)

/*%
 * It would be nonsensical (or at least obtuse) to use FAIL() with an
 * ISC_R_SUCCESS code, but the test is there to keep the Solaris compiler
 * from complaining about "end-of-loop code not reached".
103 */ 104#define FAIL(code) \ 105 do { result = (code); \ 106 if (result != ISC_R_SUCCESS) goto failure; \ 107 } while (0) 108 109#define CHECK(op) \ 110 do { result = (op); \ 111 if (result != ISC_R_SUCCESS) goto failure; \ 112 } while (0) 113 114#define JOURNAL_SERIALSET 0x01U 115 116static isc_result_t index_to_disk(dns_journal_t *); 117 118static inline isc_uint32_t 119decode_uint32(unsigned char *p) { 120 return ((p[0] << 24) + 121 (p[1] << 16) + 122 (p[2] << 8) + 123 (p[3] << 0)); 124} 125 126static inline void 127encode_uint32(isc_uint32_t val, unsigned char *p) { 128 p[0] = (isc_uint8_t)(val >> 24); 129 p[1] = (isc_uint8_t)(val >> 16); 130 p[2] = (isc_uint8_t)(val >> 8); 131 p[3] = (isc_uint8_t)(val >> 0); 132} 133 134isc_result_t 135dns_db_createsoatuple(dns_db_t *db, dns_dbversion_t *ver, isc_mem_t *mctx, 136 dns_diffop_t op, dns_difftuple_t **tp) 137{ 138 isc_result_t result; 139 dns_dbnode_t *node; 140 dns_rdataset_t rdataset; 141 dns_rdata_t rdata = DNS_RDATA_INIT; 142 dns_name_t *zonename; 143 144 zonename = dns_db_origin(db); 145 146 node = NULL; 147 result = dns_db_findnode(db, zonename, ISC_FALSE, &node); 148 if (result != ISC_R_SUCCESS) 149 goto nonode; 150 151 dns_rdataset_init(&rdataset); 152 result = dns_db_findrdataset(db, node, ver, dns_rdatatype_soa, 0, 153 (isc_stdtime_t)0, &rdataset, NULL); 154 if (result != ISC_R_SUCCESS) 155 goto freenode; 156 157 result = dns_rdataset_first(&rdataset); 158 if (result != ISC_R_SUCCESS) 159 goto freenode; 160 161 dns_rdataset_current(&rdataset, &rdata); 162 163 result = dns_difftuple_create(mctx, op, zonename, rdataset.ttl, 164 &rdata, tp); 165 166 dns_rdataset_disassociate(&rdataset); 167 dns_db_detachnode(db, &node); 168 return (result); 169 170 freenode: 171 dns_db_detachnode(db, &node); 172 nonode: 173 UNEXPECTED_ERROR(__FILE__, __LINE__, "missing SOA"); 174 return (result); 175} 176 177/* Journaling */ 178 179/*% 180 * On-disk representation of a "pointer" to a journal entry. 
* These are used in the journal header to locate the beginning
 * and end of the journal, and in the journal index to locate
 * other transactions.  Both fields hold 32-bit big-endian numbers.
 */
typedef struct {
	unsigned char	serial[4];  /*%< SOA serial before update. */
	/*
	 * XXXRTH  Should offset be 8 bytes?
	 * XXXDCL ... probably, since isc_offset_t is 8 bytes on many OSs.
	 * XXXAG  ... but we will not be able to seek >2G anyway on many
	 *            platforms as long as we are using fseek() rather
	 *            than lseek().
	 */
	unsigned char	offset[4];  /*%< Offset from beginning of file. */
} journal_rawpos_t;


/*%
 * The header is of a fixed size, with some spare room for future
 * extensions.
 */
#define JOURNAL_HEADER_SIZE 64 /* Bytes. */

/*%
 * The on-disk representation of the journal header.
 * All numbers are stored in big-endian order.
 *
 * The union with 'pad' makes the type exactly JOURNAL_HEADER_SIZE
 * bytes long, provided the members of 'h' fit in that many bytes.
 */
typedef union {
	struct {
		/*% File format version ID. */
		unsigned char		format[16];
		/*% Position of the first addressable transaction */
		journal_rawpos_t	begin;
		/*% Position of the next (yet nonexistent) transaction. */
		journal_rawpos_t	end;
		/*% Number of index entries following the header. */
		unsigned char		index_size[4];
		/*% Source serial number. */
		unsigned char		sourceserial[4];
		/*% Flag bits; JOURNAL_SERIALSET is the only one used here. */
		unsigned char		flags;
	} h;
	/* Pad the header to a fixed size. */
	unsigned char pad[JOURNAL_HEADER_SIZE];
} journal_rawheader_t;

/*%
 * The on-disk representation of the transaction header.
 * There is one of these at the beginning of each transaction.
 */
typedef struct {
	unsigned char	size[4];	/*%< In bytes, excluding header. */
	unsigned char	serial0[4];	/*%< SOA serial before update. */
	unsigned char	serial1[4];	/*%< SOA serial after update. */
} journal_rawxhdr_t;

/*%
 * The on-disk representation of the RR header.
 * There is one of these at the beginning of each RR.
*/
typedef struct {
	unsigned char	size[4];	/*%< In bytes, excluding header. */
} journal_rawrrhdr_t;

/*%
 * The in-core representation of a journal position, i.e. the decoded
 * form of journal_rawpos_t: a serial number and the file offset of the
 * transaction that starts at that serial.
 */
typedef struct {
	isc_uint32_t	serial;
	isc_offset_t	offset;
} journal_pos_t;

/*% A position is in use when its offset is nonzero. */
#define POS_VALID(pos) 		((pos).offset != 0)
/*% Mark a position as unused by zeroing both of its fields. */
#define POS_INVALIDATE(pos) 	((pos).offset = 0, (pos).serial = 0)

/*%
 * The in-core representation of the journal header
 * (the decoded form of journal_rawheader_t).
 */
typedef struct {
	unsigned char 	format[16];
	journal_pos_t 	begin;
	journal_pos_t 	end;
	isc_uint32_t	index_size;
	isc_uint32_t	sourceserial;
	isc_boolean_t	serialset;
} journal_header_t;

/*%
 * The in-core representation of the transaction header.
 */

typedef struct {
	isc_uint32_t	size;
	isc_uint32_t	serial0;
	isc_uint32_t	serial1;
} journal_xhdr_t;

/*%
 * The in-core representation of the RR header.
 */
typedef struct {
	isc_uint32_t	size;
} journal_rrhdr_t;


/*%
 * Initial contents to store in the header of a newly created
 * journal file.
 *
 * The header starts with the magic string ";BIND LOG V9\n"
 * to identify the file as a BIND 9 journal file.  An ASCII
 * identification string is used rather than a binary magic
 * number to be consistent with BIND 8 (BIND 8 journal files
 * are ASCII text files).
291 */ 292 293static journal_header_t 294initial_journal_header = { ";BIND LOG V9\n", { 0, 0 }, { 0, 0 }, 0, 0, 0 }; 295 296#define JOURNAL_EMPTY(h) ((h)->begin.offset == (h)->end.offset) 297 298typedef enum { 299 JOURNAL_STATE_INVALID, 300 JOURNAL_STATE_READ, 301 JOURNAL_STATE_WRITE, 302 JOURNAL_STATE_TRANSACTION, 303 JOURNAL_STATE_INLINE 304} journal_state_t; 305 306struct dns_journal { 307 unsigned int magic; /*%< JOUR */ 308 isc_mem_t *mctx; /*%< Memory context */ 309 journal_state_t state; 310 const char *filename; /*%< Journal file name */ 311 FILE * fp; /*%< File handle */ 312 isc_offset_t offset; /*%< Current file offset */ 313 journal_header_t header; /*%< In-core journal header */ 314 unsigned char *rawindex; /*%< In-core buffer for journal index in on-disk format */ 315 journal_pos_t *index; /*%< In-core journal index */ 316 317 /*% Current transaction state (when writing). */ 318 struct { 319 unsigned int n_soa; /*%< Number of SOAs seen */ 320 journal_pos_t pos[2]; /*%< Begin/end position */ 321 } x; 322 323 /*% Iteration state (when reading). */ 324 struct { 325 /* These define the part of the journal we iterate over. */ 326 journal_pos_t bpos; /*%< Position before first, */ 327 journal_pos_t epos; /*%< and after last transaction */ 328 /* The rest is iterator state. 
*/ 329 isc_uint32_t current_serial; /*%< Current SOA serial */ 330 isc_buffer_t source; /*%< Data from disk */ 331 isc_buffer_t target; /*%< Data from _fromwire check */ 332 dns_decompress_t dctx; /*%< Dummy decompression ctx */ 333 dns_name_t name; /*%< Current domain name */ 334 dns_rdata_t rdata; /*%< Current rdata */ 335 isc_uint32_t ttl; /*%< Current TTL */ 336 unsigned int xsize; /*%< Size of transaction data */ 337 unsigned int xpos; /*%< Current position in it */ 338 isc_result_t result; /*%< Result of last call */ 339 } it; 340}; 341 342#define DNS_JOURNAL_MAGIC ISC_MAGIC('J', 'O', 'U', 'R') 343#define DNS_JOURNAL_VALID(t) ISC_MAGIC_VALID(t, DNS_JOURNAL_MAGIC) 344 345static void 346journal_pos_decode(journal_rawpos_t *raw, journal_pos_t *cooked) { 347 cooked->serial = decode_uint32(raw->serial); 348 cooked->offset = decode_uint32(raw->offset); 349} 350 351static void 352journal_pos_encode(journal_rawpos_t *raw, journal_pos_t *cooked) { 353 encode_uint32(cooked->serial, raw->serial); 354 encode_uint32(cooked->offset, raw->offset); 355} 356 357static void 358journal_header_decode(journal_rawheader_t *raw, journal_header_t *cooked) { 359 INSIST(sizeof(cooked->format) == sizeof(raw->h.format)); 360 memcpy(cooked->format, raw->h.format, sizeof(cooked->format)); 361 journal_pos_decode(&raw->h.begin, &cooked->begin); 362 journal_pos_decode(&raw->h.end, &cooked->end); 363 cooked->index_size = decode_uint32(raw->h.index_size); 364 cooked->sourceserial = decode_uint32(raw->h.sourceserial); 365 cooked->serialset = ISC_TF(raw->h.flags & JOURNAL_SERIALSET); 366} 367 368static void 369journal_header_encode(journal_header_t *cooked, journal_rawheader_t *raw) { 370 unsigned char flags = 0; 371 372 INSIST(sizeof(cooked->format) == sizeof(raw->h.format)); 373 memset(raw->pad, 0, sizeof(raw->pad)); 374 memcpy(raw->h.format, cooked->format, sizeof(raw->h.format)); 375 journal_pos_encode(&raw->h.begin, &cooked->begin); 376 journal_pos_encode(&raw->h.end, &cooked->end); 377 
encode_uint32(cooked->index_size, raw->h.index_size); 378 encode_uint32(cooked->sourceserial, raw->h.sourceserial); 379 if (cooked->serialset) 380 flags |= JOURNAL_SERIALSET; 381 raw->h.flags = flags; 382} 383 384/* 385 * Journal file I/O subroutines, with error checking and reporting. 386 */ 387static isc_result_t 388journal_seek(dns_journal_t *j, isc_uint32_t offset) { 389 isc_result_t result; 390 result = isc_stdio_seek(j->fp, (long)offset, SEEK_SET); 391 if (result != ISC_R_SUCCESS) { 392 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 393 "%s: seek: %s", j->filename, 394 isc_result_totext(result)); 395 return (ISC_R_UNEXPECTED); 396 } 397 j->offset = offset; 398 return (ISC_R_SUCCESS); 399} 400 401static isc_result_t 402journal_read(dns_journal_t *j, void *mem, size_t nbytes) { 403 isc_result_t result; 404 405 result = isc_stdio_read(mem, 1, nbytes, j->fp, NULL); 406 if (result != ISC_R_SUCCESS) { 407 if (result == ISC_R_EOF) 408 return (ISC_R_NOMORE); 409 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 410 "%s: read: %s", 411 j->filename, isc_result_totext(result)); 412 return (ISC_R_UNEXPECTED); 413 } 414 j->offset += nbytes; 415 return (ISC_R_SUCCESS); 416} 417 418static isc_result_t 419journal_write(dns_journal_t *j, void *mem, size_t nbytes) { 420 isc_result_t result; 421 422 result = isc_stdio_write(mem, 1, nbytes, j->fp, NULL); 423 if (result != ISC_R_SUCCESS) { 424 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 425 "%s: write: %s", 426 j->filename, isc_result_totext(result)); 427 return (ISC_R_UNEXPECTED); 428 } 429 j->offset += nbytes; 430 return (ISC_R_SUCCESS); 431} 432 433static isc_result_t 434journal_fsync(dns_journal_t *j) { 435 isc_result_t result; 436 result = isc_stdio_flush(j->fp); 437 if (result != ISC_R_SUCCESS) { 438 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 439 "%s: flush: %s", 440 j->filename, isc_result_totext(result)); 441 return (ISC_R_UNEXPECTED); 442 } 443 result = isc_stdio_sync(j->fp); 444 if (result != 
ISC_R_SUCCESS) { 445 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 446 "%s: fsync: %s", 447 j->filename, isc_result_totext(result)); 448 return (ISC_R_UNEXPECTED); 449 } 450 return (ISC_R_SUCCESS); 451} 452 453/* 454 * Read/write a transaction header at the current file position. 455 */ 456 457static isc_result_t 458journal_read_xhdr(dns_journal_t *j, journal_xhdr_t *xhdr) { 459 journal_rawxhdr_t raw; 460 isc_result_t result; 461 result = journal_read(j, &raw, sizeof(raw)); 462 if (result != ISC_R_SUCCESS) 463 return (result); 464 xhdr->size = decode_uint32(raw.size); 465 xhdr->serial0 = decode_uint32(raw.serial0); 466 xhdr->serial1 = decode_uint32(raw.serial1); 467 return (ISC_R_SUCCESS); 468} 469 470static isc_result_t 471journal_write_xhdr(dns_journal_t *j, isc_uint32_t size, 472 isc_uint32_t serial0, isc_uint32_t serial1) 473{ 474 journal_rawxhdr_t raw; 475 encode_uint32(size, raw.size); 476 encode_uint32(serial0, raw.serial0); 477 encode_uint32(serial1, raw.serial1); 478 return (journal_write(j, &raw, sizeof(raw))); 479} 480 481 482/* 483 * Read an RR header at the current file position. 484 */ 485 486static isc_result_t 487journal_read_rrhdr(dns_journal_t *j, journal_rrhdr_t *rrhdr) { 488 journal_rawrrhdr_t raw; 489 isc_result_t result; 490 result = journal_read(j, &raw, sizeof(raw)); 491 if (result != ISC_R_SUCCESS) 492 return (result); 493 rrhdr->size = decode_uint32(raw.size); 494 return (ISC_R_SUCCESS); 495} 496 497static isc_result_t 498journal_file_create(isc_mem_t *mctx, const char *filename) { 499 FILE *fp = NULL; 500 isc_result_t result; 501 journal_header_t header; 502 journal_rawheader_t rawheader; 503 int index_size = 56; /* XXX configurable */ 504 int size; 505 void *mem; /* Memory for temporary index image. 
*/ 506 507 INSIST(sizeof(journal_rawheader_t) == JOURNAL_HEADER_SIZE); 508 509 result = isc_stdio_open(filename, "wb", &fp); 510 if (result != ISC_R_SUCCESS) { 511 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 512 "%s: create: %s", 513 filename, isc_result_totext(result)); 514 return (ISC_R_UNEXPECTED); 515 } 516 517 header = initial_journal_header; 518 header.index_size = index_size; 519 journal_header_encode(&header, &rawheader); 520 521 size = sizeof(journal_rawheader_t) + 522 index_size * sizeof(journal_rawpos_t); 523 524 mem = isc_mem_get(mctx, size); 525 if (mem == NULL) { 526 (void)isc_stdio_close(fp); 527 (void)isc_file_remove(filename); 528 return (ISC_R_NOMEMORY); 529 } 530 memset(mem, 0, size); 531 memcpy(mem, &rawheader, sizeof(rawheader)); 532 533 result = isc_stdio_write(mem, 1, (size_t) size, fp, NULL); 534 if (result != ISC_R_SUCCESS) { 535 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 536 "%s: write: %s", 537 filename, isc_result_totext(result)); 538 (void)isc_stdio_close(fp); 539 (void)isc_file_remove(filename); 540 isc_mem_put(mctx, mem, size); 541 return (ISC_R_UNEXPECTED); 542 } 543 isc_mem_put(mctx, mem, size); 544 545 result = isc_stdio_close(fp); 546 if (result != ISC_R_SUCCESS) { 547 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 548 "%s: close: %s", 549 filename, isc_result_totext(result)); 550 (void)isc_file_remove(filename); 551 return (ISC_R_UNEXPECTED); 552 } 553 554 return (ISC_R_SUCCESS); 555} 556 557static isc_result_t 558journal_open(isc_mem_t *mctx, const char *filename, isc_boolean_t write, 559 isc_boolean_t create, dns_journal_t **journalp) 560{ 561 FILE *fp = NULL; 562 isc_result_t result; 563 journal_rawheader_t rawheader; 564 dns_journal_t *j; 565 566 INSIST(journalp != NULL && *journalp == NULL); 567 j = isc_mem_get(mctx, sizeof(*j)); 568 if (j == NULL) 569 return (ISC_R_NOMEMORY); 570 571 j->mctx = NULL; 572 isc_mem_attach(mctx, &j->mctx); 573 j->state = JOURNAL_STATE_INVALID; 574 j->fp = NULL; 575 
j->filename = filename; 576 j->index = NULL; 577 j->rawindex = NULL; 578 579 result = isc_stdio_open(j->filename, write ? "rb+" : "rb", &fp); 580 581 if (result == ISC_R_FILENOTFOUND) { 582 if (create) { 583 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(1), 584 "journal file %s does not exist, " 585 "creating it", j->filename); 586 CHECK(journal_file_create(mctx, filename)); 587 /* 588 * Retry. 589 */ 590 result = isc_stdio_open(j->filename, "rb+", &fp); 591 } else { 592 FAIL(ISC_R_NOTFOUND); 593 } 594 } 595 if (result != ISC_R_SUCCESS) { 596 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 597 "%s: open: %s", 598 j->filename, isc_result_totext(result)); 599 FAIL(ISC_R_UNEXPECTED); 600 } 601 602 j->fp = fp; 603 604 /* 605 * Set magic early so that seek/read can succeed. 606 */ 607 j->magic = DNS_JOURNAL_MAGIC; 608 609 CHECK(journal_seek(j, 0)); 610 CHECK(journal_read(j, &rawheader, sizeof(rawheader))); 611 612 if (memcmp(rawheader.h.format, initial_journal_header.format, 613 sizeof(initial_journal_header.format)) != 0) { 614 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 615 "%s: journal format not recognized", 616 j->filename); 617 FAIL(ISC_R_UNEXPECTED); 618 } 619 journal_header_decode(&rawheader, &j->header); 620 621 /* 622 * If there is an index, read the raw index into a dynamically 623 * allocated buffer and then convert it into a cooked index. 
624 */ 625 if (j->header.index_size != 0) { 626 unsigned int i; 627 unsigned int rawbytes; 628 unsigned char *p; 629 630 rawbytes = j->header.index_size * sizeof(journal_rawpos_t); 631 j->rawindex = isc_mem_get(mctx, rawbytes); 632 if (j->rawindex == NULL) 633 FAIL(ISC_R_NOMEMORY); 634 635 CHECK(journal_read(j, j->rawindex, rawbytes)); 636 637 j->index = isc_mem_get(mctx, j->header.index_size * 638 sizeof(journal_pos_t)); 639 if (j->index == NULL) 640 FAIL(ISC_R_NOMEMORY); 641 642 p = j->rawindex; 643 for (i = 0; i < j->header.index_size; i++) { 644 j->index[i].serial = decode_uint32(p); 645 p += 4; 646 j->index[i].offset = decode_uint32(p); 647 p += 4; 648 } 649 INSIST(p == j->rawindex + rawbytes); 650 } 651 j->offset = -1; /* Invalid, must seek explicitly. */ 652 653 /* 654 * Initialize the iterator. 655 */ 656 dns_name_init(&j->it.name, NULL); 657 dns_rdata_init(&j->it.rdata); 658 659 /* 660 * Set up empty initial buffers for unchecked and checked 661 * wire format RR data. They will be reallocated 662 * later. 663 */ 664 isc_buffer_init(&j->it.source, NULL, 0); 665 isc_buffer_init(&j->it.target, NULL, 0); 666 dns_decompress_init(&j->it.dctx, -1, DNS_DECOMPRESS_NONE); 667 668 j->state = 669 write ? 
JOURNAL_STATE_WRITE : JOURNAL_STATE_READ; 670 671 *journalp = j; 672 return (ISC_R_SUCCESS); 673 674 failure: 675 j->magic = 0; 676 if (j->index != NULL) { 677 isc_mem_put(j->mctx, j->index, j->header.index_size * 678 sizeof(journal_rawpos_t)); 679 j->index = NULL; 680 } 681 if (j->fp != NULL) 682 (void)isc_stdio_close(j->fp); 683 isc_mem_putanddetach(&j->mctx, j, sizeof(*j)); 684 return (result); 685} 686 687isc_result_t 688dns_journal_open(isc_mem_t *mctx, const char *filename, unsigned int mode, 689 dns_journal_t **journalp) 690{ 691 isc_result_t result; 692 int namelen; 693 char backup[1024]; 694 isc_boolean_t write, create; 695 696 create = ISC_TF(mode & DNS_JOURNAL_CREATE); 697 write = ISC_TF(mode & (DNS_JOURNAL_WRITE|DNS_JOURNAL_CREATE)); 698 699 result = journal_open(mctx, filename, write, create, journalp); 700 if (result == ISC_R_NOTFOUND) { 701 namelen = strlen(filename); 702 if (namelen > 4 && strcmp(filename + namelen - 4, ".jnl") == 0) 703 namelen -= 4; 704 705 result = isc_string_printf(backup, sizeof(backup), "%.*s.jbk", 706 namelen, filename); 707 if (result != ISC_R_SUCCESS) 708 return (result); 709 result = journal_open(mctx, backup, write, write, journalp); 710 } 711 return (result); 712} 713 714/* 715 * A comparison function defining the sorting order for 716 * entries in the IXFR-style journal file. 717 * 718 * The IXFR format requires that deletions are sorted before 719 * additions, and within either one, SOA records are sorted 720 * before others. 721 * 722 * Also sort the non-SOA records by type as a courtesy to the 723 * server receiving the IXFR - it may help reduce the amount of 724 * rdataset merging it has to do. 
725 */ 726static int 727ixfr_order(const void *av, const void *bv) { 728 dns_difftuple_t const * const *ap = av; 729 dns_difftuple_t const * const *bp = bv; 730 dns_difftuple_t const *a = *ap; 731 dns_difftuple_t const *b = *bp; 732 int r; 733 int bop = 0, aop = 0; 734 735 switch (a->op) { 736 case DNS_DIFFOP_DEL: 737 case DNS_DIFFOP_DELRESIGN: 738 aop = 1; 739 break; 740 case DNS_DIFFOP_ADD: 741 case DNS_DIFFOP_ADDRESIGN: 742 aop = 0; 743 break; 744 default: 745 INSIST(0); 746 } 747 748 switch (b->op) { 749 case DNS_DIFFOP_DEL: 750 case DNS_DIFFOP_DELRESIGN: 751 bop = 1; 752 break; 753 case DNS_DIFFOP_ADD: 754 case DNS_DIFFOP_ADDRESIGN: 755 bop = 0; 756 break; 757 default: 758 INSIST(0); 759 } 760 761 r = bop - aop; 762 if (r != 0) 763 return (r); 764 765 r = (b->rdata.type == dns_rdatatype_soa) - 766 (a->rdata.type == dns_rdatatype_soa); 767 if (r != 0) 768 return (r); 769 770 r = (a->rdata.type - b->rdata.type); 771 return (r); 772} 773 774/* 775 * Advance '*pos' to the next journal transaction. 776 * 777 * Requires: 778 * *pos refers to a valid journal transaction. 779 * 780 * Ensures: 781 * When ISC_R_SUCCESS is returned, 782 * *pos refers to the next journal transaction. 783 * 784 * Returns one of: 785 * 786 * ISC_R_SUCCESS 787 * ISC_R_NOMORE *pos pointed at the last transaction 788 * Other results due to file errors are possible. 789 */ 790static isc_result_t 791journal_next(dns_journal_t *j, journal_pos_t *pos) { 792 isc_result_t result; 793 journal_xhdr_t xhdr; 794 REQUIRE(DNS_JOURNAL_VALID(j)); 795 796 result = journal_seek(j, pos->offset); 797 if (result != ISC_R_SUCCESS) 798 return (result); 799 800 if (pos->serial == j->header.end.serial) 801 return (ISC_R_NOMORE); 802 /* 803 * Read the header of the current transaction. 804 * This will return ISC_R_NOMORE if we are at EOF. 805 */ 806 result = journal_read_xhdr(j, &xhdr); 807 if (result != ISC_R_SUCCESS) 808 return (result); 809 810 /* 811 * Check serial number consistency. 
812 */ 813 if (xhdr.serial0 != pos->serial) { 814 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 815 "%s: journal file corrupt: " 816 "expected serial %u, got %u", 817 j->filename, pos->serial, xhdr.serial0); 818 return (ISC_R_UNEXPECTED); 819 } 820 821 /* 822 * Check for offset wraparound. 823 */ 824 if ((isc_offset_t)(pos->offset + sizeof(journal_rawxhdr_t) + xhdr.size) 825 < pos->offset) { 826 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 827 "%s: offset too large", j->filename); 828 return (ISC_R_UNEXPECTED); 829 } 830 831 pos->offset += sizeof(journal_rawxhdr_t) + xhdr.size; 832 pos->serial = xhdr.serial1; 833 return (ISC_R_SUCCESS); 834} 835 836/* 837 * If the index of the journal 'j' contains an entry "better" 838 * than '*best_guess', replace '*best_guess' with it. 839 * 840 * "Better" means having a serial number closer to 'serial' 841 * but not greater than 'serial'. 842 */ 843static void 844index_find(dns_journal_t *j, isc_uint32_t serial, journal_pos_t *best_guess) { 845 unsigned int i; 846 if (j->index == NULL) 847 return; 848 for (i = 0; i < j->header.index_size; i++) { 849 if (POS_VALID(j->index[i]) && 850 DNS_SERIAL_GE(serial, j->index[i].serial) && 851 DNS_SERIAL_GT(j->index[i].serial, best_guess->serial)) 852 *best_guess = j->index[i]; 853 } 854} 855 856/* 857 * Add a new index entry. If there is no room, make room by removing 858 * the odd-numbered entries and compacting the others into the first 859 * half of the index. This decimates old index entries exponentially 860 * over time, so that the index always contains a much larger fraction 861 * of recent serial numbers than of old ones. This is deliberate - 862 * most index searches are for outgoing IXFR, and IXFR tends to request 863 * recent versions more often than old ones. 864 */ 865static void 866index_add(dns_journal_t *j, journal_pos_t *pos) { 867 unsigned int i; 868 if (j->index == NULL) 869 return; 870 /* 871 * Search for a vacant position. 
872 */ 873 for (i = 0; i < j->header.index_size; i++) { 874 if (! POS_VALID(j->index[i])) 875 break; 876 } 877 if (i == j->header.index_size) { 878 unsigned int k = 0; 879 /* 880 * Found no vacant position. Make some room. 881 */ 882 for (i = 0; i < j->header.index_size; i += 2) { 883 j->index[k++] = j->index[i]; 884 } 885 i = k; /* 'i' identifies the first vacant position. */ 886 while (k < j->header.index_size) { 887 POS_INVALIDATE(j->index[k]); 888 k++; 889 } 890 } 891 INSIST(i < j->header.index_size); 892 INSIST(! POS_VALID(j->index[i])); 893 894 /* 895 * Store the new index entry. 896 */ 897 j->index[i] = *pos; 898} 899 900/* 901 * Invalidate any existing index entries that could become 902 * ambiguous when a new transaction with number 'serial' is added. 903 */ 904static void 905index_invalidate(dns_journal_t *j, isc_uint32_t serial) { 906 unsigned int i; 907 if (j->index == NULL) 908 return; 909 for (i = 0; i < j->header.index_size; i++) { 910 if (! DNS_SERIAL_GT(serial, j->index[i].serial)) 911 POS_INVALIDATE(j->index[i]); 912 } 913} 914 915/* 916 * Try to find a transaction with initial serial number 'serial' 917 * in the journal 'j'. 918 * 919 * If found, store its position at '*pos' and return ISC_R_SUCCESS. 920 * 921 * If 'serial' is current (= the ending serial number of the 922 * last transaction in the journal), set '*pos' to 923 * the position immediately following the last transaction and 924 * return ISC_R_SUCCESS. 925 * 926 * If 'serial' is within the range of addressable serial numbers 927 * covered by the journal but that particular serial number is missing 928 * (from the journal, not just from the index), return ISC_R_NOTFOUND. 929 * 930 * If 'serial' is outside the range of addressable serial numbers 931 * covered by the journal, return ISC_R_RANGE. 
932 * 933 */ 934static isc_result_t 935journal_find(dns_journal_t *j, isc_uint32_t serial, journal_pos_t *pos) { 936 isc_result_t result; 937 journal_pos_t current_pos; 938 REQUIRE(DNS_JOURNAL_VALID(j)); 939 940 if (DNS_SERIAL_GT(j->header.begin.serial, serial)) 941 return (ISC_R_RANGE); 942 if (DNS_SERIAL_GT(serial, j->header.end.serial)) 943 return (ISC_R_RANGE); 944 if (serial == j->header.end.serial) { 945 *pos = j->header.end; 946 return (ISC_R_SUCCESS); 947 } 948 949 current_pos = j->header.begin; 950 index_find(j, serial, ¤t_pos); 951 952 while (current_pos.serial != serial) { 953 if (DNS_SERIAL_GT(current_pos.serial, serial)) 954 return (ISC_R_NOTFOUND); 955 result = journal_next(j, ¤t_pos); 956 if (result != ISC_R_SUCCESS) 957 return (result); 958 } 959 *pos = current_pos; 960 return (ISC_R_SUCCESS); 961} 962 963isc_result_t 964dns_journal_begin_transaction(dns_journal_t *j) { 965 isc_uint32_t offset; 966 isc_result_t result; 967 journal_rawxhdr_t hdr; 968 969 REQUIRE(DNS_JOURNAL_VALID(j)); 970 REQUIRE(j->state == JOURNAL_STATE_WRITE || 971 j->state == JOURNAL_STATE_INLINE); 972 973 /* 974 * Find the file offset where the new transaction should 975 * be written, and seek there. 976 */ 977 if (JOURNAL_EMPTY(&j->header)) { 978 offset = sizeof(journal_rawheader_t) + 979 j->header.index_size * sizeof(journal_rawpos_t); 980 } else { 981 offset = j->header.end.offset; 982 } 983 j->x.pos[0].offset = offset; 984 j->x.pos[1].offset = offset; /* Initial value, will be incremented. */ 985 j->x.n_soa = 0; 986 987 CHECK(journal_seek(j, offset)); 988 989 /* 990 * Write a dummy transaction header of all zeroes to reserve 991 * space. It will be filled in when the transaction is 992 * finished. 
993 */ 994 memset(&hdr, 0, sizeof(hdr)); 995 CHECK(journal_write(j, &hdr, sizeof(hdr))); 996 j->x.pos[1].offset = j->offset; 997 998 j->state = JOURNAL_STATE_TRANSACTION; 999 result = ISC_R_SUCCESS; 1000 failure: 1001 return (result); 1002} 1003 1004isc_result_t 1005dns_journal_writediff(dns_journal_t *j, dns_diff_t *diff) { 1006 dns_difftuple_t *t; 1007 isc_buffer_t buffer; 1008 void *mem = NULL; 1009 unsigned int size; 1010 isc_result_t result; 1011 isc_region_t used; 1012 1013 REQUIRE(DNS_DIFF_VALID(diff)); 1014 REQUIRE(j->state == JOURNAL_STATE_TRANSACTION); 1015 1016 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "writing to journal"); 1017 (void)dns_diff_print(diff, NULL); 1018 1019 /* 1020 * Pass 1: determine the buffer size needed, and 1021 * keep track of SOA serial numbers. 1022 */ 1023 size = 0; 1024 for (t = ISC_LIST_HEAD(diff->tuples); t != NULL; 1025 t = ISC_LIST_NEXT(t, link)) 1026 { 1027 if (t->rdata.type == dns_rdatatype_soa) { 1028 if (j->x.n_soa < 2) 1029 j->x.pos[j->x.n_soa].serial = 1030 dns_soa_getserial(&t->rdata); 1031 j->x.n_soa++; 1032 } 1033 size += sizeof(journal_rawrrhdr_t); 1034 size += t->name.length; /* XXX should have access macro? */ 1035 size += 10; 1036 size += t->rdata.length; 1037 } 1038 1039 mem = isc_mem_get(j->mctx, size); 1040 if (mem == NULL) 1041 return (ISC_R_NOMEMORY); 1042 1043 isc_buffer_init(&buffer, mem, size); 1044 1045 /* 1046 * Pass 2. Write RRs to buffer. 1047 */ 1048 for (t = ISC_LIST_HEAD(diff->tuples); t != NULL; 1049 t = ISC_LIST_NEXT(t, link)) 1050 { 1051 /* 1052 * Write the RR header. 1053 */ 1054 isc_buffer_putuint32(&buffer, t->name.length + 10 + 1055 t->rdata.length); 1056 /* 1057 * Write the owner name, RR header, and RR data. 
1058 */ 1059 isc_buffer_putmem(&buffer, t->name.ndata, t->name.length); 1060 isc_buffer_putuint16(&buffer, t->rdata.type); 1061 isc_buffer_putuint16(&buffer, t->rdata.rdclass); 1062 isc_buffer_putuint32(&buffer, t->ttl); 1063 INSIST(t->rdata.length < 65536); 1064 isc_buffer_putuint16(&buffer, (isc_uint16_t)t->rdata.length); 1065 INSIST(isc_buffer_availablelength(&buffer) >= t->rdata.length); 1066 isc_buffer_putmem(&buffer, t->rdata.data, t->rdata.length); 1067 } 1068 1069 isc_buffer_usedregion(&buffer, &used); 1070 INSIST(used.length == size); 1071 1072 j->x.pos[1].offset += used.length; 1073 1074 /* 1075 * Write the buffer contents to the journal file. 1076 */ 1077 CHECK(journal_write(j, used.base, used.length)); 1078 1079 result = ISC_R_SUCCESS; 1080 1081 failure: 1082 if (mem != NULL) 1083 isc_mem_put(j->mctx, mem, size); 1084 return (result); 1085 1086} 1087 1088isc_result_t 1089dns_journal_commit(dns_journal_t *j) { 1090 isc_result_t result; 1091 journal_rawheader_t rawheader; 1092 1093 REQUIRE(DNS_JOURNAL_VALID(j)); 1094 REQUIRE(j->state == JOURNAL_STATE_TRANSACTION || 1095 j->state == JOURNAL_STATE_INLINE); 1096 1097 /* 1098 * Just write out a updated header. 1099 */ 1100 if (j->state == JOURNAL_STATE_INLINE) { 1101 CHECK(journal_fsync(j)); 1102 journal_header_encode(&j->header, &rawheader); 1103 CHECK(journal_seek(j, 0)); 1104 CHECK(journal_write(j, &rawheader, sizeof(rawheader))); 1105 CHECK(journal_fsync(j)); 1106 j->state = JOURNAL_STATE_WRITE; 1107 return (ISC_R_SUCCESS); 1108 } 1109 1110 /* 1111 * Perform some basic consistency checks. 1112 */ 1113 if (j->x.n_soa != 2) { 1114 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1115 "%s: malformed transaction: %d SOAs", 1116 j->filename, j->x.n_soa); 1117 return (ISC_R_UNEXPECTED); 1118 } 1119 if (! 
(DNS_SERIAL_GT(j->x.pos[1].serial, j->x.pos[0].serial) || 1120 (bind8_compat && 1121 j->x.pos[1].serial == j->x.pos[0].serial))) 1122 { 1123 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1124 "%s: malformed transaction: serial number " 1125 "would decrease", j->filename); 1126 return (ISC_R_UNEXPECTED); 1127 } 1128 if (! JOURNAL_EMPTY(&j->header)) { 1129 if (j->x.pos[0].serial != j->header.end.serial) { 1130 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1131 "malformed transaction: " 1132 "%s last serial %u != " 1133 "transaction first serial %u", 1134 j->filename, 1135 j->header.end.serial, 1136 j->x.pos[0].serial); 1137 return (ISC_R_UNEXPECTED); 1138 } 1139 } 1140 1141 /* 1142 * Some old journal entries may become non-addressable 1143 * when we increment the current serial number. Purge them 1144 * by stepping header.begin forward to the first addressable 1145 * transaction. Also purge them from the index. 1146 */ 1147 if (! JOURNAL_EMPTY(&j->header)) { 1148 while (! DNS_SERIAL_GT(j->x.pos[1].serial, 1149 j->header.begin.serial)) { 1150 CHECK(journal_next(j, &j->header.begin)); 1151 } 1152 index_invalidate(j, j->x.pos[1].serial); 1153 } 1154#ifdef notyet 1155 if (DNS_SERIAL_GT(last_dumped_serial, j->x.pos[1].serial)) { 1156 force_dump(...); 1157 } 1158#endif 1159 1160 /* 1161 * Commit the transaction data to stable storage. 1162 */ 1163 CHECK(journal_fsync(j)); 1164 1165 if (j->state == JOURNAL_STATE_TRANSACTION) { 1166 isc_offset_t offset; 1167 offset = (j->x.pos[1].offset - j->x.pos[0].offset) - 1168 sizeof(journal_rawxhdr_t); 1169 /* 1170 * Update the transaction header. 1171 */ 1172 CHECK(journal_seek(j, j->x.pos[0].offset)); 1173 CHECK(journal_write_xhdr(j, offset, j->x.pos[0].serial, 1174 j->x.pos[1].serial)); 1175 } 1176 1177 /* 1178 * Update the journal header. 
1179 */ 1180 if (JOURNAL_EMPTY(&j->header)) 1181 j->header.begin = j->x.pos[0]; 1182 j->header.end = j->x.pos[1]; 1183 journal_header_encode(&j->header, &rawheader); 1184 CHECK(journal_seek(j, 0)); 1185 CHECK(journal_write(j, &rawheader, sizeof(rawheader))); 1186 1187 /* 1188 * Update the index. 1189 */ 1190 index_add(j, &j->x.pos[0]); 1191 1192 /* 1193 * Convert the index into on-disk format and write 1194 * it to disk. 1195 */ 1196 CHECK(index_to_disk(j)); 1197 1198 /* 1199 * Commit the header to stable storage. 1200 */ 1201 CHECK(journal_fsync(j)); 1202 1203 /* 1204 * We no longer have a transaction open. 1205 */ 1206 j->state = JOURNAL_STATE_WRITE; 1207 1208 result = ISC_R_SUCCESS; 1209 1210 failure: 1211 return (result); 1212} 1213 1214isc_result_t 1215dns_journal_write_transaction(dns_journal_t *j, dns_diff_t *diff) { 1216 isc_result_t result; 1217 CHECK(dns_diff_sort(diff, ixfr_order)); 1218 CHECK(dns_journal_begin_transaction(j)); 1219 CHECK(dns_journal_writediff(j, diff)); 1220 CHECK(dns_journal_commit(j)); 1221 result = ISC_R_SUCCESS; 1222 failure: 1223 return (result); 1224} 1225 1226void 1227dns_journal_destroy(dns_journal_t **journalp) { 1228 dns_journal_t *j = *journalp; 1229 REQUIRE(DNS_JOURNAL_VALID(j)); 1230 1231 j->it.result = ISC_R_FAILURE; 1232 dns_name_invalidate(&j->it.name); 1233 dns_decompress_invalidate(&j->it.dctx); 1234 if (j->rawindex != NULL) 1235 isc_mem_put(j->mctx, j->rawindex, j->header.index_size * 1236 sizeof(journal_rawpos_t)); 1237 if (j->index != NULL) 1238 isc_mem_put(j->mctx, j->index, j->header.index_size * 1239 sizeof(journal_pos_t)); 1240 if (j->it.target.base != NULL) 1241 isc_mem_put(j->mctx, j->it.target.base, j->it.target.length); 1242 if (j->it.source.base != NULL) 1243 isc_mem_put(j->mctx, j->it.source.base, j->it.source.length); 1244 1245 if (j->fp != NULL) 1246 (void)isc_stdio_close(j->fp); 1247 j->magic = 0; 1248 isc_mem_putanddetach(&j->mctx, j, sizeof(*j)); 1249 *journalp = NULL; 1250} 1251 1252/* 1253 * Roll 
the open journal 'j' into the database 'db'. 1254 * A new database version will be created. 1255 */ 1256 1257/* XXX Share code with incoming IXFR? */ 1258 1259static isc_result_t 1260roll_forward(dns_journal_t *j, dns_db_t *db, unsigned int options, 1261 isc_uint32_t resign) 1262{ 1263 isc_buffer_t source; /* Transaction data from disk */ 1264 isc_buffer_t target; /* Ditto after _fromwire check */ 1265 isc_uint32_t db_serial; /* Database SOA serial */ 1266 isc_uint32_t end_serial; /* Last journal SOA serial */ 1267 isc_result_t result; 1268 dns_dbversion_t *ver = NULL; 1269 journal_pos_t pos; 1270 dns_diff_t diff; 1271 unsigned int n_soa = 0; 1272 unsigned int n_put = 0; 1273 dns_diffop_t op; 1274 1275 REQUIRE(DNS_JOURNAL_VALID(j)); 1276 REQUIRE(DNS_DB_VALID(db)); 1277 1278 dns_diff_init(j->mctx, &diff); 1279 diff.resign = resign; 1280 1281 /* 1282 * Set up empty initial buffers for unchecked and checked 1283 * wire format transaction data. They will be reallocated 1284 * later. 1285 */ 1286 isc_buffer_init(&source, NULL, 0); 1287 isc_buffer_init(&target, NULL, 0); 1288 1289 /* 1290 * Create the new database version. 1291 */ 1292 CHECK(dns_db_newversion(db, &ver)); 1293 1294 /* 1295 * Get the current database SOA serial number. 1296 */ 1297 CHECK(dns_db_getsoaserial(db, ver, &db_serial)); 1298 1299 /* 1300 * Locate a journal entry for the current database serial. 1301 */ 1302 CHECK(journal_find(j, db_serial, &pos)); 1303 /* 1304 * XXX do more drastic things, like marking zone stale, 1305 * if this fails? 1306 */ 1307 /* 1308 * XXXRTH The zone code should probably mark the zone as bad and 1309 * scream loudly into the log if this is a dynamic update 1310 * log reply that failed. 
1311 */ 1312 1313 end_serial = dns_journal_last_serial(j); 1314 if (db_serial == end_serial) 1315 CHECK(DNS_R_UPTODATE); 1316 1317 CHECK(dns_journal_iter_init(j, db_serial, end_serial)); 1318 1319 for (result = dns_journal_first_rr(j); 1320 result == ISC_R_SUCCESS; 1321 result = dns_journal_next_rr(j)) 1322 { 1323 dns_name_t *name; 1324 isc_uint32_t ttl; 1325 dns_rdata_t *rdata; 1326 dns_difftuple_t *tuple = NULL; 1327 1328 name = NULL; 1329 rdata = NULL; 1330 dns_journal_current_rr(j, &name, &ttl, &rdata); 1331 1332 if (rdata->type == dns_rdatatype_soa) { 1333 n_soa++; 1334 if (n_soa == 2) 1335 db_serial = j->it.current_serial; 1336 } 1337 1338 if (n_soa == 3) 1339 n_soa = 1; 1340 if (n_soa == 0) { 1341 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1342 "%s: journal file corrupt: missing " 1343 "initial SOA", j->filename); 1344 FAIL(ISC_R_UNEXPECTED); 1345 } 1346 if ((options & DNS_JOURNALOPT_RESIGN) != 0) 1347 op = (n_soa == 1) ? DNS_DIFFOP_DELRESIGN : 1348 DNS_DIFFOP_ADDRESIGN; 1349 else 1350 op = (n_soa == 1) ? DNS_DIFFOP_DEL : DNS_DIFFOP_ADD; 1351 1352 CHECK(dns_difftuple_create(diff.mctx, op, name, ttl, rdata, 1353 &tuple)); 1354 dns_diff_append(&diff, &tuple); 1355 1356 if (++n_put > 100) { 1357 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), 1358 "%s: applying diff to database (%u)", 1359 j->filename, db_serial); 1360 (void)dns_diff_print(&diff, NULL); 1361 CHECK(dns_diff_apply(&diff, db, ver)); 1362 dns_diff_clear(&diff); 1363 n_put = 0; 1364 } 1365 } 1366 if (result == ISC_R_NOMORE) 1367 result = ISC_R_SUCCESS; 1368 CHECK(result); 1369 1370 if (n_put != 0) { 1371 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), 1372 "%s: applying final diff to database (%u)", 1373 j->filename, db_serial); 1374 (void)dns_diff_print(&diff, NULL); 1375 CHECK(dns_diff_apply(&diff, db, ver)); 1376 dns_diff_clear(&diff); 1377 } 1378 1379 failure: 1380 if (ver != NULL) 1381 dns_db_closeversion(db, &ver, result == ISC_R_SUCCESS ? 
1382 ISC_TRUE : ISC_FALSE); 1383 1384 if (source.base != NULL) 1385 isc_mem_put(j->mctx, source.base, source.length); 1386 if (target.base != NULL) 1387 isc_mem_put(j->mctx, target.base, target.length); 1388 1389 dns_diff_clear(&diff); 1390 1391 return (result); 1392} 1393 1394isc_result_t 1395dns_journal_rollforward(isc_mem_t *mctx, dns_db_t *db, 1396 unsigned int options, const char *filename) 1397{ 1398 REQUIRE((options & DNS_JOURNALOPT_RESIGN) == 0); 1399 return (dns_journal_rollforward2(mctx, db, options, 0, filename)); 1400} 1401 1402isc_result_t 1403dns_journal_rollforward2(isc_mem_t *mctx, dns_db_t *db, unsigned int options, 1404 isc_uint32_t resign, const char *filename) 1405{ 1406 dns_journal_t *j; 1407 isc_result_t result; 1408 1409 REQUIRE(DNS_DB_VALID(db)); 1410 REQUIRE(filename != NULL); 1411 1412 j = NULL; 1413 result = dns_journal_open(mctx, filename, DNS_JOURNAL_READ, &j); 1414 if (result == ISC_R_NOTFOUND) { 1415 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), 1416 "no journal file, but that's OK"); 1417 return (DNS_R_NOJOURNAL); 1418 } 1419 if (result != ISC_R_SUCCESS) 1420 return (result); 1421 if (JOURNAL_EMPTY(&j->header)) 1422 result = DNS_R_UPTODATE; 1423 else 1424 result = roll_forward(j, db, options, resign); 1425 1426 dns_journal_destroy(&j); 1427 1428 return (result); 1429} 1430 1431isc_result_t 1432dns_journal_print(isc_mem_t *mctx, const char *filename, FILE *file) { 1433 dns_journal_t *j; 1434 isc_buffer_t source; /* Transaction data from disk */ 1435 isc_buffer_t target; /* Ditto after _fromwire check */ 1436 isc_uint32_t start_serial; /* Database SOA serial */ 1437 isc_uint32_t end_serial; /* Last journal SOA serial */ 1438 isc_result_t result; 1439 dns_diff_t diff; 1440 unsigned int n_soa = 0; 1441 unsigned int n_put = 0; 1442 1443 REQUIRE(filename != NULL); 1444 1445 j = NULL; 1446 result = dns_journal_open(mctx, filename, DNS_JOURNAL_READ, &j); 1447 if (result == ISC_R_NOTFOUND) { 1448 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "no journal 
file"); 1449 return (DNS_R_NOJOURNAL); 1450 } 1451 1452 if (result != ISC_R_SUCCESS) { 1453 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1454 "journal open failure: %s: %s", 1455 isc_result_totext(result), filename); 1456 return (result); 1457 } 1458 1459 if (j->header.serialset) 1460 fprintf(file, "Source serial = %u\n", j->header.sourceserial); 1461 dns_diff_init(j->mctx, &diff); 1462 1463 /* 1464 * Set up empty initial buffers for unchecked and checked 1465 * wire format transaction data. They will be reallocated 1466 * later. 1467 */ 1468 isc_buffer_init(&source, NULL, 0); 1469 isc_buffer_init(&target, NULL, 0); 1470 1471 start_serial = dns_journal_first_serial(j); 1472 end_serial = dns_journal_last_serial(j); 1473 1474 CHECK(dns_journal_iter_init(j, start_serial, end_serial)); 1475 1476 for (result = dns_journal_first_rr(j); 1477 result == ISC_R_SUCCESS; 1478 result = dns_journal_next_rr(j)) 1479 { 1480 dns_name_t *name; 1481 isc_uint32_t ttl; 1482 dns_rdata_t *rdata; 1483 dns_difftuple_t *tuple = NULL; 1484 1485 name = NULL; 1486 rdata = NULL; 1487 dns_journal_current_rr(j, &name, &ttl, &rdata); 1488 1489 if (rdata->type == dns_rdatatype_soa) 1490 n_soa++; 1491 1492 if (n_soa == 3) 1493 n_soa = 1; 1494 if (n_soa == 0) { 1495 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1496 "%s: journal file corrupt: missing " 1497 "initial SOA", j->filename); 1498 FAIL(ISC_R_UNEXPECTED); 1499 } 1500 CHECK(dns_difftuple_create(diff.mctx, n_soa == 1 ? 
1501 DNS_DIFFOP_DEL : DNS_DIFFOP_ADD, 1502 name, ttl, rdata, &tuple)); 1503 dns_diff_append(&diff, &tuple); 1504 1505 if (++n_put > 100) { 1506 result = dns_diff_print(&diff, file); 1507 dns_diff_clear(&diff); 1508 n_put = 0; 1509 if (result != ISC_R_SUCCESS) 1510 break; 1511 } 1512 } 1513 if (result == ISC_R_NOMORE) 1514 result = ISC_R_SUCCESS; 1515 CHECK(result); 1516 1517 if (n_put != 0) { 1518 result = dns_diff_print(&diff, file); 1519 dns_diff_clear(&diff); 1520 } 1521 goto cleanup; 1522 1523 failure: 1524 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1525 "%s: cannot print: journal file corrupt", j->filename); 1526 1527 cleanup: 1528 if (source.base != NULL) 1529 isc_mem_put(j->mctx, source.base, source.length); 1530 if (target.base != NULL) 1531 isc_mem_put(j->mctx, target.base, target.length); 1532 1533 dns_diff_clear(&diff); 1534 dns_journal_destroy(&j); 1535 1536 return (result); 1537} 1538 1539/**************************************************************************/ 1540/* 1541 * Miscellaneous accessors. 
1542 */ 1543isc_uint32_t 1544dns_journal_first_serial(dns_journal_t *j) { 1545 return (j->header.begin.serial); 1546} 1547 1548isc_uint32_t 1549dns_journal_last_serial(dns_journal_t *j) { 1550 return (j->header.end.serial); 1551} 1552 1553void 1554dns_journal_set_sourceserial(dns_journal_t *j, isc_uint32_t sourceserial) { 1555 1556 REQUIRE(j->state == JOURNAL_STATE_WRITE || 1557 j->state == JOURNAL_STATE_INLINE || 1558 j->state == JOURNAL_STATE_TRANSACTION); 1559 1560 j->header.sourceserial = sourceserial; 1561 j->header.serialset = ISC_TRUE; 1562 if (j->state == JOURNAL_STATE_WRITE) 1563 j->state = JOURNAL_STATE_INLINE; 1564} 1565 1566isc_boolean_t 1567dns_journal_get_sourceserial(dns_journal_t *j, isc_uint32_t *sourceserial) { 1568 REQUIRE(sourceserial != NULL); 1569 1570 if (!j->header.serialset) 1571 return (ISC_FALSE); 1572 *sourceserial = j->header.sourceserial; 1573 return (ISC_TRUE); 1574} 1575 1576/**************************************************************************/ 1577/* 1578 * Iteration support. 1579 * 1580 * When serving an outgoing IXFR, we transmit a part the journal starting 1581 * at the serial number in the IXFR request and ending at the serial 1582 * number that is current when the IXFR request arrives. The ending 1583 * serial number is not necessarily at the end of the journal: 1584 * the journal may grow while the IXFR is in progress, but we stop 1585 * when we reach the serial number that was current when the IXFR started. 1586 */ 1587 1588static isc_result_t read_one_rr(dns_journal_t *j); 1589 1590/* 1591 * Make sure the buffer 'b' is has at least 'size' bytes 1592 * allocated, and clear it. 1593 * 1594 * Requires: 1595 * Either b->base is NULL, or it points to b->length bytes of memory 1596 * previously allocated by isc_mem_get(). 
1597 */ 1598 1599static isc_result_t 1600size_buffer(isc_mem_t *mctx, isc_buffer_t *b, unsigned size) { 1601 if (b->length < size) { 1602 void *mem = isc_mem_get(mctx, size); 1603 if (mem == NULL) 1604 return (ISC_R_NOMEMORY); 1605 if (b->base != NULL) 1606 isc_mem_put(mctx, b->base, b->length); 1607 b->base = mem; 1608 b->length = size; 1609 } 1610 isc_buffer_clear(b); 1611 return (ISC_R_SUCCESS); 1612} 1613 1614isc_result_t 1615dns_journal_iter_init(dns_journal_t *j, 1616 isc_uint32_t begin_serial, isc_uint32_t end_serial) 1617{ 1618 isc_result_t result; 1619 1620 CHECK(journal_find(j, begin_serial, &j->it.bpos)); 1621 INSIST(j->it.bpos.serial == begin_serial); 1622 1623 CHECK(journal_find(j, end_serial, &j->it.epos)); 1624 INSIST(j->it.epos.serial == end_serial); 1625 1626 result = ISC_R_SUCCESS; 1627 failure: 1628 j->it.result = result; 1629 return (j->it.result); 1630} 1631 1632 1633isc_result_t 1634dns_journal_first_rr(dns_journal_t *j) { 1635 isc_result_t result; 1636 1637 /* 1638 * Seek to the beginning of the first transaction we are 1639 * interested in. 1640 */ 1641 CHECK(journal_seek(j, j->it.bpos.offset)); 1642 j->it.current_serial = j->it.bpos.serial; 1643 1644 j->it.xsize = 0; /* We have no transaction data yet... */ 1645 j->it.xpos = 0; /* ...and haven't used any of it. */ 1646 1647 return (read_one_rr(j)); 1648 1649 failure: 1650 return (result); 1651} 1652 1653static isc_result_t 1654read_one_rr(dns_journal_t *j) { 1655 isc_result_t result; 1656 1657 dns_rdatatype_t rdtype; 1658 dns_rdataclass_t rdclass; 1659 unsigned int rdlen; 1660 isc_uint32_t ttl; 1661 journal_xhdr_t xhdr; 1662 journal_rrhdr_t rrhdr; 1663 1664 INSIST(j->offset <= j->it.epos.offset); 1665 if (j->offset == j->it.epos.offset) 1666 return (ISC_R_NOMORE); 1667 if (j->it.xpos == j->it.xsize) { 1668 /* 1669 * We are at a transaction boundary. 1670 * Read another transaction header. 
1671 */ 1672 CHECK(journal_read_xhdr(j, &xhdr)); 1673 if (xhdr.size == 0) { 1674 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1675 "%s: journal corrupt: empty transaction", 1676 j->filename); 1677 FAIL(ISC_R_UNEXPECTED); 1678 } 1679 if (xhdr.serial0 != j->it.current_serial) { 1680 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1681 "%s: journal file corrupt: " 1682 "expected serial %u, got %u", 1683 j->filename, 1684 j->it.current_serial, xhdr.serial0); 1685 FAIL(ISC_R_UNEXPECTED); 1686 } 1687 j->it.xsize = xhdr.size; 1688 j->it.xpos = 0; 1689 } 1690 /* 1691 * Read an RR. 1692 */ 1693 CHECK(journal_read_rrhdr(j, &rrhdr)); 1694 /* 1695 * Perform a sanity check on the journal RR size. 1696 * The smallest possible RR has a 1-byte owner name 1697 * and a 10-byte header. The largest possible 1698 * RR has 65535 bytes of data, a header, and a maximum- 1699 * size owner name, well below 70 k total. 1700 */ 1701 if (rrhdr.size < 1+10 || rrhdr.size > 70000) { 1702 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1703 "%s: journal corrupt: impossible RR size " 1704 "(%d bytes)", j->filename, rrhdr.size); 1705 FAIL(ISC_R_UNEXPECTED); 1706 } 1707 1708 CHECK(size_buffer(j->mctx, &j->it.source, rrhdr.size)); 1709 CHECK(journal_read(j, j->it.source.base, rrhdr.size)); 1710 isc_buffer_add(&j->it.source, rrhdr.size); 1711 1712 /* 1713 * The target buffer is made the same size 1714 * as the source buffer, with the assumption that when 1715 * no compression in present, the output of dns_*_fromwire() 1716 * is no larger than the input. 1717 */ 1718 CHECK(size_buffer(j->mctx, &j->it.target, rrhdr.size)); 1719 1720 /* 1721 * Parse the owner name. We don't know where it 1722 * ends yet, so we make the entire "remaining" 1723 * part of the buffer "active". 
1724 */ 1725 isc_buffer_setactive(&j->it.source, 1726 j->it.source.used - j->it.source.current); 1727 CHECK(dns_name_fromwire(&j->it.name, &j->it.source, 1728 &j->it.dctx, 0, &j->it.target)); 1729 1730 /* 1731 * Check that the RR header is there, and parse it. 1732 */ 1733 if (isc_buffer_remaininglength(&j->it.source) < 10) 1734 FAIL(DNS_R_FORMERR); 1735 1736 rdtype = isc_buffer_getuint16(&j->it.source); 1737 rdclass = isc_buffer_getuint16(&j->it.source); 1738 ttl = isc_buffer_getuint32(&j->it.source); 1739 rdlen = isc_buffer_getuint16(&j->it.source); 1740 1741 /* 1742 * Parse the rdata. 1743 */ 1744 if (isc_buffer_remaininglength(&j->it.source) != rdlen) 1745 FAIL(DNS_R_FORMERR); 1746 isc_buffer_setactive(&j->it.source, rdlen); 1747 dns_rdata_reset(&j->it.rdata); 1748 CHECK(dns_rdata_fromwire(&j->it.rdata, rdclass, 1749 rdtype, &j->it.source, &j->it.dctx, 1750 0, &j->it.target)); 1751 j->it.ttl = ttl; 1752 1753 j->it.xpos += sizeof(journal_rawrrhdr_t) + rrhdr.size; 1754 if (rdtype == dns_rdatatype_soa) { 1755 /* XXX could do additional consistency checks here */ 1756 j->it.current_serial = dns_soa_getserial(&j->it.rdata); 1757 } 1758 1759 result = ISC_R_SUCCESS; 1760 1761 failure: 1762 j->it.result = result; 1763 return (result); 1764} 1765 1766isc_result_t 1767dns_journal_next_rr(dns_journal_t *j) { 1768 j->it.result = read_one_rr(j); 1769 return (j->it.result); 1770} 1771 1772void 1773dns_journal_current_rr(dns_journal_t *j, dns_name_t **name, isc_uint32_t *ttl, 1774 dns_rdata_t **rdata) 1775{ 1776 REQUIRE(j->it.result == ISC_R_SUCCESS); 1777 *name = &j->it.name; 1778 *ttl = j->it.ttl; 1779 *rdata = &j->it.rdata; 1780} 1781 1782/**************************************************************************/ 1783/* 1784 * Generating diffs from databases 1785 */ 1786 1787/* 1788 * Construct a diff containing all the RRs at the current name of the 1789 * database iterator 'dbit' in database 'db', version 'ver'. 
1790 * Set '*name' to the current name, and append the diff to 'diff'. 1791 * All new tuples will have the operation 'op'. 1792 * 1793 * Requires: 'name' must have buffer large enough to hold the name. 1794 * Typically, a dns_fixedname_t would be used. 1795 */ 1796static isc_result_t 1797get_name_diff(dns_db_t *db, dns_dbversion_t *ver, isc_stdtime_t now, 1798 dns_dbiterator_t *dbit, dns_name_t *name, dns_diffop_t op, 1799 dns_diff_t *diff) 1800{ 1801 isc_result_t result; 1802 dns_dbnode_t *node = NULL; 1803 dns_rdatasetiter_t *rdsiter = NULL; 1804 dns_difftuple_t *tuple = NULL; 1805 1806 result = dns_dbiterator_current(dbit, &node, name); 1807 if (result != ISC_R_SUCCESS) 1808 return (result); 1809 1810 result = dns_db_allrdatasets(db, node, ver, now, &rdsiter); 1811 if (result != ISC_R_SUCCESS) 1812 goto cleanup_node; 1813 1814 for (result = dns_rdatasetiter_first(rdsiter); 1815 result == ISC_R_SUCCESS; 1816 result = dns_rdatasetiter_next(rdsiter)) 1817 { 1818 dns_rdataset_t rdataset; 1819 1820 dns_rdataset_init(&rdataset); 1821 dns_rdatasetiter_current(rdsiter, &rdataset); 1822 1823 for (result = dns_rdataset_first(&rdataset); 1824 result == ISC_R_SUCCESS; 1825 result = dns_rdataset_next(&rdataset)) 1826 { 1827 dns_rdata_t rdata = DNS_RDATA_INIT; 1828 dns_rdataset_current(&rdataset, &rdata); 1829 result = dns_difftuple_create(diff->mctx, op, name, 1830 rdataset.ttl, &rdata, 1831 &tuple); 1832 if (result != ISC_R_SUCCESS) { 1833 dns_rdataset_disassociate(&rdataset); 1834 goto cleanup_iterator; 1835 } 1836 dns_diff_append(diff, &tuple); 1837 } 1838 dns_rdataset_disassociate(&rdataset); 1839 if (result != ISC_R_NOMORE) 1840 goto cleanup_iterator; 1841 } 1842 if (result != ISC_R_NOMORE) 1843 goto cleanup_iterator; 1844 1845 result = ISC_R_SUCCESS; 1846 1847 cleanup_iterator: 1848 dns_rdatasetiter_destroy(&rdsiter); 1849 1850 cleanup_node: 1851 dns_db_detachnode(db, &node); 1852 1853 return (result); 1854} 1855 1856/* 1857 * Comparison function for use by 
dns_diff_subtract when sorting 1858 * the diffs to be subtracted. The sort keys are the rdata type 1859 * and the rdata itself. The owner name is ignored, because 1860 * it is known to be the same for all tuples. 1861 */ 1862static int 1863rdata_order(const void *av, const void *bv) { 1864 dns_difftuple_t const * const *ap = av; 1865 dns_difftuple_t const * const *bp = bv; 1866 dns_difftuple_t const *a = *ap; 1867 dns_difftuple_t const *b = *bp; 1868 int r; 1869 r = (b->rdata.type - a->rdata.type); 1870 if (r != 0) 1871 return (r); 1872 r = dns_rdata_compare(&a->rdata, &b->rdata); 1873 return (r); 1874} 1875 1876static isc_result_t 1877dns_diff_subtract(dns_diff_t diff[2], dns_diff_t *r) { 1878 isc_result_t result; 1879 dns_difftuple_t *p[2]; 1880 int i, t; 1881 isc_boolean_t append; 1882 1883 CHECK(dns_diff_sort(&diff[0], rdata_order)); 1884 CHECK(dns_diff_sort(&diff[1], rdata_order)); 1885 1886 for (;;) { 1887 p[0] = ISC_LIST_HEAD(diff[0].tuples); 1888 p[1] = ISC_LIST_HEAD(diff[1].tuples); 1889 if (p[0] == NULL && p[1] == NULL) 1890 break; 1891 1892 for (i = 0; i < 2; i++) 1893 if (p[!i] == NULL) { 1894 ISC_LIST_UNLINK(diff[i].tuples, p[i], link); 1895 ISC_LIST_APPEND(r->tuples, p[i], link); 1896 goto next; 1897 } 1898 t = rdata_order(&p[0], &p[1]); 1899 if (t < 0) { 1900 ISC_LIST_UNLINK(diff[0].tuples, p[0], link); 1901 ISC_LIST_APPEND(r->tuples, p[0], link); 1902 goto next; 1903 } 1904 if (t > 0) { 1905 ISC_LIST_UNLINK(diff[1].tuples, p[1], link); 1906 ISC_LIST_APPEND(r->tuples, p[1], link); 1907 goto next; 1908 } 1909 INSIST(t == 0); 1910 /* 1911 * Identical RRs in both databases; skip them both 1912 * if the ttl differs. 
1913 */ 1914 append = ISC_TF(p[0]->ttl != p[1]->ttl); 1915 for (i = 0; i < 2; i++) { 1916 ISC_LIST_UNLINK(diff[i].tuples, p[i], link); 1917 if (append) { 1918 ISC_LIST_APPEND(r->tuples, p[i], link); 1919 } else { 1920 dns_difftuple_free(&p[i]); 1921 } 1922 } 1923 next: ; 1924 } 1925 result = ISC_R_SUCCESS; 1926 failure: 1927 return (result); 1928} 1929 1930static isc_result_t 1931diff_namespace(dns_db_t *dba, dns_dbversion_t *dbvera, 1932 dns_db_t *dbb, dns_dbversion_t *dbverb, 1933 unsigned int options, dns_diff_t *resultdiff) 1934{ 1935 dns_db_t *db[2]; 1936 dns_dbversion_t *ver[2]; 1937 dns_dbiterator_t *dbit[2] = { NULL, NULL }; 1938 isc_boolean_t have[2] = { ISC_FALSE, ISC_FALSE }; 1939 dns_fixedname_t fixname[2]; 1940 isc_result_t result, itresult[2]; 1941 dns_diff_t diff[2]; 1942 int i, t; 1943 1944 db[0] = dba, db[1] = dbb; 1945 ver[0] = dbvera, ver[1] = dbverb; 1946 1947 dns_diff_init(resultdiff->mctx, &diff[0]); 1948 dns_diff_init(resultdiff->mctx, &diff[1]); 1949 1950 dns_fixedname_init(&fixname[0]); 1951 dns_fixedname_init(&fixname[1]); 1952 1953 result = dns_db_createiterator(db[0], options, &dbit[0]); 1954 if (result != ISC_R_SUCCESS) 1955 return (result); 1956 result = dns_db_createiterator(db[1], options, &dbit[1]); 1957 if (result != ISC_R_SUCCESS) 1958 goto cleanup_iterator; 1959 1960 itresult[0] = dns_dbiterator_first(dbit[0]); 1961 itresult[1] = dns_dbiterator_first(dbit[1]); 1962 1963 for (;;) { 1964 for (i = 0; i < 2; i++) { 1965 if (! have[i] && itresult[i] == ISC_R_SUCCESS) { 1966 CHECK(get_name_diff(db[i], ver[i], 0, dbit[i], 1967 dns_fixedname_name(&fixname[i]), 1968 i == 0 ? 1969 DNS_DIFFOP_ADD : 1970 DNS_DIFFOP_DEL, 1971 &diff[i])); 1972 itresult[i] = dns_dbiterator_next(dbit[i]); 1973 have[i] = ISC_TRUE; 1974 } 1975 } 1976 1977 if (! have[0] && ! have[1]) { 1978 INSIST(ISC_LIST_EMPTY(diff[0].tuples)); 1979 INSIST(ISC_LIST_EMPTY(diff[1].tuples)); 1980 break; 1981 } 1982 1983 for (i = 0; i < 2; i++) { 1984 if (! 
have[!i]) { 1985 ISC_LIST_APPENDLIST(resultdiff->tuples, 1986 diff[i].tuples, link); 1987 INSIST(ISC_LIST_EMPTY(diff[i].tuples)); 1988 have[i] = ISC_FALSE; 1989 goto next; 1990 } 1991 } 1992 1993 t = dns_name_compare(dns_fixedname_name(&fixname[0]), 1994 dns_fixedname_name(&fixname[1])); 1995 if (t < 0) { 1996 ISC_LIST_APPENDLIST(resultdiff->tuples, 1997 diff[0].tuples, link); 1998 INSIST(ISC_LIST_EMPTY(diff[0].tuples)); 1999 have[0] = ISC_FALSE; 2000 continue; 2001 } 2002 if (t > 0) { 2003 ISC_LIST_APPENDLIST(resultdiff->tuples, 2004 diff[1].tuples, link); 2005 INSIST(ISC_LIST_EMPTY(diff[1].tuples)); 2006 have[1] = ISC_FALSE; 2007 continue; 2008 } 2009 INSIST(t == 0); 2010 CHECK(dns_diff_subtract(diff, resultdiff)); 2011 INSIST(ISC_LIST_EMPTY(diff[0].tuples)); 2012 INSIST(ISC_LIST_EMPTY(diff[1].tuples)); 2013 have[0] = have[1] = ISC_FALSE; 2014 next: ; 2015 } 2016 if (itresult[0] != ISC_R_NOMORE) 2017 FAIL(itresult[0]); 2018 if (itresult[1] != ISC_R_NOMORE) 2019 FAIL(itresult[1]); 2020 2021 INSIST(ISC_LIST_EMPTY(diff[0].tuples)); 2022 INSIST(ISC_LIST_EMPTY(diff[1].tuples)); 2023 2024 failure: 2025 dns_dbiterator_destroy(&dbit[1]); 2026 2027 cleanup_iterator: 2028 dns_dbiterator_destroy(&dbit[0]); 2029 dns_diff_clear(&diff[0]); 2030 dns_diff_clear(&diff[1]); 2031 return (result); 2032} 2033 2034/* 2035 * Compare the databases 'dba' and 'dbb' and generate a journal 2036 * entry containing the changes to make 'dba' from 'dbb' (note 2037 * the order). This journal entry will consist of a single, 2038 * possibly very large transaction. 
2039 */ 2040isc_result_t 2041dns_db_diff(isc_mem_t *mctx, dns_db_t *dba, dns_dbversion_t *dbvera, 2042 dns_db_t *dbb, dns_dbversion_t *dbverb, const char *filename) 2043{ 2044 isc_result_t result; 2045 dns_diff_t diff; 2046 2047 dns_diff_init(mctx, &diff); 2048 2049 result = dns_db_diffx(&diff, dba, dbvera, dbb, dbverb, filename); 2050 2051 dns_diff_clear(&diff); 2052 2053 return (result); 2054} 2055 2056isc_result_t 2057dns_db_diffx(dns_diff_t *diff, dns_db_t *dba, dns_dbversion_t *dbvera, 2058 dns_db_t *dbb, dns_dbversion_t *dbverb, const char *filename) 2059{ 2060 isc_result_t result; 2061 dns_journal_t *journal = NULL; 2062 2063 if (filename != NULL) { 2064 result = dns_journal_open(diff->mctx, filename, 2065 DNS_JOURNAL_CREATE, &journal); 2066 if (result != ISC_R_SUCCESS) 2067 return (result); 2068 } 2069 2070 CHECK(diff_namespace(dba, dbvera, dbb, dbverb, DNS_DB_NONSEC3, diff)); 2071 CHECK(diff_namespace(dba, dbvera, dbb, dbverb, DNS_DB_NSEC3ONLY, diff)); 2072 2073 if (journal != NULL) { 2074 if (ISC_LIST_EMPTY(diff->tuples)) 2075 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "no changes"); 2076 else 2077 CHECK(dns_journal_write_transaction(journal, diff)); 2078 } 2079 2080 failure: 2081 if (journal != NULL) 2082 dns_journal_destroy(&journal); 2083 return (result); 2084} 2085 2086isc_result_t 2087dns_journal_compact(isc_mem_t *mctx, char *filename, isc_uint32_t serial, 2088 isc_uint32_t target_size) 2089{ 2090 unsigned int i; 2091 journal_pos_t best_guess; 2092 journal_pos_t current_pos; 2093 dns_journal_t *j = NULL; 2094 dns_journal_t *new = NULL; 2095 journal_rawheader_t rawheader; 2096 unsigned int copy_length; 2097 int namelen; 2098 char *buf = NULL; 2099 unsigned int size = 0; 2100 isc_result_t result; 2101 unsigned int indexend; 2102 char newname[1024]; 2103 char backup[1024]; 2104 isc_boolean_t is_backup = ISC_FALSE; 2105 2106 namelen = strlen(filename); 2107 if (namelen > 4 && strcmp(filename + namelen - 4, ".jnl") == 0) 2108 namelen -= 4; 2109 2110 result 
= isc_string_printf(newname, sizeof(newname), "%.*s.jnw", 2111 namelen, filename); 2112 if (result != ISC_R_SUCCESS) 2113 return (result); 2114 2115 result = isc_string_printf(backup, sizeof(backup), "%.*s.jbk", 2116 namelen, filename); 2117 if (result != ISC_R_SUCCESS) 2118 return (result); 2119 2120 result = journal_open(mctx, filename, ISC_FALSE, ISC_FALSE, &j); 2121 if (result == ISC_R_NOTFOUND) { 2122 is_backup = ISC_TRUE; 2123 result = journal_open(mctx, backup, ISC_FALSE, ISC_FALSE, &j); 2124 } 2125 if (result != ISC_R_SUCCESS) 2126 return (result); 2127 2128 if (JOURNAL_EMPTY(&j->header)) { 2129 dns_journal_destroy(&j); 2130 return (ISC_R_SUCCESS); 2131 } 2132 2133 if (DNS_SERIAL_GT(j->header.begin.serial, serial) || 2134 DNS_SERIAL_GT(serial, j->header.end.serial)) { 2135 dns_journal_destroy(&j); 2136 return (ISC_R_RANGE); 2137 } 2138 2139 /* 2140 * Cope with very small target sizes. 2141 */ 2142 indexend = sizeof(journal_rawheader_t) + 2143 j->header.index_size * sizeof(journal_rawpos_t); 2144 if (target_size < indexend * 2) 2145 target_size = target_size/2 + indexend; 2146 2147 /* 2148 * See if there is any work to do. 2149 */ 2150 if ((isc_uint32_t) j->header.end.offset < target_size) { 2151 dns_journal_destroy(&j); 2152 return (ISC_R_SUCCESS); 2153 } 2154 2155 CHECK(journal_open(mctx, newname, ISC_TRUE, ISC_TRUE, &new)); 2156 2157 /* 2158 * Remove overhead so space test below can succeed. 2159 */ 2160 if (target_size >= indexend) 2161 target_size -= indexend; 2162 2163 /* 2164 * Find if we can create enough free space. 
2165 */ 2166 best_guess = j->header.begin; 2167 for (i = 0; i < j->header.index_size; i++) { 2168 if (POS_VALID(j->index[i]) && 2169 DNS_SERIAL_GE(serial, j->index[i].serial) && 2170 ((isc_uint32_t)(j->header.end.offset - j->index[i].offset) 2171 >= target_size / 2) && 2172 j->index[i].offset > best_guess.offset) 2173 best_guess = j->index[i]; 2174 } 2175 2176 current_pos = best_guess; 2177 while (current_pos.serial != serial) { 2178 CHECK(journal_next(j, ¤t_pos)); 2179 if (current_pos.serial == j->header.end.serial) 2180 break; 2181 2182 if (DNS_SERIAL_GE(serial, current_pos.serial) && 2183 ((isc_uint32_t)(j->header.end.offset - current_pos.offset) 2184 >= (target_size / 2)) && 2185 current_pos.offset > best_guess.offset) 2186 best_guess = current_pos; 2187 else 2188 break; 2189 } 2190 2191 INSIST(best_guess.serial != j->header.end.serial); 2192 if (best_guess.serial != serial) 2193 CHECK(journal_next(j, &best_guess)); 2194 2195 /* 2196 * We should now be roughly half target_size provided 2197 * we did not reach 'serial'. If not we will just copy 2198 * all uncommitted deltas regardless of the size. 2199 */ 2200 copy_length = j->header.end.offset - best_guess.offset; 2201 2202 if (copy_length != 0) { 2203 /* 2204 * Copy best_guess to end into space just freed. 2205 */ 2206 size = 64*1024; 2207 if (copy_length < size) 2208 size = copy_length; 2209 buf = isc_mem_get(mctx, size); 2210 if (buf == NULL) { 2211 result = ISC_R_NOMEMORY; 2212 goto failure; 2213 } 2214 2215 CHECK(journal_seek(j, best_guess.offset)); 2216 CHECK(journal_seek(new, indexend)); 2217 for (i = 0; i < copy_length; i += size) { 2218 unsigned int len = (copy_length - i) > size ? size : 2219 (copy_length - i); 2220 CHECK(journal_read(j, buf, len)); 2221 CHECK(journal_write(new, buf, len)); 2222 } 2223 2224 CHECK(journal_fsync(new)); 2225 2226 /* 2227 * Compute new header. 
2228 */ 2229 new->header.begin.serial = best_guess.serial; 2230 new->header.begin.offset = indexend; 2231 new->header.end.serial = j->header.end.serial; 2232 new->header.end.offset = indexend + copy_length; 2233 new->header.sourceserial = j->header.sourceserial; 2234 new->header.serialset = j->header.serialset; 2235 2236 /* 2237 * Update the journal header. 2238 */ 2239 journal_header_encode(&new->header, &rawheader); 2240 CHECK(journal_seek(new, 0)); 2241 CHECK(journal_write(new, &rawheader, sizeof(rawheader))); 2242 CHECK(journal_fsync(new)); 2243 2244 /* 2245 * Build new index. 2246 */ 2247 current_pos = new->header.begin; 2248 while (current_pos.serial != new->header.end.serial) { 2249 index_add(new, ¤t_pos); 2250 CHECK(journal_next(new, ¤t_pos)); 2251 } 2252 2253 /* 2254 * Write index. 2255 */ 2256 CHECK(index_to_disk(new)); 2257 CHECK(journal_fsync(new)); 2258 2259 indexend = new->header.end.offset; 2260 POST(indexend); 2261 } 2262 2263 /* 2264 * Close both journals before trying to rename files (this is 2265 * necessary on WIN32). 2266 */ 2267 dns_journal_destroy(&j); 2268 dns_journal_destroy(&new); 2269 2270 /* 2271 * With a UFS file system this should just succeed and be atomic. 2272 * Any IXFR outs will just continue and the old journal will be 2273 * removed on final close. 2274 * 2275 * With MSDOS / NTFS we need to do a two stage rename, triggered 2276 * by EEXIST. (If any IXFR's are running in other threads, however, 2277 * this will fail, and the journal will not be compacted. But 2278 * if so, hopefully they'll be finished by the next time we 2279 * compact.) 
2280 */ 2281 if (rename(newname, filename) == -1) { 2282 if (errno == EEXIST && !is_backup) { 2283 result = isc_file_remove(backup); 2284 if (result != ISC_R_SUCCESS && 2285 result != ISC_R_FILENOTFOUND) 2286 goto failure; 2287 if (rename(filename, backup) == -1) 2288 goto maperrno; 2289 if (rename(newname, filename) == -1) 2290 goto maperrno; 2291 (void)isc_file_remove(backup); 2292 } else { 2293 maperrno: 2294 result = ISC_R_FAILURE; 2295 goto failure; 2296 } 2297 } 2298 2299 result = ISC_R_SUCCESS; 2300 2301 failure: 2302 (void)isc_file_remove(newname); 2303 if (buf != NULL) 2304 isc_mem_put(mctx, buf, size); 2305 if (j != NULL) 2306 dns_journal_destroy(&j); 2307 if (new != NULL) 2308 dns_journal_destroy(&new); 2309 return (result); 2310}

/*
 * Encode the in-memory index of journal 'j' into j->rawindex and write
 * it to the journal file at the offset immediately following the
 * fixed-size raw header (sizeof(journal_rawheader_t)).
 *
 * Every one of the j->header.index_size entries is encoded as two
 * 32-bit fields: the serial first, then the offset.
 *
 * Returns ISC_R_SUCCESS when the index is empty (nothing is written)
 * or when the seek and write both succeed; otherwise returns the
 * result left in 'result' by the failing CHECK()ed call.
 */
static isc_result_t
index_to_disk(dns_journal_t *j) {
	isc_result_t result = ISC_R_SUCCESS;
	unsigned char *cursor;
	unsigned int slot;
	unsigned int nbytes;

	/* An empty index has no on-disk representation to update. */
	if (j->header.index_size == 0)
		return (result);

	nbytes = j->header.index_size * sizeof(journal_rawpos_t);

	/* Serialize each entry: 4 bytes of serial, then 4 bytes of offset. */
	cursor = j->rawindex;
	for (slot = 0; slot < j->header.index_size; slot++) {
		encode_uint32(j->index[slot].serial, cursor);
		cursor += 4;
		encode_uint32(j->index[slot].offset, cursor);
		cursor += 4;
	}

	/* Exactly 'nbytes' bytes must have been produced. */
	INSIST(cursor == j->rawindex + nbytes);

	/*
	 * The index lives directly behind the raw header.  CHECK() is
	 * expected to store the call's result in 'result' and jump to
	 * 'failure' on error, as it does elsewhere in this file.
	 */
	CHECK(journal_seek(j, sizeof(journal_rawheader_t)));
	CHECK(journal_write(j, j->rawindex, nbytes));

failure:
	return (result);
}