journal.c revision 153816
1/* 2 * Copyright (C) 2004, 2005 Internet Systems Consortium, Inc. ("ISC") 3 * Copyright (C) 1999-2002 Internet Software Consortium. 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH 10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, 12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE 14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 15 * PERFORMANCE OF THIS SOFTWARE. 16 */ 17 18/* $Id: journal.c,v 1.77.2.1.10.13 2005/11/03 23:08:41 marka Exp $ */ 19 20#include <config.h> 21 22#include <stdlib.h> 23#include <unistd.h> 24 25#include <isc/file.h> 26#include <isc/mem.h> 27#include <isc/stdio.h> 28#include <isc/string.h> 29#include <isc/util.h> 30 31#include <dns/compress.h> 32#include <dns/db.h> 33#include <dns/dbiterator.h> 34#include <dns/diff.h> 35#include <dns/fixedname.h> 36#include <dns/journal.h> 37#include <dns/log.h> 38#include <dns/rdataset.h> 39#include <dns/rdatasetiter.h> 40#include <dns/result.h> 41#include <dns/soa.h> 42 43/* 44 * When true, accept IXFR difference sequences where the 45 * SOA serial number does not change (BIND 8 sends such 46 * sequences). 47 */ 48static isc_boolean_t bind8_compat = ISC_TRUE; /* XXX config */ 49 50/**************************************************************************/ 51/* 52 * Miscellaneous utilities. 53 */ 54 55#define JOURNAL_COMMON_LOGARGS \ 56 dns_lctx, DNS_LOGCATEGORY_GENERAL, DNS_LOGMODULE_JOURNAL 57 58#define JOURNAL_DEBUG_LOGARGS(n) \ 59 JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(n) 60 61/* 62 * It would be non-sensical (or at least obtuse) to use FAIL() with an 63 * ISC_R_SUCCESS code, but the test is there to keep the Solaris compiler 64 * from complaining about "end-of-loop code not reached". 65 */ 66#define FAIL(code) \ 67 do { result = (code); \ 68 if (result != ISC_R_SUCCESS) goto failure; \ 69 } while (0) 70 71#define CHECK(op) \ 72 do { result = (op); \ 73 if (result != ISC_R_SUCCESS) goto failure; \ 74 } while (0) 75 76static isc_result_t index_to_disk(dns_journal_t *); 77 78static inline isc_uint32_t 79decode_uint32(unsigned char *p) { 80 return ((p[0] << 24) + 81 (p[1] << 16) + 82 (p[2] << 8) + 83 (p[3] << 0)); 84} 85 86static inline void 87encode_uint32(isc_uint32_t val, unsigned char *p) { 88 p[0] = (isc_uint8_t)(val >> 24); 89 p[1] = (isc_uint8_t)(val >> 16); 90 p[2] = (isc_uint8_t)(val >> 8); 91 p[3] = (isc_uint8_t)(val >> 0); 92} 93 94isc_result_t 95dns_db_createsoatuple(dns_db_t *db, dns_dbversion_t *ver, isc_mem_t *mctx, 96 dns_diffop_t op, dns_difftuple_t **tp) 97{ 98 isc_result_t result; 99 dns_dbnode_t *node; 100 dns_rdataset_t rdataset; 101 dns_rdata_t rdata = DNS_RDATA_INIT; 102 dns_name_t *zonename; 103 104 zonename = dns_db_origin(db); 105 106 node = NULL; 107 result = dns_db_findnode(db, zonename, ISC_FALSE, &node); 108 if (result != ISC_R_SUCCESS) 109 goto nonode; 110 111 dns_rdataset_init(&rdataset); 112 result = dns_db_findrdataset(db, node, ver, dns_rdatatype_soa, 0, 113 (isc_stdtime_t)0, &rdataset, NULL); 114 if (result != ISC_R_SUCCESS) 115 goto freenode; 116 117 result = dns_rdataset_first(&rdataset); 118 if (result != ISC_R_SUCCESS) 119 goto freenode; 120 121 dns_rdataset_current(&rdataset, &rdata); 122 123 result = dns_difftuple_create(mctx, op, zonename, rdataset.ttl, 124 &rdata, tp); 125 126 dns_rdataset_disassociate(&rdataset); 127 dns_db_detachnode(db, &node); 128 return (ISC_R_SUCCESS); 129 130 freenode: 131 dns_db_detachnode(db, &node); 132 nonode: 133 UNEXPECTED_ERROR(__FILE__, __LINE__, "missing SOA"); 134 return (result); 135} 136 137/**************************************************************************/ 138/* 139 * Journalling. 140 */ 141 142/* 143 * A journal file consists of 144 * 145 * - A fixed-size header of type journal_rawheader_t. 146 * 147 * - The index. This is an unordered array of index entries 148 * of type journal_rawpos_t giving the locations 149 * of some arbitrary subset of the journal's addressable 150 * transactions. The index entries are used as hints to 151 * speed up the process of locating a transaction with a given 152 * serial number. Unused index entries have an "offset" 153 * field of zero. The size of the index can vary between 154 * journal files, but does not change during the lifetime 155 * of a file. The size can be zero. 156 * 157 * - The journal data. This consists of one or more transactions. 158 * Each transaction begins with a transaction header of type 159 * journal_rawxhdr_t. The transaction header is followed by a 160 * sequence of RRs, similar in structure to an IXFR difference 161 * sequence (RFC1995). That is, the pre-transaction SOA, 162 * zero or more other deleted RRs, the post-transaction SOA, 163 * and zero or more other added RRs. Unlike in IXFR, each RR 164 * is prefixed with a 32-bit length. 165 * 166 * The journal data part grows as new transactions are 167 * appended to the file. Only those transactions 168 * whose serial number is current-(2^31-1) to current 169 * are considered "addressable" and may be pointed 170 * to from the header or index. They may be preceded 171 * by old transactions that are no longer addressable, 172 * and they may be followed by transactions that were 173 * appended to the journal but never committed by updating 174 * the "end" position in the header. The latter will 175 * be overwritten when new transactions are added. 176 */ 177 178/* 179 * On-disk representation of a "pointer" to a journal entry. 180 * These are used in the journal header to locate the beginning 181 * and end of the journal, and in the journal index to locate 182 * other transactions. 183 */ 184typedef struct { 185 unsigned char serial[4]; /* SOA serial before update. */ 186 /* 187 * XXXRTH Should offset be 8 bytes? 188 * XXXDCL ... probably, since isc_offset_t is 8 bytes on many OSs. 189 * XXXAG ... but we will not be able to seek >2G anyway on many 190 * platforms as long as we are using fseek() rather 191 * than lseek(). 192 */ 193 unsigned char offset[4]; /* Offset from beginning of file. */ 194} journal_rawpos_t; 195 196/* 197 * The on-disk representation of the journal header. 198 * All numbers are stored in big-endian order. 199 */ 200 201/* 202 * The header is of a fixed size, with some spare room for future 203 * extensions. 204 */ 205#define JOURNAL_HEADER_SIZE 64 /* Bytes. */ 206 207typedef union { 208 struct { 209 /* File format version ID. */ 210 unsigned char format[16]; 211 /* Position of the first addressable transaction */ 212 journal_rawpos_t begin; 213 /* Position of the next (yet nonexistent) transaction. */ 214 journal_rawpos_t end; 215 /* Number of index entries following the header. */ 216 unsigned char index_size[4]; 217 } h; 218 /* Pad the header to a fixed size. */ 219 unsigned char pad[JOURNAL_HEADER_SIZE]; 220} journal_rawheader_t; 221 222/* 223 * The on-disk representation of the transaction header. 224 * There is one of these at the beginning of each transaction. 225 */ 226typedef struct { 227 unsigned char size[4]; /* In bytes, excluding header. */ 228 unsigned char serial0[4]; /* SOA serial before update. */ 229 unsigned char serial1[4]; /* SOA serial after update. */ 230} journal_rawxhdr_t; 231 232/* 233 * The on-disk representation of the RR header. 234 * There is one of these at the beginning of each RR. 235 */ 236typedef struct { 237 unsigned char size[4]; /* In bytes, excluding header. */ 238} journal_rawrrhdr_t; 239 240/* 241 * The in-core representation of the journal header. 242 */ 243typedef struct { 244 isc_uint32_t serial; 245 isc_offset_t offset; 246} journal_pos_t; 247 248#define POS_VALID(pos) ((pos).offset != 0) 249#define POS_INVALIDATE(pos) ((pos).offset = 0, (pos).serial = 0) 250 251typedef struct { 252 unsigned char format[16]; 253 journal_pos_t begin; 254 journal_pos_t end; 255 isc_uint32_t index_size; 256} journal_header_t; 257 258/* 259 * The in-core representation of the transaction header. 260 */ 261 262typedef struct { 263 isc_uint32_t size; 264 isc_uint32_t serial0; 265 isc_uint32_t serial1; 266} journal_xhdr_t; 267 268/* 269 * The in-core representation of the RR header. 270 */ 271typedef struct { 272 isc_uint32_t size; 273} journal_rrhdr_t; 274 275 276/* 277 * Initial contents to store in the header of a newly created 278 * journal file. 279 * 280 * The header starts with the magic string ";BIND LOG V9\n" 281 * to identify the file as a BIND 9 journal file. An ASCII 282 * identification string is used rather than a binary magic 283 * number to be consistent with BIND 8 (BIND 8 journal files 284 * are ASCII text files). 285 */ 286 287static journal_header_t 288initial_journal_header = { ";BIND LOG V9\n", { 0, 0 }, { 0, 0 }, 0 }; 289 290#define JOURNAL_EMPTY(h) ((h)->begin.offset == (h)->end.offset) 291 292typedef enum { 293 JOURNAL_STATE_INVALID, 294 JOURNAL_STATE_READ, 295 JOURNAL_STATE_WRITE, 296 JOURNAL_STATE_TRANSACTION 297} journal_state_t; 298 299struct dns_journal { 300 unsigned int magic; /* JOUR */ 301 isc_mem_t *mctx; /* Memory context */ 302 journal_state_t state; 303 const char *filename; /* Journal file name */ 304 FILE * fp; /* File handle */ 305 isc_offset_t offset; /* Current file offset */ 306 journal_header_t header; /* In-core journal header */ 307 unsigned char *rawindex; /* In-core buffer for journal 308 index in on-disk format */ 309 journal_pos_t *index; /* In-core journal index */ 310 311 /* Current transaction state (when writing). */ 312 struct { 313 unsigned int n_soa; /* Number of SOAs seen */ 314 journal_pos_t pos[2]; /* Begin/end position */ 315 } x; 316 317 /* Iteration state (when reading). */ 318 struct { 319 /* These define the part of the journal we iterate over. */ 320 journal_pos_t bpos; /* Position before first, */ 321 journal_pos_t epos; /* and after last 322 transaction */ 323 /* The rest is iterator state. */ 324 isc_uint32_t current_serial; /* Current SOA serial */ 325 isc_buffer_t source; /* Data from disk */ 326 isc_buffer_t target; /* Data from _fromwire check */ 327 dns_decompress_t dctx; /* Dummy decompression ctx */ 328 dns_name_t name; /* Current domain name */ 329 dns_rdata_t rdata; /* Current rdata */ 330 isc_uint32_t ttl; /* Current TTL */ 331 unsigned int xsize; /* Size of transaction data */ 332 unsigned int xpos; /* Current position in it */ 333 isc_result_t result; /* Result of last call */ 334 } it; 335}; 336 337#define DNS_JOURNAL_MAGIC ISC_MAGIC('J', 'O', 'U', 'R') 338#define DNS_JOURNAL_VALID(t) ISC_MAGIC_VALID(t, DNS_JOURNAL_MAGIC) 339 340static void 341journal_pos_decode(journal_rawpos_t *raw, journal_pos_t *cooked) { 342 cooked->serial = decode_uint32(raw->serial); 343 cooked->offset = decode_uint32(raw->offset); 344} 345 346static void 347journal_pos_encode(journal_rawpos_t *raw, journal_pos_t *cooked) { 348 encode_uint32(cooked->serial, raw->serial); 349 encode_uint32(cooked->offset, raw->offset); 350} 351 352static void 353journal_header_decode(journal_rawheader_t *raw, journal_header_t *cooked) { 354 INSIST(sizeof(cooked->format) == sizeof(raw->h.format)); 355 memcpy(cooked->format, raw->h.format, sizeof(cooked->format)); 356 journal_pos_decode(&raw->h.begin, &cooked->begin); 357 journal_pos_decode(&raw->h.end, &cooked->end); 358 cooked->index_size = decode_uint32(raw->h.index_size); 359} 360 361static void 362journal_header_encode(journal_header_t *cooked, journal_rawheader_t *raw) { 363 INSIST(sizeof(cooked->format) == sizeof(raw->h.format)); 364 memset(raw->pad, 0, sizeof(raw->pad)); 365 memcpy(raw->h.format, cooked->format, sizeof(raw->h.format)); 366 journal_pos_encode(&raw->h.begin, &cooked->begin); 367 journal_pos_encode(&raw->h.end, &cooked->end); 368 encode_uint32(cooked->index_size, raw->h.index_size); 369} 370 371/* 372 * Journal file I/O subroutines, with error checking and reporting. 373 */ 374static isc_result_t 375journal_seek(dns_journal_t *j, isc_uint32_t offset) { 376 isc_result_t result; 377 result = isc_stdio_seek(j->fp, (long)offset, SEEK_SET); 378 if (result != ISC_R_SUCCESS) { 379 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 380 "%s: seek: %s", j->filename, 381 isc_result_totext(result)); 382 return (ISC_R_UNEXPECTED); 383 } 384 j->offset = offset; 385 return (ISC_R_SUCCESS); 386} 387 388static isc_result_t 389journal_read(dns_journal_t *j, void *mem, size_t nbytes) { 390 isc_result_t result; 391 392 result = isc_stdio_read(mem, 1, nbytes, j->fp, NULL); 393 if (result != ISC_R_SUCCESS) { 394 if (result == ISC_R_EOF) 395 return (ISC_R_NOMORE); 396 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 397 "%s: read: %s", 398 j->filename, isc_result_totext(result)); 399 return (ISC_R_UNEXPECTED); 400 } 401 j->offset += nbytes; 402 return (ISC_R_SUCCESS); 403} 404 405static isc_result_t 406journal_write(dns_journal_t *j, void *mem, size_t nbytes) { 407 isc_result_t result; 408 409 result = isc_stdio_write(mem, 1, nbytes, j->fp, NULL); 410 if (result != ISC_R_SUCCESS) { 411 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 412 "%s: write: %s", 413 j->filename, isc_result_totext(result)); 414 return (ISC_R_UNEXPECTED); 415 } 416 j->offset += nbytes; 417 return (ISC_R_SUCCESS); 418} 419 420static isc_result_t 421journal_fsync(dns_journal_t *j) { 422 isc_result_t result; 423 result = isc_stdio_flush(j->fp); 424 if (result != ISC_R_SUCCESS) { 425 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 426 "%s: flush: %s", 427 j->filename, isc_result_totext(result)); 428 return (ISC_R_UNEXPECTED); 429 } 430 result = isc_stdio_sync(j->fp); 431 if (result != ISC_R_SUCCESS) { 432 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 433 "%s: fsync: %s", 434 j->filename, isc_result_totext(result)); 435 return (ISC_R_UNEXPECTED); 436 } 437 return (ISC_R_SUCCESS); 438} 439 440/* 441 * Read/write a transaction header at the current file position. 442 */ 443 444static isc_result_t 445journal_read_xhdr(dns_journal_t *j, journal_xhdr_t *xhdr) { 446 journal_rawxhdr_t raw; 447 isc_result_t result; 448 result = journal_read(j, &raw, sizeof(raw)); 449 if (result != ISC_R_SUCCESS) 450 return (result); 451 xhdr->size = decode_uint32(raw.size); 452 xhdr->serial0 = decode_uint32(raw.serial0); 453 xhdr->serial1 = decode_uint32(raw.serial1); 454 return (ISC_R_SUCCESS); 455} 456 457static isc_result_t 458journal_write_xhdr(dns_journal_t *j, isc_uint32_t size, 459 isc_uint32_t serial0, isc_uint32_t serial1) 460{ 461 journal_rawxhdr_t raw; 462 encode_uint32(size, raw.size); 463 encode_uint32(serial0, raw.serial0); 464 encode_uint32(serial1, raw.serial1); 465 return (journal_write(j, &raw, sizeof(raw))); 466} 467 468 469/* 470 * Read an RR header at the current file position. 471 */ 472 473static isc_result_t 474journal_read_rrhdr(dns_journal_t *j, journal_rrhdr_t *rrhdr) { 475 journal_rawrrhdr_t raw; 476 isc_result_t result; 477 result = journal_read(j, &raw, sizeof(raw)); 478 if (result != ISC_R_SUCCESS) 479 return (result); 480 rrhdr->size = decode_uint32(raw.size); 481 return (ISC_R_SUCCESS); 482} 483 484static isc_result_t 485journal_file_create(isc_mem_t *mctx, const char *filename) { 486 FILE *fp = NULL; 487 isc_result_t result; 488 journal_header_t header; 489 journal_rawheader_t rawheader; 490 int index_size = 56; /* XXX configurable */ 491 int size; 492 void *mem; /* Memory for temporary index image. */ 493 494 INSIST(sizeof(journal_rawheader_t) == JOURNAL_HEADER_SIZE); 495 496 result = isc_stdio_open(filename, "wb", &fp); 497 if (result != ISC_R_SUCCESS) { 498 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 499 "%s: create: %s", 500 filename, isc_result_totext(result)); 501 return (ISC_R_UNEXPECTED); 502 } 503 504 header = initial_journal_header; 505 header.index_size = index_size; 506 journal_header_encode(&header, &rawheader); 507 508 size = sizeof(journal_rawheader_t) + 509 index_size * sizeof(journal_rawpos_t); 510 511 mem = isc_mem_get(mctx, size); 512 if (mem == NULL) { 513 (void)isc_stdio_close(fp); 514 (void)isc_file_remove(filename); 515 return (ISC_R_NOMEMORY); 516 } 517 memset(mem, 0, size); 518 memcpy(mem, &rawheader, sizeof(rawheader)); 519 520 result = isc_stdio_write(mem, 1, (size_t) size, fp, NULL); 521 if (result != ISC_R_SUCCESS) { 522 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 523 "%s: write: %s", 524 filename, isc_result_totext(result)); 525 (void)isc_stdio_close(fp); 526 (void)isc_file_remove(filename); 527 isc_mem_put(mctx, mem, size); 528 return (ISC_R_UNEXPECTED); 529 } 530 isc_mem_put(mctx, mem, size); 531 532 result = isc_stdio_close(fp); 533 if (result != ISC_R_SUCCESS) { 534 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 535 "%s: close: %s", 536 filename, isc_result_totext(result)); 537 (void)isc_file_remove(filename); 538 return (ISC_R_UNEXPECTED); 539 } 540 541 return (ISC_R_SUCCESS); 542} 543 544static isc_result_t 545journal_open(isc_mem_t *mctx, const char *filename, isc_boolean_t write, 546 isc_boolean_t create, dns_journal_t **journalp) { 547 FILE *fp = NULL; 548 isc_result_t result; 549 journal_rawheader_t rawheader; 550 dns_journal_t *j; 551 552 INSIST(journalp != NULL && *journalp == NULL); 553 j = isc_mem_get(mctx, sizeof(*j)); 554 if (j == NULL) 555 return (ISC_R_NOMEMORY); 556 557 j->mctx = mctx; 558 j->state = JOURNAL_STATE_INVALID; 559 j->fp = NULL; 560 j->filename = filename; 561 j->index = NULL; 562 j->rawindex = NULL; 563 564 result = isc_stdio_open(j->filename, write ? "rb+" : "rb", &fp); 565 566 if (result == ISC_R_FILENOTFOUND) { 567 if (create) { 568 isc_log_write(JOURNAL_COMMON_LOGARGS, 569 ISC_LOG_INFO, 570 "journal file %s does not exist, " 571 "creating it", 572 j->filename); 573 CHECK(journal_file_create(mctx, filename)); 574 /* 575 * Retry. 576 */ 577 result = isc_stdio_open(j->filename, "rb+", &fp); 578 } else { 579 FAIL(ISC_R_NOTFOUND); 580 } 581 } 582 if (result != ISC_R_SUCCESS) { 583 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 584 "%s: open: %s", 585 j->filename, isc_result_totext(result)); 586 FAIL(ISC_R_UNEXPECTED); 587 } 588 589 j->fp = fp; 590 591 /* 592 * Set magic early so that seek/read can succeed. 593 */ 594 j->magic = DNS_JOURNAL_MAGIC; 595 596 CHECK(journal_seek(j, 0)); 597 CHECK(journal_read(j, &rawheader, sizeof(rawheader))); 598 599 if (memcmp(rawheader.h.format, initial_journal_header.format, 600 sizeof(initial_journal_header.format)) != 0) { 601 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 602 "%s: journal format not recognized", 603 j->filename); 604 FAIL(ISC_R_UNEXPECTED); 605 } 606 journal_header_decode(&rawheader, &j->header); 607 608 /* 609 * If there is an index, read the raw index into a dynamically 610 * allocated buffer and then convert it into a cooked index. 611 */ 612 if (j->header.index_size != 0) { 613 unsigned int i; 614 unsigned int rawbytes; 615 unsigned char *p; 616 617 rawbytes = j->header.index_size * sizeof(journal_rawpos_t); 618 j->rawindex = isc_mem_get(mctx, rawbytes); 619 if (j->rawindex == NULL) 620 FAIL(ISC_R_NOMEMORY); 621 622 CHECK(journal_read(j, j->rawindex, rawbytes)); 623 624 j->index = isc_mem_get(mctx, j->header.index_size * 625 sizeof(journal_pos_t)); 626 if (j->index == NULL) 627 FAIL(ISC_R_NOMEMORY); 628 629 p = j->rawindex; 630 for (i = 0; i < j->header.index_size; i++) { 631 j->index[i].serial = decode_uint32(p); 632 p += 4; 633 j->index[i].offset = decode_uint32(p); 634 p += 4; 635 } 636 INSIST(p == j->rawindex + rawbytes); 637 } 638 j->offset = -1; /* Invalid, must seek explicitly. */ 639 640 /* 641 * Initialize the iterator. 642 */ 643 dns_name_init(&j->it.name, NULL); 644 dns_rdata_init(&j->it.rdata); 645 646 /* 647 * Set up empty initial buffers for uncheched and checked 648 * wire format RR data. They will be reallocated 649 * later. 650 */ 651 isc_buffer_init(&j->it.source, NULL, 0); 652 isc_buffer_init(&j->it.target, NULL, 0); 653 dns_decompress_init(&j->it.dctx, -1, DNS_DECOMPRESS_NONE); 654 655 j->state = 656 write ? JOURNAL_STATE_WRITE : JOURNAL_STATE_READ; 657 658 *journalp = j; 659 return (ISC_R_SUCCESS); 660 661 failure: 662 j->magic = 0; 663 if (j->index != NULL) { 664 isc_mem_put(j->mctx, j->index, j->header.index_size * 665 sizeof(journal_rawpos_t)); 666 j->index = NULL; 667 } 668 if (j->fp != NULL) 669 (void)isc_stdio_close(j->fp); 670 isc_mem_put(j->mctx, j, sizeof(*j)); 671 return (result); 672} 673 674isc_result_t 675dns_journal_open(isc_mem_t *mctx, const char *filename, isc_boolean_t write, 676 dns_journal_t **journalp) { 677 return (journal_open(mctx, filename, write, write, journalp)); 678} 679 680/* 681 * A comparison function defining the sorting order for 682 * entries in the IXFR-style journal file. 683 * 684 * The IXFR format requires that deletions are sorted before 685 * additions, and within either one, SOA records are sorted 686 * before others. 687 * 688 * Also sort the non-SOA records by type as a courtesy to the 689 * server receiving the IXFR - it may help reduce the amount of 690 * rdataset merging it has to do. 691 */ 692static int 693ixfr_order(const void *av, const void *bv) { 694 dns_difftuple_t const * const *ap = av; 695 dns_difftuple_t const * const *bp = bv; 696 dns_difftuple_t const *a = *ap; 697 dns_difftuple_t const *b = *bp; 698 int r; 699 700 r = (b->op == DNS_DIFFOP_DEL) - (a->op == DNS_DIFFOP_DEL); 701 if (r != 0) 702 return (r); 703 704 r = (b->rdata.type == dns_rdatatype_soa) - 705 (a->rdata.type == dns_rdatatype_soa); 706 if (r != 0) 707 return (r); 708 709 r = (a->rdata.type - b->rdata.type); 710 return (r); 711} 712 713/* 714 * Advance '*pos' to the next journal transaction. 715 * 716 * Requires: 717 * *pos refers to a valid journal transaction. 718 * 719 * Ensures: 720 * When ISC_R_SUCCESS is returned, 721 * *pos refers to the next journal transaction. 722 * 723 * Returns one of: 724 * 725 * ISC_R_SUCCESS 726 * ISC_R_NOMORE *pos pointed at the last transaction 727 * Other results due to file errors are possible. 728 */ 729static isc_result_t 730journal_next(dns_journal_t *j, journal_pos_t *pos) { 731 isc_result_t result; 732 journal_xhdr_t xhdr; 733 REQUIRE(DNS_JOURNAL_VALID(j)); 734 735 result = journal_seek(j, pos->offset); 736 if (result != ISC_R_SUCCESS) 737 return (result); 738 739 if (pos->serial == j->header.end.serial) 740 return (ISC_R_NOMORE); 741 /* 742 * Read the header of the current transaction. 743 * This will return ISC_R_NOMORE if we are at EOF. 744 */ 745 result = journal_read_xhdr(j, &xhdr); 746 if (result != ISC_R_SUCCESS) 747 return (result); 748 749 /* 750 * Check serial number consistency. 751 */ 752 if (xhdr.serial0 != pos->serial) { 753 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 754 "%s: journal file corrupt: " 755 "expected serial %u, got %u", 756 j->filename, pos->serial, xhdr.serial0); 757 return (ISC_R_UNEXPECTED); 758 } 759 760 /* 761 * Check for offset wraparound. 762 */ 763 if ((isc_offset_t)(pos->offset + sizeof(journal_rawxhdr_t) + xhdr.size) 764 < pos->offset) { 765 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 766 "%s: offset too large", j->filename); 767 return (ISC_R_UNEXPECTED); 768 } 769 770 pos->offset += sizeof(journal_rawxhdr_t) + xhdr.size; 771 pos->serial = xhdr.serial1; 772 return (ISC_R_SUCCESS); 773} 774 775/* 776 * If the index of the journal 'j' contains an entry "better" 777 * than '*best_guess', replace '*best_guess' with it. 778 * 779 * "Better" means having a serial number closer to 'serial' 780 * but not greater than 'serial'. 781 */ 782static void 783index_find(dns_journal_t *j, isc_uint32_t serial, journal_pos_t *best_guess) { 784 unsigned int i; 785 if (j->index == NULL) 786 return; 787 for (i = 0; i < j->header.index_size; i++) { 788 if (POS_VALID(j->index[i]) && 789 DNS_SERIAL_GE(serial, j->index[i].serial) && 790 DNS_SERIAL_GT(j->index[i].serial, best_guess->serial)) 791 *best_guess = j->index[i]; 792 } 793} 794 795/* 796 * Add a new index entry. If there is no room, make room by removing 797 * the odd-numbered entries and compacting the others into the first 798 * half of the index. This decimates old index entries exponentially 799 * over time, so that the index always contains a much larger fraction 800 * of recent serial numbers than of old ones. This is deliberate - 801 * most index searches are for outgoing IXFR, and IXFR tends to request 802 * recent versions more often than old ones. 803 */ 804static void 805index_add(dns_journal_t *j, journal_pos_t *pos) { 806 unsigned int i; 807 if (j->index == NULL) 808 return; 809 /* 810 * Search for a vacant position. 811 */ 812 for (i = 0; i < j->header.index_size; i++) { 813 if (! POS_VALID(j->index[i])) 814 break; 815 } 816 if (i == j->header.index_size) { 817 unsigned int k = 0; 818 /* 819 * Found no vacant position. Make some room. 820 */ 821 for (i = 0; i < j->header.index_size; i += 2) { 822 j->index[k++] = j->index[i]; 823 } 824 i = k; /* 'i' identifies the first vacant position. */ 825 while (k < j->header.index_size) { 826 POS_INVALIDATE(j->index[k]); 827 k++; 828 } 829 } 830 INSIST(i < j->header.index_size); 831 INSIST(! POS_VALID(j->index[i])); 832 833 /* 834 * Store the new index entry. 835 */ 836 j->index[i] = *pos; 837} 838 839/* 840 * Invalidate any existing index entries that could become 841 * ambiguous when a new transaction with number 'serial' is added. 842 */ 843static void 844index_invalidate(dns_journal_t *j, isc_uint32_t serial) { 845 unsigned int i; 846 if (j->index == NULL) 847 return; 848 for (i = 0; i < j->header.index_size; i++) { 849 if (! DNS_SERIAL_GT(serial, j->index[i].serial)) 850 POS_INVALIDATE(j->index[i]); 851 } 852} 853 854/* 855 * Try to find a transaction with initial serial number 'serial' 856 * in the journal 'j'. 857 * 858 * If found, store its position at '*pos' and return ISC_R_SUCCESS. 859 * 860 * If 'serial' is current (= the ending serial number of the 861 * last transaction in the journal), set '*pos' to 862 * the position immediately following the last transaction and 863 * return ISC_R_SUCCESS. 864 * 865 * If 'serial' is within the range of addressable serial numbers 866 * covered by the journal but that particular serial number is missing 867 * (from the journal, not just from the index), return ISC_R_NOTFOUND. 868 * 869 * If 'serial' is outside the range of addressable serial numbers 870 * covered by the journal, return ISC_R_RANGE. 871 * 872 */ 873static isc_result_t 874journal_find(dns_journal_t *j, isc_uint32_t serial, journal_pos_t *pos) { 875 isc_result_t result; 876 journal_pos_t current_pos; 877 REQUIRE(DNS_JOURNAL_VALID(j)); 878 879 if (DNS_SERIAL_GT(j->header.begin.serial, serial)) 880 return (ISC_R_RANGE); 881 if (DNS_SERIAL_GT(serial, j->header.end.serial)) 882 return (ISC_R_RANGE); 883 if (serial == j->header.end.serial) { 884 *pos = j->header.end; 885 return (ISC_R_SUCCESS); 886 } 887 888 current_pos = j->header.begin; 889 index_find(j, serial, ¤t_pos); 890 891 while (current_pos.serial != serial) { 892 if (DNS_SERIAL_GT(current_pos.serial, serial)) 893 return (ISC_R_NOTFOUND); 894 result = journal_next(j, ¤t_pos); 895 if (result != ISC_R_SUCCESS) 896 return (result); 897 } 898 *pos = current_pos; 899 return (ISC_R_SUCCESS); 900} 901 902isc_result_t 903dns_journal_begin_transaction(dns_journal_t *j) { 904 isc_uint32_t offset; 905 isc_result_t result; 906 journal_rawxhdr_t hdr; 907 908 REQUIRE(DNS_JOURNAL_VALID(j)); 909 REQUIRE(j->state == JOURNAL_STATE_WRITE); 910 911 /* 912 * Find the file offset where the new transaction should 913 * be written, and seek there. 914 */ 915 if (JOURNAL_EMPTY(&j->header)) { 916 offset = sizeof(journal_rawheader_t) + 917 j->header.index_size * sizeof(journal_rawpos_t); 918 } else { 919 offset = j->header.end.offset; 920 } 921 j->x.pos[0].offset = offset; 922 j->x.pos[1].offset = offset; /* Initial value, will be incremented. */ 923 j->x.n_soa = 0; 924 925 CHECK(journal_seek(j, offset)); 926 927 /* 928 * Write a dummy transaction header of all zeroes to reserve 929 * space. It will be filled in when the transaction is 930 * finished. 931 */ 932 memset(&hdr, 0, sizeof(hdr)); 933 CHECK(journal_write(j, &hdr, sizeof(hdr))); 934 j->x.pos[1].offset = j->offset; 935 936 j->state = JOURNAL_STATE_TRANSACTION; 937 result = ISC_R_SUCCESS; 938 failure: 939 return (result); 940} 941 942isc_result_t 943dns_journal_writediff(dns_journal_t *j, dns_diff_t *diff) { 944 dns_difftuple_t *t; 945 isc_buffer_t buffer; 946 void *mem = NULL; 947 unsigned int size; 948 isc_result_t result; 949 isc_region_t used; 950 951 REQUIRE(DNS_DIFF_VALID(diff)); 952 REQUIRE(j->state == JOURNAL_STATE_TRANSACTION); 953 954 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "writing to journal"); 955 (void)dns_diff_print(diff, NULL); 956 957 /* 958 * Pass 1: determine the buffer size needed, and 959 * keep track of SOA serial numbers. 960 */ 961 size = 0; 962 for (t = ISC_LIST_HEAD(diff->tuples); t != NULL; 963 t = ISC_LIST_NEXT(t, link)) 964 { 965 if (t->rdata.type == dns_rdatatype_soa) { 966 if (j->x.n_soa < 2) 967 j->x.pos[j->x.n_soa].serial = 968 dns_soa_getserial(&t->rdata); 969 j->x.n_soa++; 970 } 971 size += sizeof(journal_rawrrhdr_t); 972 size += t->name.length; /* XXX should have access macro? */ 973 size += 10; 974 size += t->rdata.length; 975 } 976 977 mem = isc_mem_get(j->mctx, size); 978 if (mem == NULL) 979 return (ISC_R_NOMEMORY); 980 981 isc_buffer_init(&buffer, mem, size); 982 983 /* 984 * Pass 2. Write RRs to buffer. 985 */ 986 for (t = ISC_LIST_HEAD(diff->tuples); t != NULL; 987 t = ISC_LIST_NEXT(t, link)) 988 { 989 /* 990 * Write the RR header. 991 */ 992 isc_buffer_putuint32(&buffer, t->name.length + 10 + 993 t->rdata.length); 994 /* 995 * Write the owner name, RR header, and RR data. 996 */ 997 isc_buffer_putmem(&buffer, t->name.ndata, t->name.length); 998 isc_buffer_putuint16(&buffer, t->rdata.type); 999 isc_buffer_putuint16(&buffer, t->rdata.rdclass); 1000 isc_buffer_putuint32(&buffer, t->ttl); 1001 INSIST(t->rdata.length < 65536); 1002 isc_buffer_putuint16(&buffer, (isc_uint16_t)t->rdata.length); 1003 INSIST(isc_buffer_availablelength(&buffer) >= t->rdata.length); 1004 isc_buffer_putmem(&buffer, t->rdata.data, t->rdata.length); 1005 } 1006 1007 isc_buffer_usedregion(&buffer, &used); 1008 INSIST(used.length == size); 1009 1010 j->x.pos[1].offset += used.length; 1011 1012 /* 1013 * Write the buffer contents to the journal file. 1014 */ 1015 CHECK(journal_write(j, used.base, used.length)); 1016 1017 result = ISC_R_SUCCESS; 1018 1019 failure: 1020 if (mem != NULL) 1021 isc_mem_put(j->mctx, mem, size); 1022 return (result); 1023 1024} 1025 1026isc_result_t 1027dns_journal_commit(dns_journal_t *j) { 1028 isc_result_t result; 1029 journal_rawheader_t rawheader; 1030 1031 REQUIRE(DNS_JOURNAL_VALID(j)); 1032 REQUIRE(j->state == JOURNAL_STATE_TRANSACTION); 1033 1034 /* 1035 * Perform some basic consistency checks. 1036 */ 1037 if (j->x.n_soa != 2) { 1038 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1039 "%s: malformed transaction: %d SOAs", 1040 j->filename, j->x.n_soa); 1041 return (ISC_R_UNEXPECTED); 1042 } 1043 if (! (DNS_SERIAL_GT(j->x.pos[1].serial, j->x.pos[0].serial) || 1044 (bind8_compat && 1045 j->x.pos[1].serial == j->x.pos[0].serial))) 1046 { 1047 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1048 "%s: malformed transaction: serial number " 1049 "would decrease", j->filename); 1050 return (ISC_R_UNEXPECTED); 1051 } 1052 if (! JOURNAL_EMPTY(&j->header)) { 1053 if (j->x.pos[0].serial != j->header.end.serial) { 1054 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1055 "malformed transaction: " 1056 "%s last serial %u != " 1057 "transaction first serial %u", 1058 j->filename, 1059 j->header.end.serial, 1060 j->x.pos[0].serial); 1061 return (ISC_R_UNEXPECTED); 1062 } 1063 } 1064 1065 /* 1066 * Some old journal entries may become non-addressable 1067 * when we increment the current serial number. Purge them 1068 * by stepping header.begin forward to the first addressable 1069 * transaction. Also purge them from the index. 1070 */ 1071 if (! JOURNAL_EMPTY(&j->header)) { 1072 while (! DNS_SERIAL_GT(j->x.pos[1].serial, 1073 j->header.begin.serial)) { 1074 CHECK(journal_next(j, &j->header.begin)); 1075 } 1076 index_invalidate(j, j->x.pos[1].serial); 1077 } 1078#ifdef notyet 1079 if (DNS_SERIAL_GT(last_dumped_serial, j->x.pos[1].serial)) { 1080 force_dump(...); 1081 } 1082#endif 1083 1084 /* 1085 * Commit the transaction data to stable storage. 1086 */ 1087 CHECK(journal_fsync(j)); 1088 1089 /* 1090 * Update the transaction header. 1091 */ 1092 CHECK(journal_seek(j, j->x.pos[0].offset)); 1093 CHECK(journal_write_xhdr(j, (j->x.pos[1].offset - j->x.pos[0].offset) - 1094 sizeof(journal_rawxhdr_t), 1095 j->x.pos[0].serial, j->x.pos[1].serial)); 1096 1097 /* 1098 * Update the journal header. 1099 */ 1100 if (JOURNAL_EMPTY(&j->header)) { 1101 j->header.begin = j->x.pos[0]; 1102 } 1103 j->header.end = j->x.pos[1]; 1104 journal_header_encode(&j->header, &rawheader); 1105 CHECK(journal_seek(j, 0)); 1106 CHECK(journal_write(j, &rawheader, sizeof(rawheader))); 1107 1108 /* 1109 * Update the index. 1110 */ 1111 index_add(j, &j->x.pos[0]); 1112 1113 /* 1114 * Convert the index into on-disk format and write 1115 * it to disk. 1116 */ 1117 CHECK(index_to_disk(j)); 1118 1119 /* 1120 * Commit the header to stable storage. 1121 */ 1122 CHECK(journal_fsync(j)); 1123 1124 /* 1125 * We no longer have a transaction open. 1126 */ 1127 j->state = JOURNAL_STATE_WRITE; 1128 1129 result = ISC_R_SUCCESS; 1130 1131 failure: 1132 return (result); 1133} 1134 1135isc_result_t 1136dns_journal_write_transaction(dns_journal_t *j, dns_diff_t *diff) { 1137 isc_result_t result; 1138 CHECK(dns_diff_sort(diff, ixfr_order)); 1139 CHECK(dns_journal_begin_transaction(j)); 1140 CHECK(dns_journal_writediff(j, diff)); 1141 CHECK(dns_journal_commit(j)); 1142 result = ISC_R_SUCCESS; 1143 failure: 1144 return (result); 1145} 1146 1147void 1148dns_journal_destroy(dns_journal_t **journalp) { 1149 dns_journal_t *j = *journalp; 1150 REQUIRE(DNS_JOURNAL_VALID(j)); 1151 1152 j->it.result = ISC_R_FAILURE; 1153 dns_name_invalidate(&j->it.name); 1154 dns_decompress_invalidate(&j->it.dctx); 1155 if (j->rawindex != NULL) 1156 isc_mem_put(j->mctx, j->rawindex, j->header.index_size * 1157 sizeof(journal_rawpos_t)); 1158 if (j->index != NULL) 1159 isc_mem_put(j->mctx, j->index, j->header.index_size * 1160 sizeof(journal_pos_t)); 1161 if (j->it.target.base != NULL) 1162 isc_mem_put(j->mctx, j->it.target.base, j->it.target.length); 1163 if (j->it.source.base != NULL) 1164 isc_mem_put(j->mctx, j->it.source.base, j->it.source.length); 1165 1166 if (j->fp != NULL) 1167 (void)isc_stdio_close(j->fp); 1168 j->magic = 0; 1169 isc_mem_put(j->mctx, j, sizeof(*j)); 1170 *journalp = NULL; 1171} 1172 1173/* 1174 * Roll the open journal 'j' into the database 'db'. 1175 * A new database version will be created. 1176 */ 1177 1178/* XXX Share code with incoming IXFR? */ 1179 1180static isc_result_t 1181roll_forward(dns_journal_t *j, dns_db_t *db) { 1182 isc_buffer_t source; /* Transaction data from disk */ 1183 isc_buffer_t target; /* Ditto after _fromwire check */ 1184 isc_uint32_t db_serial; /* Database SOA serial */ 1185 isc_uint32_t end_serial; /* Last journal SOA serial */ 1186 isc_result_t result; 1187 dns_dbversion_t *ver = NULL; 1188 journal_pos_t pos; 1189 dns_diff_t diff; 1190 unsigned int n_soa = 0; 1191 unsigned int n_put = 0; 1192 1193 REQUIRE(DNS_JOURNAL_VALID(j)); 1194 REQUIRE(DNS_DB_VALID(db)); 1195 1196 dns_diff_init(j->mctx, &diff); 1197 1198 /* 1199 * Set up empty initial buffers for uncheched and checked 1200 * wire format transaction data. They will be reallocated 1201 * later. 1202 */ 1203 isc_buffer_init(&source, NULL, 0); 1204 isc_buffer_init(&target, NULL, 0); 1205 1206 /* 1207 * Create the new database version. 1208 */ 1209 CHECK(dns_db_newversion(db, &ver)); 1210 1211 /* 1212 * Get the current database SOA serial number. 1213 */ 1214 CHECK(dns_db_getsoaserial(db, ver, &db_serial)); 1215 1216 /* 1217 * Locate a journal entry for the current database serial. 1218 */ 1219 CHECK(journal_find(j, db_serial, &pos)); 1220 /* 1221 * XXX do more drastic things, like marking zone stale, 1222 * if this fails? 1223 */ 1224 /* 1225 * XXXRTH The zone code should probably mark the zone as bad and 1226 * scream loudly into the log if this is a dynamic update 1227 * log reply that failed. 1228 */ 1229 1230 end_serial = dns_journal_last_serial(j); 1231 if (db_serial == end_serial) 1232 CHECK(DNS_R_UPTODATE); 1233 1234 CHECK(dns_journal_iter_init(j, db_serial, end_serial)); 1235 1236 for (result = dns_journal_first_rr(j); 1237 result == ISC_R_SUCCESS; 1238 result = dns_journal_next_rr(j)) 1239 { 1240 dns_name_t *name; 1241 isc_uint32_t ttl; 1242 dns_rdata_t *rdata; 1243 dns_difftuple_t *tuple = NULL; 1244 1245 name = NULL; 1246 rdata = NULL; 1247 dns_journal_current_rr(j, &name, &ttl, &rdata); 1248 1249 if (rdata->type == dns_rdatatype_soa) { 1250 n_soa++; 1251 if (n_soa == 2) 1252 db_serial = j->it.current_serial; 1253 } 1254 1255 if (n_soa == 3) 1256 n_soa = 1; 1257 if (n_soa == 0) { 1258 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1259 "%s: journal file corrupt: missing " 1260 "initial SOA", j->filename); 1261 FAIL(ISC_R_UNEXPECTED); 1262 } 1263 CHECK(dns_difftuple_create(diff.mctx, n_soa == 1 ? 1264 DNS_DIFFOP_DEL : DNS_DIFFOP_ADD, 1265 name, ttl, rdata, &tuple)); 1266 dns_diff_append(&diff, &tuple); 1267 1268 if (++n_put > 100) { 1269 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), 1270 "%s: applying diff to database (%u)", 1271 j->filename, db_serial); 1272 (void)dns_diff_print(&diff, NULL); 1273 CHECK(dns_diff_apply(&diff, db, ver)); 1274 dns_diff_clear(&diff); 1275 n_put = 0; 1276 } 1277 } 1278 if (result == ISC_R_NOMORE) 1279 result = ISC_R_SUCCESS; 1280 CHECK(result); 1281 1282 if (n_put != 0) { 1283 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), 1284 "%s: applying final diff to database (%u)", 1285 j->filename, db_serial); 1286 (void)dns_diff_print(&diff, NULL); 1287 CHECK(dns_diff_apply(&diff, db, ver)); 1288 dns_diff_clear(&diff); 1289 } 1290 1291 failure: 1292 if (ver != NULL) 1293 dns_db_closeversion(db, &ver, result == ISC_R_SUCCESS ? 1294 ISC_TRUE : ISC_FALSE); 1295 1296 if (source.base != NULL) 1297 isc_mem_put(j->mctx, source.base, source.length); 1298 if (target.base != NULL) 1299 isc_mem_put(j->mctx, target.base, target.length); 1300 1301 dns_diff_clear(&diff); 1302 1303 return (result); 1304} 1305 1306isc_result_t 1307dns_journal_rollforward(isc_mem_t *mctx, dns_db_t *db, const char *filename) { 1308 dns_journal_t *j; 1309 isc_result_t result; 1310 1311 REQUIRE(DNS_DB_VALID(db)); 1312 REQUIRE(filename != NULL); 1313 1314 j = NULL; 1315 result = dns_journal_open(mctx, filename, ISC_FALSE, &j); 1316 if (result == ISC_R_NOTFOUND) { 1317 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), 1318 "no journal file, but that's OK"); 1319 return (DNS_R_NOJOURNAL); 1320 } 1321 if (result != ISC_R_SUCCESS) 1322 return (result); 1323 if (JOURNAL_EMPTY(&j->header)) 1324 result = DNS_R_UPTODATE; 1325 else 1326 result = roll_forward(j, db); 1327 1328 dns_journal_destroy(&j); 1329 1330 return (result); 1331} 1332 1333isc_result_t 1334dns_journal_print(isc_mem_t *mctx, const char *filename, FILE *file) { 1335 dns_journal_t *j; 1336 isc_buffer_t source; /* Transaction data from disk */ 1337 isc_buffer_t target; /* Ditto after _fromwire check */ 1338 isc_uint32_t start_serial; /* Database SOA serial */ 1339 isc_uint32_t end_serial; /* Last journal SOA serial */ 1340 isc_result_t result; 1341 dns_diff_t diff; 1342 unsigned int n_soa = 0; 1343 unsigned int n_put = 0; 1344 1345 REQUIRE(filename != NULL); 1346 1347 j = NULL; 1348 result = dns_journal_open(mctx, filename, ISC_FALSE, &j); 1349 if (result == ISC_R_NOTFOUND) { 1350 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "no journal file"); 1351 return (DNS_R_NOJOURNAL); 1352 } 1353 1354 if (result != ISC_R_SUCCESS) { 1355 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1356 "journal open failure: %s: %s", 1357 isc_result_totext(result), j->filename); 1358 return (result); 1359 } 1360 1361 dns_diff_init(j->mctx, &diff); 1362 1363 /* 1364 * Set up empty initial buffers for uncheched and checked 1365 * wire format transaction data. They will be reallocated 1366 * later. 1367 */ 1368 isc_buffer_init(&source, NULL, 0); 1369 isc_buffer_init(&target, NULL, 0); 1370 1371 start_serial = dns_journal_first_serial(j); 1372 end_serial = dns_journal_last_serial(j); 1373 1374 CHECK(dns_journal_iter_init(j, start_serial, end_serial)); 1375 1376 for (result = dns_journal_first_rr(j); 1377 result == ISC_R_SUCCESS; 1378 result = dns_journal_next_rr(j)) 1379 { 1380 dns_name_t *name; 1381 isc_uint32_t ttl; 1382 dns_rdata_t *rdata; 1383 dns_difftuple_t *tuple = NULL; 1384 1385 name = NULL; 1386 rdata = NULL; 1387 dns_journal_current_rr(j, &name, &ttl, &rdata); 1388 1389 if (rdata->type == dns_rdatatype_soa) 1390 n_soa++; 1391 1392 if (n_soa == 3) 1393 n_soa = 1; 1394 if (n_soa == 0) { 1395 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1396 "%s: journal file corrupt: missing " 1397 "initial SOA", j->filename); 1398 FAIL(ISC_R_UNEXPECTED); 1399 } 1400 CHECK(dns_difftuple_create(diff.mctx, n_soa == 1 ? 1401 DNS_DIFFOP_DEL : DNS_DIFFOP_ADD, 1402 name, ttl, rdata, &tuple)); 1403 dns_diff_append(&diff, &tuple); 1404 1405 if (++n_put > 100) { 1406 result = dns_diff_print(&diff, file); 1407 dns_diff_clear(&diff); 1408 n_put = 0; 1409 if (result != ISC_R_SUCCESS) 1410 break; 1411 } 1412 } 1413 if (result == ISC_R_NOMORE) 1414 result = ISC_R_SUCCESS; 1415 CHECK(result); 1416 1417 if (n_put != 0) { 1418 result = dns_diff_print(&diff, file); 1419 dns_diff_clear(&diff); 1420 } 1421 goto cleanup; 1422 1423 failure: 1424 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1425 "%s: cannot print: journal file corrupt", j->filename); 1426 1427 cleanup: 1428 if (source.base != NULL) 1429 isc_mem_put(j->mctx, source.base, source.length); 1430 if (target.base != NULL) 1431 isc_mem_put(j->mctx, target.base, target.length); 1432 1433 dns_diff_clear(&diff); 1434 dns_journal_destroy(&j); 1435 1436 return (result); 1437} 1438 1439/**************************************************************************/ 1440/* 1441 * Miscellaneous accessors. 1442 */ 1443isc_uint32_t dns_journal_first_serial(dns_journal_t *j) { 1444 return (j->header.begin.serial); 1445} 1446 1447isc_uint32_t dns_journal_last_serial(dns_journal_t *j) { 1448 return (j->header.end.serial); 1449} 1450 1451/**************************************************************************/ 1452/* 1453 * Iteration support. 1454 * 1455 * When serving an outgoing IXFR, we transmit a part the journal starting 1456 * at the serial number in the IXFR request and ending at the serial 1457 * number that is current when the IXFR request arrives. The ending 1458 * serial number is not necessarily at the end of the journal: 1459 * the journal may grow while the IXFR is in progress, but we stop 1460 * when we reach the serial number that was current when the IXFR started. 1461 */ 1462 1463static isc_result_t read_one_rr(dns_journal_t *j); 1464 1465/* 1466 * Make sure the buffer 'b' is has at least 'size' bytes 1467 * allocated, and clear it. 1468 * 1469 * Requires: 1470 * Either b->base is NULL, or it points to b->length bytes of memory 1471 * previously allocated by isc_mem_get(). 1472 */ 1473 1474static isc_result_t 1475size_buffer(isc_mem_t *mctx, isc_buffer_t *b, unsigned size) { 1476 if (b->length < size) { 1477 void *mem = isc_mem_get(mctx, size); 1478 if (mem == NULL) 1479 return (ISC_R_NOMEMORY); 1480 if (b->base != NULL) 1481 isc_mem_put(mctx, b->base, b->length); 1482 b->base = mem; 1483 b->length = size; 1484 } 1485 isc_buffer_clear(b); 1486 return (ISC_R_SUCCESS); 1487} 1488 1489isc_result_t 1490dns_journal_iter_init(dns_journal_t *j, 1491 isc_uint32_t begin_serial, isc_uint32_t end_serial) 1492{ 1493 isc_result_t result; 1494 1495 CHECK(journal_find(j, begin_serial, &j->it.bpos)); 1496 INSIST(j->it.bpos.serial == begin_serial); 1497 1498 CHECK(journal_find(j, end_serial, &j->it.epos)); 1499 INSIST(j->it.epos.serial == end_serial); 1500 1501 result = ISC_R_SUCCESS; 1502 failure: 1503 j->it.result = result; 1504 return (j->it.result); 1505} 1506 1507 1508isc_result_t 1509dns_journal_first_rr(dns_journal_t *j) { 1510 isc_result_t result; 1511 1512 /* 1513 * Seek to the beginning of the first transaction we are 1514 * interested in. 1515 */ 1516 CHECK(journal_seek(j, j->it.bpos.offset)); 1517 j->it.current_serial = j->it.bpos.serial; 1518 1519 j->it.xsize = 0; /* We have no transaction data yet... */ 1520 j->it.xpos = 0; /* ...and haven't used any of it. */ 1521 1522 return (read_one_rr(j)); 1523 1524 failure: 1525 return (result); 1526} 1527 1528static isc_result_t 1529read_one_rr(dns_journal_t *j) { 1530 isc_result_t result; 1531 1532 dns_rdatatype_t rdtype; 1533 dns_rdataclass_t rdclass; 1534 unsigned int rdlen; 1535 isc_uint32_t ttl; 1536 journal_xhdr_t xhdr; 1537 journal_rrhdr_t rrhdr; 1538 1539 INSIST(j->offset <= j->it.epos.offset); 1540 if (j->offset == j->it.epos.offset) 1541 return (ISC_R_NOMORE); 1542 if (j->it.xpos == j->it.xsize) { 1543 /* 1544 * We are at a transaction boundary. 1545 * Read another transaction header. 1546 */ 1547 CHECK(journal_read_xhdr(j, &xhdr)); 1548 if (xhdr.size == 0) { 1549 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1550 "%s: journal corrupt: empty transaction", 1551 j->filename); 1552 FAIL(ISC_R_UNEXPECTED); 1553 } 1554 if (xhdr.serial0 != j->it.current_serial) { 1555 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1556 "%s: journal file corrupt: " 1557 "expected serial %u, got %u", 1558 j->filename, 1559 j->it.current_serial, xhdr.serial0); 1560 FAIL(ISC_R_UNEXPECTED); 1561 } 1562 j->it.xsize = xhdr.size; 1563 j->it.xpos = 0; 1564 } 1565 /* 1566 * Read an RR. 1567 */ 1568 CHECK(journal_read_rrhdr(j, &rrhdr)); 1569 /* 1570 * Perform a sanity check on the journal RR size. 1571 * The smallest possible RR has a 1-byte owner name 1572 * and a 10-byte header. The largest possible 1573 * RR has 65535 bytes of data, a header, and a maximum- 1574 * size owner name, well below 70 k total. 1575 */ 1576 if (rrhdr.size < 1+10 || rrhdr.size > 70000) { 1577 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1578 "%s: journal corrupt: impossible RR size " 1579 "(%d bytes)", j->filename, rrhdr.size); 1580 FAIL(ISC_R_UNEXPECTED); 1581 } 1582 1583 CHECK(size_buffer(j->mctx, &j->it.source, rrhdr.size)); 1584 CHECK(journal_read(j, j->it.source.base, rrhdr.size)); 1585 isc_buffer_add(&j->it.source, rrhdr.size); 1586 1587 /* 1588 * The target buffer is made the same size 1589 * as the source buffer, with the assumption that when 1590 * no compression in present, the output of dns_*_fromwire() 1591 * is no larger than the input. 1592 */ 1593 CHECK(size_buffer(j->mctx, &j->it.target, rrhdr.size)); 1594 1595 /* 1596 * Parse the owner name. We don't know where it 1597 * ends yet, so we make the entire "remaining" 1598 * part of the buffer "active". 1599 */ 1600 isc_buffer_setactive(&j->it.source, 1601 j->it.source.used - j->it.source.current); 1602 CHECK(dns_name_fromwire(&j->it.name, &j->it.source, 1603 &j->it.dctx, 0, &j->it.target)); 1604 1605 /* 1606 * Check that the RR header is there, and parse it. 1607 */ 1608 if (isc_buffer_remaininglength(&j->it.source) < 10) 1609 FAIL(DNS_R_FORMERR); 1610 1611 rdtype = isc_buffer_getuint16(&j->it.source); 1612 rdclass = isc_buffer_getuint16(&j->it.source); 1613 ttl = isc_buffer_getuint32(&j->it.source); 1614 rdlen = isc_buffer_getuint16(&j->it.source); 1615 1616 /* 1617 * Parse the rdata. 1618 */ 1619 isc_buffer_setactive(&j->it.source, rdlen); 1620 dns_rdata_reset(&j->it.rdata); 1621 CHECK(dns_rdata_fromwire(&j->it.rdata, rdclass, 1622 rdtype, &j->it.source, &j->it.dctx, 1623 0, &j->it.target)); 1624 j->it.ttl = ttl; 1625 1626 j->it.xpos += sizeof(journal_rawrrhdr_t) + rrhdr.size; 1627 if (rdtype == dns_rdatatype_soa) { 1628 /* XXX could do additional consistency checks here */ 1629 j->it.current_serial = dns_soa_getserial(&j->it.rdata); 1630 } 1631 1632 result = ISC_R_SUCCESS; 1633 1634 failure: 1635 j->it.result = result; 1636 return (result); 1637} 1638 1639isc_result_t 1640dns_journal_next_rr(dns_journal_t *j) { 1641 j->it.result = read_one_rr(j); 1642 return (j->it.result); 1643} 1644 1645void 1646dns_journal_current_rr(dns_journal_t *j, dns_name_t **name, isc_uint32_t *ttl, 1647 dns_rdata_t **rdata) 1648{ 1649 REQUIRE(j->it.result == ISC_R_SUCCESS); 1650 *name = &j->it.name; 1651 *ttl = j->it.ttl; 1652 *rdata = &j->it.rdata; 1653} 1654 1655/**************************************************************************/ 1656/* 1657 * Generating diffs from databases 1658 */ 1659 1660/* 1661 * Construct a diff containing all the RRs at the current name of the 1662 * database iterator 'dbit' in database 'db', version 'ver'. 1663 * Set '*name' to the current name, and append the diff to 'diff'. 1664 * All new tuples will have the operation 'op'. 1665 * 1666 * Requires: 'name' must have buffer large enough to hold the name. 1667 * Typically, a dns_fixedname_t would be used. 1668 */ 1669static isc_result_t 1670get_name_diff(dns_db_t *db, dns_dbversion_t *ver, isc_stdtime_t now, 1671 dns_dbiterator_t *dbit, dns_name_t *name, dns_diffop_t op, 1672 dns_diff_t *diff) 1673{ 1674 isc_result_t result; 1675 dns_dbnode_t *node = NULL; 1676 dns_rdatasetiter_t *rdsiter = NULL; 1677 dns_difftuple_t *tuple = NULL; 1678 1679 result = dns_dbiterator_current(dbit, &node, name); 1680 if (result != ISC_R_SUCCESS) 1681 return (result); 1682 1683 result = dns_db_allrdatasets(db, node, ver, now, &rdsiter); 1684 if (result != ISC_R_SUCCESS) 1685 goto cleanup_node; 1686 1687 for (result = dns_rdatasetiter_first(rdsiter); 1688 result == ISC_R_SUCCESS; 1689 result = dns_rdatasetiter_next(rdsiter)) 1690 { 1691 dns_rdataset_t rdataset; 1692 1693 dns_rdataset_init(&rdataset); 1694 dns_rdatasetiter_current(rdsiter, &rdataset); 1695 1696 for (result = dns_rdataset_first(&rdataset); 1697 result == ISC_R_SUCCESS; 1698 result = dns_rdataset_next(&rdataset)) 1699 { 1700 dns_rdata_t rdata = DNS_RDATA_INIT; 1701 dns_rdataset_current(&rdataset, &rdata); 1702 result = dns_difftuple_create(diff->mctx, op, name, 1703 rdataset.ttl, &rdata, 1704 &tuple); 1705 if (result != ISC_R_SUCCESS) { 1706 dns_rdataset_disassociate(&rdataset); 1707 goto cleanup_iterator; 1708 } 1709 dns_diff_append(diff, &tuple); 1710 } 1711 dns_rdataset_disassociate(&rdataset); 1712 if (result != ISC_R_NOMORE) 1713 goto cleanup_iterator; 1714 } 1715 if (result != ISC_R_NOMORE) 1716 goto cleanup_iterator; 1717 1718 result = ISC_R_SUCCESS; 1719 1720 cleanup_iterator: 1721 dns_rdatasetiter_destroy(&rdsiter); 1722 1723 cleanup_node: 1724 dns_db_detachnode(db, &node); 1725 1726 return (result); 1727} 1728 1729/* 1730 * Comparison function for use by dns_diff_subtract when sorting 1731 * the diffs to be subtracted. The sort keys are the rdata type 1732 * and the rdata itself. The owner name is ignored, because 1733 * it is known to be the same for all tuples. 1734 */ 1735static int 1736rdata_order(const void *av, const void *bv) { 1737 dns_difftuple_t const * const *ap = av; 1738 dns_difftuple_t const * const *bp = bv; 1739 dns_difftuple_t const *a = *ap; 1740 dns_difftuple_t const *b = *bp; 1741 int r; 1742 r = (b->rdata.type - a->rdata.type); 1743 if (r != 0) 1744 return (r); 1745 r = dns_rdata_compare(&a->rdata, &b->rdata); 1746 return (r); 1747} 1748 1749static isc_result_t 1750dns_diff_subtract(dns_diff_t diff[2], dns_diff_t *r) { 1751 isc_result_t result; 1752 dns_difftuple_t *p[2]; 1753 int i, t; 1754 isc_boolean_t append; 1755 1756 CHECK(dns_diff_sort(&diff[0], rdata_order)); 1757 CHECK(dns_diff_sort(&diff[1], rdata_order)); 1758 1759 for (;;) { 1760 p[0] = ISC_LIST_HEAD(diff[0].tuples); 1761 p[1] = ISC_LIST_HEAD(diff[1].tuples); 1762 if (p[0] == NULL && p[1] == NULL) 1763 break; 1764 1765 for (i = 0; i < 2; i++) 1766 if (p[!i] == NULL) { 1767 ISC_LIST_UNLINK(diff[i].tuples, p[i], link); 1768 ISC_LIST_APPEND(r->tuples, p[i], link); 1769 goto next; 1770 } 1771 t = rdata_order(&p[0], &p[1]); 1772 if (t < 0) { 1773 ISC_LIST_UNLINK(diff[0].tuples, p[0], link); 1774 ISC_LIST_APPEND(r->tuples, p[0], link); 1775 goto next; 1776 } 1777 if (t > 0) { 1778 ISC_LIST_UNLINK(diff[1].tuples, p[1], link); 1779 ISC_LIST_APPEND(r->tuples, p[1], link); 1780 goto next; 1781 } 1782 INSIST(t == 0); 1783 /* 1784 * Identical RRs in both databases; skip them both 1785 * if the ttl differs. 1786 */ 1787 append = ISC_TF(p[0]->ttl != p[1]->ttl); 1788 for (i = 0; i < 2; i++) { 1789 ISC_LIST_UNLINK(diff[i].tuples, p[i], link); 1790 if (append) { 1791 ISC_LIST_APPEND(r->tuples, p[i], link); 1792 } else { 1793 dns_difftuple_free(&p[i]); 1794 } 1795 } 1796 next: ; 1797 } 1798 result = ISC_R_SUCCESS; 1799 failure: 1800 return (result); 1801} 1802 1803/* 1804 * Compare the databases 'dba' and 'dbb' and generate a journal 1805 * entry containing the changes to make 'dba' from 'dbb' (note 1806 * the order). This journal entry will consist of a single, 1807 * possibly very large transaction. 1808 */ 1809 1810isc_result_t 1811dns_db_diff(isc_mem_t *mctx, 1812 dns_db_t *dba, dns_dbversion_t *dbvera, 1813 dns_db_t *dbb, dns_dbversion_t *dbverb, 1814 const char *journal_filename) 1815{ 1816 dns_db_t *db[2]; 1817 dns_dbversion_t *ver[2]; 1818 dns_dbiterator_t *dbit[2] = { NULL, NULL }; 1819 isc_boolean_t have[2] = { ISC_FALSE, ISC_FALSE }; 1820 dns_fixedname_t fixname[2]; 1821 isc_result_t result, itresult[2]; 1822 dns_diff_t diff[2], resultdiff; 1823 int i, t; 1824 dns_journal_t *journal = NULL; 1825 1826 db[0] = dba, db[1] = dbb; 1827 ver[0] = dbvera, ver[1] = dbverb; 1828 1829 dns_diff_init(mctx, &diff[0]); 1830 dns_diff_init(mctx, &diff[1]); 1831 dns_diff_init(mctx, &resultdiff); 1832 1833 dns_fixedname_init(&fixname[0]); 1834 dns_fixedname_init(&fixname[1]); 1835 1836 result = dns_journal_open(mctx, journal_filename, ISC_TRUE, &journal); 1837 if (result != ISC_R_SUCCESS) 1838 return (result); 1839 1840 result = dns_db_createiterator(db[0], ISC_FALSE, &dbit[0]); 1841 if (result != ISC_R_SUCCESS) 1842 goto cleanup_journal; 1843 result = dns_db_createiterator(db[1], ISC_FALSE, &dbit[1]); 1844 if (result != ISC_R_SUCCESS) 1845 goto cleanup_interator0; 1846 1847 itresult[0] = dns_dbiterator_first(dbit[0]); 1848 itresult[1] = dns_dbiterator_first(dbit[1]); 1849 1850 for (;;) { 1851 for (i = 0; i < 2; i++) { 1852 if (! have[i] && itresult[i] == ISC_R_SUCCESS) { 1853 CHECK(get_name_diff(db[i], ver[i], 0, dbit[i], 1854 dns_fixedname_name(&fixname[i]), 1855 i == 0 ? 1856 DNS_DIFFOP_ADD : 1857 DNS_DIFFOP_DEL, 1858 &diff[i])); 1859 itresult[i] = dns_dbiterator_next(dbit[i]); 1860 have[i] = ISC_TRUE; 1861 } 1862 } 1863 1864 if (! have[0] && ! have[1]) { 1865 INSIST(ISC_LIST_EMPTY(diff[0].tuples)); 1866 INSIST(ISC_LIST_EMPTY(diff[1].tuples)); 1867 break; 1868 } 1869 1870 for (i = 0; i < 2; i++) { 1871 if (! have[!i]) { 1872 ISC_LIST_APPENDLIST(resultdiff.tuples, 1873 diff[i].tuples, link); 1874 INSIST(ISC_LIST_EMPTY(diff[i].tuples)); 1875 have[i] = ISC_FALSE; 1876 goto next; 1877 } 1878 } 1879 1880 t = dns_name_compare(dns_fixedname_name(&fixname[0]), 1881 dns_fixedname_name(&fixname[1])); 1882 if (t < 0) { 1883 ISC_LIST_APPENDLIST(resultdiff.tuples, 1884 diff[0].tuples, link); 1885 INSIST(ISC_LIST_EMPTY(diff[0].tuples)); 1886 have[0] = ISC_FALSE; 1887 continue; 1888 } 1889 if (t > 0) { 1890 ISC_LIST_APPENDLIST(resultdiff.tuples, 1891 diff[1].tuples, link); 1892 INSIST(ISC_LIST_EMPTY(diff[1].tuples)); 1893 have[1] = ISC_FALSE; 1894 continue; 1895 } 1896 INSIST(t == 0); 1897 CHECK(dns_diff_subtract(diff, &resultdiff)); 1898 INSIST(ISC_LIST_EMPTY(diff[0].tuples)); 1899 INSIST(ISC_LIST_EMPTY(diff[1].tuples)); 1900 have[0] = have[1] = ISC_FALSE; 1901 next: ; 1902 } 1903 if (itresult[0] != ISC_R_NOMORE) 1904 FAIL(itresult[0]); 1905 if (itresult[1] != ISC_R_NOMORE) 1906 FAIL(itresult[1]); 1907 1908 if (ISC_LIST_EMPTY(resultdiff.tuples)) { 1909 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "no changes"); 1910 } else { 1911 CHECK(dns_journal_write_transaction(journal, &resultdiff)); 1912 } 1913 INSIST(ISC_LIST_EMPTY(diff[0].tuples)); 1914 INSIST(ISC_LIST_EMPTY(diff[1].tuples)); 1915 1916 failure: 1917 dns_diff_clear(&resultdiff); 1918 dns_dbiterator_destroy(&dbit[1]); 1919 cleanup_interator0: 1920 dns_dbiterator_destroy(&dbit[0]); 1921 cleanup_journal: 1922 dns_journal_destroy(&journal); 1923 return (result); 1924} 1925 1926isc_result_t 1927dns_journal_compact(isc_mem_t *mctx, char *filename, isc_uint32_t serial, 1928 isc_uint32_t target_size) 1929{ 1930 unsigned int i; 1931 journal_pos_t best_guess; 1932 journal_pos_t current_pos; 1933 dns_journal_t *j = NULL; 1934 journal_rawheader_t rawheader; 1935 unsigned int copy_length; 1936 unsigned int len; 1937 char *buf = NULL; 1938 unsigned int size = 0; 1939 isc_result_t result; 1940 unsigned int indexend; 1941 1942 CHECK(journal_open(mctx, filename, ISC_TRUE, ISC_FALSE, &j)); 1943 1944 if (JOURNAL_EMPTY(&j->header)) { 1945 dns_journal_destroy(&j); 1946 return (ISC_R_SUCCESS); 1947 } 1948 1949 if (DNS_SERIAL_GT(j->header.begin.serial, serial) || 1950 DNS_SERIAL_GT(serial, j->header.end.serial)) { 1951 dns_journal_destroy(&j); 1952 return (ISC_R_RANGE); 1953 } 1954 1955 /* 1956 * Cope with very small target sizes. 1957 */ 1958 indexend = sizeof(journal_rawheader_t) + 1959 j->header.index_size * sizeof(journal_rawpos_t); 1960 if (target_size < indexend * 2) 1961 target_size = target_size/2 + indexend; 1962 1963 /* 1964 * See if there is any work to do. 1965 */ 1966 if ((isc_uint32_t) j->header.end.offset < target_size) { 1967 dns_journal_destroy(&j); 1968 return (ISC_R_SUCCESS); 1969 } 1970 1971 /* 1972 * Remove overhead so space test below can succeed. 1973 */ 1974 if (target_size >= indexend) 1975 target_size -= indexend; 1976 1977 /* 1978 * Find if we can create enough free space. 1979 */ 1980 best_guess = j->header.begin; 1981 for (i = 0; i < j->header.index_size; i++) { 1982 if (POS_VALID(j->index[i]) && 1983 DNS_SERIAL_GE(serial, j->index[i].serial) && 1984 ((isc_uint32_t)(j->header.end.offset - j->index[i].offset) 1985 >= target_size / 2) && 1986 j->index[i].offset > best_guess.offset) 1987 best_guess = j->index[i]; 1988 } 1989 1990 current_pos = best_guess; 1991 while (current_pos.serial != serial) { 1992 CHECK(journal_next(j, ¤t_pos)); 1993 if (current_pos.serial == j->header.end.serial) 1994 break; 1995 1996 if (DNS_SERIAL_GE(serial, current_pos.serial) && 1997 ((isc_uint32_t)(j->header.end.offset - current_pos.offset) 1998 >= (target_size / 2)) && 1999 current_pos.offset > best_guess.offset) 2000 best_guess = current_pos; 2001 else 2002 break; 2003 } 2004 2005 INSIST(best_guess.serial != j->header.end.serial); 2006 if (best_guess.serial != serial) 2007 CHECK(journal_next(j, &best_guess)); 2008 2009 /* 2010 * Enough space to proceed? 2011 */ 2012 if ((isc_uint32_t) (j->header.end.offset - best_guess.offset) > 2013 (isc_uint32_t) (best_guess.offset - indexend)) { 2014 dns_journal_destroy(&j); 2015 return (ISC_R_NOSPACE); 2016 } 2017 2018 copy_length = j->header.end.offset - best_guess.offset; 2019 2020 /* 2021 * Invalidate entire index, will be rebuilt at end. 2022 */ 2023 for (i = 0; i < j->header.index_size; i++) { 2024 if (POS_VALID(j->index[i])) 2025 POS_INVALIDATE(j->index[i]); 2026 } 2027 2028 /* 2029 * Convert the index into on-disk format and write 2030 * it to disk. 2031 */ 2032 CHECK(index_to_disk(j)); 2033 CHECK(journal_fsync(j)); 2034 2035 /* 2036 * Update the journal header. 2037 */ 2038 if (copy_length == 0) { 2039 j->header.begin.serial = 0; 2040 j->header.end.serial = 0; 2041 j->header.begin.offset = 0; 2042 j->header.end.offset = 0; 2043 } else { 2044 j->header.begin = best_guess; 2045 } 2046 journal_header_encode(&j->header, &rawheader); 2047 CHECK(journal_seek(j, 0)); 2048 CHECK(journal_write(j, &rawheader, sizeof(rawheader))); 2049 CHECK(journal_fsync(j)); 2050 2051 if (copy_length != 0) { 2052 /* 2053 * Copy best_guess to end into space just freed. 2054 */ 2055 size = 64*1024; 2056 if (copy_length < size) 2057 size = copy_length; 2058 buf = isc_mem_get(mctx, size); 2059 if (buf == NULL) { 2060 result = ISC_R_NOMEMORY; 2061 goto failure; 2062 } 2063 2064 for (i = 0; i < copy_length; i += size) { 2065 len = (copy_length - i) > size ? size : 2066 (copy_length - i); 2067 CHECK(journal_seek(j, best_guess.offset + i)); 2068 CHECK(journal_read(j, buf, len)); 2069 CHECK(journal_seek(j, indexend + i)); 2070 CHECK(journal_write(j, buf, len)); 2071 } 2072 2073 CHECK(journal_fsync(j)); 2074 2075 /* 2076 * Compute new header. 2077 */ 2078 j->header.begin.offset = indexend; 2079 j->header.end.offset = indexend + copy_length; 2080 /* 2081 * Update the journal header. 2082 */ 2083 journal_header_encode(&j->header, &rawheader); 2084 CHECK(journal_seek(j, 0)); 2085 CHECK(journal_write(j, &rawheader, sizeof(rawheader))); 2086 CHECK(journal_fsync(j)); 2087 2088 /* 2089 * Build new index. 2090 */ 2091 current_pos = j->header.begin; 2092 while (current_pos.serial != j->header.end.serial) { 2093 index_add(j, ¤t_pos); 2094 CHECK(journal_next(j, ¤t_pos)); 2095 } 2096 2097 /* 2098 * Write index. 2099 */ 2100 CHECK(index_to_disk(j)); 2101 CHECK(journal_fsync(j)); 2102 2103 indexend = j->header.end.offset; 2104 } 2105 dns_journal_destroy(&j); 2106 (void)isc_file_truncate(filename, (isc_offset_t)indexend); 2107 result = ISC_R_SUCCESS; 2108 2109 failure: 2110 if (buf != NULL) 2111 isc_mem_put(mctx, buf, size); 2112 if (j != NULL) 2113 dns_journal_destroy(&j); 2114 return (result); 2115} 2116 2117static isc_result_t 2118index_to_disk(dns_journal_t *j) { 2119 isc_result_t result = ISC_R_SUCCESS; 2120 2121 if (j->header.index_size != 0) { 2122 unsigned int i; 2123 unsigned char *p; 2124 unsigned int rawbytes; 2125 2126 rawbytes = j->header.index_size * sizeof(journal_rawpos_t); 2127 2128 p = j->rawindex; 2129 for (i = 0; i < j->header.index_size; i++) { 2130 encode_uint32(j->index[i].serial, p); 2131 p += 4; 2132 encode_uint32(j->index[i].offset, p); 2133 p += 4; 2134 } 2135 INSIST(p == j->rawindex + rawbytes); 2136 2137 CHECK(journal_seek(j, sizeof(journal_rawheader_t))); 2138 CHECK(journal_write(j, j->rawindex, rawbytes)); 2139 } 2140failure: 2141 return (result); 2142} 2143