journal.c revision 170222
1/* 2 * Copyright (C) 2004, 2005 Internet Systems Consortium, Inc. ("ISC") 3 * Copyright (C) 1999-2002 Internet Software Consortium. 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH 10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, 12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE 14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 15 * PERFORMANCE OF THIS SOFTWARE. 16 */ 17 18/* $Id: journal.c,v 1.86.18.8 2005/11/03 23:02:23 marka Exp $ */ 19 20#include <config.h> 21 22#include <stdlib.h> 23#include <unistd.h> 24 25#include <isc/file.h> 26#include <isc/mem.h> 27#include <isc/stdio.h> 28#include <isc/string.h> 29#include <isc/util.h> 30 31#include <dns/compress.h> 32#include <dns/db.h> 33#include <dns/dbiterator.h> 34#include <dns/diff.h> 35#include <dns/fixedname.h> 36#include <dns/journal.h> 37#include <dns/log.h> 38#include <dns/rdataset.h> 39#include <dns/rdatasetiter.h> 40#include <dns/result.h> 41#include <dns/soa.h> 42 43/*! \file 44 * \brief Journalling. 45 * 46 * A journal file consists of 47 * 48 * \li A fixed-size header of type journal_rawheader_t. 49 * 50 * \li The index. This is an unordered array of index entries 51 * of type journal_rawpos_t giving the locations 52 * of some arbitrary subset of the journal's addressable 53 * transactions. The index entries are used as hints to 54 * speed up the process of locating a transaction with a given 55 * serial number. Unused index entries have an "offset" 56 * field of zero. 
* The size of the index can vary between
 * journal files, but does not change during the lifetime
 * of a file.  The size can be zero.
 *
 * \li  The journal data.  This consists of one or more transactions.
 *	Each transaction begins with a transaction header of type
 *	journal_rawxhdr_t.  The transaction header is followed by a
 *	sequence of RRs, similar in structure to an IXFR difference
 *	sequence (RFC1995).  That is, the pre-transaction SOA,
 *	zero or more other deleted RRs, the post-transaction SOA,
 *	and zero or more other added RRs.  Unlike in IXFR, each RR
 *	is prefixed with a 32-bit length.
 *
 *	The journal data part grows as new transactions are
 *	appended to the file.  Only those transactions
 *	whose serial number is current-(2^31-1) to current
 *	are considered "addressable" and may be pointed
 *	to from the header or index.  They may be preceded
 *	by old transactions that are no longer addressable,
 *	and they may be followed by transactions that were
 *	appended to the journal but never committed by updating
 *	the "end" position in the header.  The latter will
 *	be overwritten when new transactions are added.
 */
/*%
 * When true, accept IXFR difference sequences where the
 * SOA serial number does not change (BIND 8 sends such
 * sequences).  It is consulted when a transaction is committed.
 */
static isc_boolean_t bind8_compat = ISC_TRUE; /* XXX config */

/**************************************************************************/
/*
 * Miscellaneous utilities.
 */

/*%
 * The leading isc_log_write() arguments (log context, category, module)
 * shared by every log call in this file.
 */
#define JOURNAL_COMMON_LOGARGS \
	dns_lctx, DNS_LOGCATEGORY_GENERAL, DNS_LOGMODULE_JOURNAL

/*%
 * Like JOURNAL_COMMON_LOGARGS, with the level ISC_LOG_DEBUG(n) appended.
 */
#define JOURNAL_DEBUG_LOGARGS(n) \
	JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(n)

/*%
 * Store 'code' in the local variable 'result' and jump to the local
 * 'failure' label.
 *
 * It would be non-sensical (or at least obtuse) to use FAIL() with an
 * ISC_R_SUCCESS code, but the test is there to keep the Solaris compiler
 * from complaining about "end-of-loop code not reached".
102 */ 103#define FAIL(code) \ 104 do { result = (code); \ 105 if (result != ISC_R_SUCCESS) goto failure; \ 106 } while (0) 107 108#define CHECK(op) \ 109 do { result = (op); \ 110 if (result != ISC_R_SUCCESS) goto failure; \ 111 } while (0) 112 113static isc_result_t index_to_disk(dns_journal_t *); 114 115static inline isc_uint32_t 116decode_uint32(unsigned char *p) { 117 return ((p[0] << 24) + 118 (p[1] << 16) + 119 (p[2] << 8) + 120 (p[3] << 0)); 121} 122 123static inline void 124encode_uint32(isc_uint32_t val, unsigned char *p) { 125 p[0] = (isc_uint8_t)(val >> 24); 126 p[1] = (isc_uint8_t)(val >> 16); 127 p[2] = (isc_uint8_t)(val >> 8); 128 p[3] = (isc_uint8_t)(val >> 0); 129} 130 131isc_result_t 132dns_db_createsoatuple(dns_db_t *db, dns_dbversion_t *ver, isc_mem_t *mctx, 133 dns_diffop_t op, dns_difftuple_t **tp) 134{ 135 isc_result_t result; 136 dns_dbnode_t *node; 137 dns_rdataset_t rdataset; 138 dns_rdata_t rdata = DNS_RDATA_INIT; 139 dns_name_t *zonename; 140 141 zonename = dns_db_origin(db); 142 143 node = NULL; 144 result = dns_db_findnode(db, zonename, ISC_FALSE, &node); 145 if (result != ISC_R_SUCCESS) 146 goto nonode; 147 148 dns_rdataset_init(&rdataset); 149 result = dns_db_findrdataset(db, node, ver, dns_rdatatype_soa, 0, 150 (isc_stdtime_t)0, &rdataset, NULL); 151 if (result != ISC_R_SUCCESS) 152 goto freenode; 153 154 result = dns_rdataset_first(&rdataset); 155 if (result != ISC_R_SUCCESS) 156 goto freenode; 157 158 dns_rdataset_current(&rdataset, &rdata); 159 160 result = dns_difftuple_create(mctx, op, zonename, rdataset.ttl, 161 &rdata, tp); 162 163 dns_rdataset_disassociate(&rdataset); 164 dns_db_detachnode(db, &node); 165 return (ISC_R_SUCCESS); 166 167 freenode: 168 dns_db_detachnode(db, &node); 169 nonode: 170 UNEXPECTED_ERROR(__FILE__, __LINE__, "missing SOA"); 171 return (result); 172} 173 174/* Journalling */ 175 176/*% 177 * On-disk representation of a "pointer" to a journal entry. 
* These are used in the journal header to locate the beginning
 * and end of the journal, and in the journal index to locate
 * other transactions.
 */
typedef struct {
	unsigned char	serial[4];  /*%< SOA serial before update. */
	/*
	 * XXXRTH  Should offset be 8 bytes?
	 * XXXDCL ... probably, since isc_offset_t is 8 bytes on many OSs.
	 * XXXAG  ... but we will not be able to seek >2G anyway on many
	 *            platforms as long as we are using fseek() rather
	 *            than lseek().
	 */
	unsigned char	offset[4];  /*%< Offset from beginning of file. */
} journal_rawpos_t;


/*%
 * The header is of a fixed size, with some spare room for future
 * extensions.
 */
#define JOURNAL_HEADER_SIZE 64 /* Bytes. */

/*%
 * The on-disk representation of the journal header.
 * All numbers are stored in big-endian order.
 */
typedef union {
	struct {
		/*% File format version ID. */
		unsigned char		format[16];
		/*% Position of the first addressable transaction */
		journal_rawpos_t	begin;
		/*% Position of the next (yet nonexistent) transaction. */
		journal_rawpos_t	end;
		/*% Number of index entries following the header. */
		unsigned char		index_size[4];
	} h;
	/* Pad the header to a fixed size. */
	unsigned char pad[JOURNAL_HEADER_SIZE];
} journal_rawheader_t;

/*%
 * The on-disk representation of the transaction header.
 * There is one of these at the beginning of each transaction.
 */
typedef struct {
	unsigned char	size[4];	/*%< In bytes, excluding header. */
	unsigned char	serial0[4];	/*%< SOA serial before update. */
	unsigned char	serial1[4];	/*%< SOA serial after update. */
} journal_rawxhdr_t;

/*%
 * The on-disk representation of the RR header.
 * There is one of these at the beginning of each RR.
 */
typedef struct {
	unsigned char	size[4];	/*%< In bytes, excluding header. */
} journal_rawrrhdr_t;

/*%
 * The in-core representation of a journal position
 * (the decoded form of journal_rawpos_t).
 */
typedef struct {
	isc_uint32_t	serial;		/*%< SOA serial before update. */
	isc_offset_t	offset;		/*%< Offset from beginning of file. */
} journal_pos_t;

/*% A position is considered valid (in use) when its offset is nonzero. */
#define POS_VALID(pos)		((pos).offset != 0)
/*% Mark a position as unused by clearing both of its fields. */
#define POS_INVALIDATE(pos)	((pos).offset = 0, (pos).serial = 0)

/*%
 * The in-core representation of the journal header
 * (the decoded form of journal_rawheader_t).
 */
typedef struct {
	unsigned char	format[16];
	journal_pos_t	begin;
	journal_pos_t	end;
	isc_uint32_t	index_size;
} journal_header_t;

/*%
 * The in-core representation of the transaction header.
 */

typedef struct {
	isc_uint32_t	size;
	isc_uint32_t	serial0;
	isc_uint32_t	serial1;
} journal_xhdr_t;

/*%
 * The in-core representation of the RR header.
 */
typedef struct {
	isc_uint32_t	size;
} journal_rrhdr_t;


/*%
 * Initial contents to store in the header of a newly created
 * journal file.
 *
 * The header starts with the magic string ";BIND LOG V9\n"
 * to identify the file as a BIND 9 journal file.  An ASCII
 * identification string is used rather than a binary magic
 * number to be consistent with BIND 8 (BIND 8 journal files
 * are ASCII text files).
283 */ 284 285static journal_header_t 286initial_journal_header = { ";BIND LOG V9\n", { 0, 0 }, { 0, 0 }, 0 }; 287 288#define JOURNAL_EMPTY(h) ((h)->begin.offset == (h)->end.offset) 289 290typedef enum { 291 JOURNAL_STATE_INVALID, 292 JOURNAL_STATE_READ, 293 JOURNAL_STATE_WRITE, 294 JOURNAL_STATE_TRANSACTION 295} journal_state_t; 296 297struct dns_journal { 298 unsigned int magic; /*%< JOUR */ 299 isc_mem_t *mctx; /*%< Memory context */ 300 journal_state_t state; 301 const char *filename; /*%< Journal file name */ 302 FILE * fp; /*%< File handle */ 303 isc_offset_t offset; /*%< Current file offset */ 304 journal_header_t header; /*%< In-core journal header */ 305 unsigned char *rawindex; /*%< In-core buffer for journal index in on-disk format */ 306 journal_pos_t *index; /*%< In-core journal index */ 307 308 /*% Current transaction state (when writing). */ 309 struct { 310 unsigned int n_soa; /*%< Number of SOAs seen */ 311 journal_pos_t pos[2]; /*%< Begin/end position */ 312 } x; 313 314 /*% Iteration state (when reading). */ 315 struct { 316 /* These define the part of the journal we iterate over. */ 317 journal_pos_t bpos; /*%< Position before first, */ 318 journal_pos_t epos; /*%< and after last transaction */ 319 /* The rest is iterator state. 
*/ 320 isc_uint32_t current_serial; /*%< Current SOA serial */ 321 isc_buffer_t source; /*%< Data from disk */ 322 isc_buffer_t target; /*%< Data from _fromwire check */ 323 dns_decompress_t dctx; /*%< Dummy decompression ctx */ 324 dns_name_t name; /*%< Current domain name */ 325 dns_rdata_t rdata; /*%< Current rdata */ 326 isc_uint32_t ttl; /*%< Current TTL */ 327 unsigned int xsize; /*%< Size of transaction data */ 328 unsigned int xpos; /*%< Current position in it */ 329 isc_result_t result; /*%< Result of last call */ 330 } it; 331}; 332 333#define DNS_JOURNAL_MAGIC ISC_MAGIC('J', 'O', 'U', 'R') 334#define DNS_JOURNAL_VALID(t) ISC_MAGIC_VALID(t, DNS_JOURNAL_MAGIC) 335 336static void 337journal_pos_decode(journal_rawpos_t *raw, journal_pos_t *cooked) { 338 cooked->serial = decode_uint32(raw->serial); 339 cooked->offset = decode_uint32(raw->offset); 340} 341 342static void 343journal_pos_encode(journal_rawpos_t *raw, journal_pos_t *cooked) { 344 encode_uint32(cooked->serial, raw->serial); 345 encode_uint32(cooked->offset, raw->offset); 346} 347 348static void 349journal_header_decode(journal_rawheader_t *raw, journal_header_t *cooked) { 350 INSIST(sizeof(cooked->format) == sizeof(raw->h.format)); 351 memcpy(cooked->format, raw->h.format, sizeof(cooked->format)); 352 journal_pos_decode(&raw->h.begin, &cooked->begin); 353 journal_pos_decode(&raw->h.end, &cooked->end); 354 cooked->index_size = decode_uint32(raw->h.index_size); 355} 356 357static void 358journal_header_encode(journal_header_t *cooked, journal_rawheader_t *raw) { 359 INSIST(sizeof(cooked->format) == sizeof(raw->h.format)); 360 memset(raw->pad, 0, sizeof(raw->pad)); 361 memcpy(raw->h.format, cooked->format, sizeof(raw->h.format)); 362 journal_pos_encode(&raw->h.begin, &cooked->begin); 363 journal_pos_encode(&raw->h.end, &cooked->end); 364 encode_uint32(cooked->index_size, raw->h.index_size); 365} 366 367/* 368 * Journal file I/O subroutines, with error checking and reporting. 
369 */ 370static isc_result_t 371journal_seek(dns_journal_t *j, isc_uint32_t offset) { 372 isc_result_t result; 373 result = isc_stdio_seek(j->fp, (long)offset, SEEK_SET); 374 if (result != ISC_R_SUCCESS) { 375 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 376 "%s: seek: %s", j->filename, 377 isc_result_totext(result)); 378 return (ISC_R_UNEXPECTED); 379 } 380 j->offset = offset; 381 return (ISC_R_SUCCESS); 382} 383 384static isc_result_t 385journal_read(dns_journal_t *j, void *mem, size_t nbytes) { 386 isc_result_t result; 387 388 result = isc_stdio_read(mem, 1, nbytes, j->fp, NULL); 389 if (result != ISC_R_SUCCESS) { 390 if (result == ISC_R_EOF) 391 return (ISC_R_NOMORE); 392 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 393 "%s: read: %s", 394 j->filename, isc_result_totext(result)); 395 return (ISC_R_UNEXPECTED); 396 } 397 j->offset += nbytes; 398 return (ISC_R_SUCCESS); 399} 400 401static isc_result_t 402journal_write(dns_journal_t *j, void *mem, size_t nbytes) { 403 isc_result_t result; 404 405 result = isc_stdio_write(mem, 1, nbytes, j->fp, NULL); 406 if (result != ISC_R_SUCCESS) { 407 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 408 "%s: write: %s", 409 j->filename, isc_result_totext(result)); 410 return (ISC_R_UNEXPECTED); 411 } 412 j->offset += nbytes; 413 return (ISC_R_SUCCESS); 414} 415 416static isc_result_t 417journal_fsync(dns_journal_t *j) { 418 isc_result_t result; 419 result = isc_stdio_flush(j->fp); 420 if (result != ISC_R_SUCCESS) { 421 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 422 "%s: flush: %s", 423 j->filename, isc_result_totext(result)); 424 return (ISC_R_UNEXPECTED); 425 } 426 result = isc_stdio_sync(j->fp); 427 if (result != ISC_R_SUCCESS) { 428 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 429 "%s: fsync: %s", 430 j->filename, isc_result_totext(result)); 431 return (ISC_R_UNEXPECTED); 432 } 433 return (ISC_R_SUCCESS); 434} 435 436/* 437 * Read/write a transaction header at the current file 
position. 438 */ 439 440static isc_result_t 441journal_read_xhdr(dns_journal_t *j, journal_xhdr_t *xhdr) { 442 journal_rawxhdr_t raw; 443 isc_result_t result; 444 result = journal_read(j, &raw, sizeof(raw)); 445 if (result != ISC_R_SUCCESS) 446 return (result); 447 xhdr->size = decode_uint32(raw.size); 448 xhdr->serial0 = decode_uint32(raw.serial0); 449 xhdr->serial1 = decode_uint32(raw.serial1); 450 return (ISC_R_SUCCESS); 451} 452 453static isc_result_t 454journal_write_xhdr(dns_journal_t *j, isc_uint32_t size, 455 isc_uint32_t serial0, isc_uint32_t serial1) 456{ 457 journal_rawxhdr_t raw; 458 encode_uint32(size, raw.size); 459 encode_uint32(serial0, raw.serial0); 460 encode_uint32(serial1, raw.serial1); 461 return (journal_write(j, &raw, sizeof(raw))); 462} 463 464 465/* 466 * Read an RR header at the current file position. 467 */ 468 469static isc_result_t 470journal_read_rrhdr(dns_journal_t *j, journal_rrhdr_t *rrhdr) { 471 journal_rawrrhdr_t raw; 472 isc_result_t result; 473 result = journal_read(j, &raw, sizeof(raw)); 474 if (result != ISC_R_SUCCESS) 475 return (result); 476 rrhdr->size = decode_uint32(raw.size); 477 return (ISC_R_SUCCESS); 478} 479 480static isc_result_t 481journal_file_create(isc_mem_t *mctx, const char *filename) { 482 FILE *fp = NULL; 483 isc_result_t result; 484 journal_header_t header; 485 journal_rawheader_t rawheader; 486 int index_size = 56; /* XXX configurable */ 487 int size; 488 void *mem; /* Memory for temporary index image. 
*/ 489 490 INSIST(sizeof(journal_rawheader_t) == JOURNAL_HEADER_SIZE); 491 492 result = isc_stdio_open(filename, "wb", &fp); 493 if (result != ISC_R_SUCCESS) { 494 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 495 "%s: create: %s", 496 filename, isc_result_totext(result)); 497 return (ISC_R_UNEXPECTED); 498 } 499 500 header = initial_journal_header; 501 header.index_size = index_size; 502 journal_header_encode(&header, &rawheader); 503 504 size = sizeof(journal_rawheader_t) + 505 index_size * sizeof(journal_rawpos_t); 506 507 mem = isc_mem_get(mctx, size); 508 if (mem == NULL) { 509 (void)isc_stdio_close(fp); 510 (void)isc_file_remove(filename); 511 return (ISC_R_NOMEMORY); 512 } 513 memset(mem, 0, size); 514 memcpy(mem, &rawheader, sizeof(rawheader)); 515 516 result = isc_stdio_write(mem, 1, (size_t) size, fp, NULL); 517 if (result != ISC_R_SUCCESS) { 518 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 519 "%s: write: %s", 520 filename, isc_result_totext(result)); 521 (void)isc_stdio_close(fp); 522 (void)isc_file_remove(filename); 523 isc_mem_put(mctx, mem, size); 524 return (ISC_R_UNEXPECTED); 525 } 526 isc_mem_put(mctx, mem, size); 527 528 result = isc_stdio_close(fp); 529 if (result != ISC_R_SUCCESS) { 530 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 531 "%s: close: %s", 532 filename, isc_result_totext(result)); 533 (void)isc_file_remove(filename); 534 return (ISC_R_UNEXPECTED); 535 } 536 537 return (ISC_R_SUCCESS); 538} 539 540static isc_result_t 541journal_open(isc_mem_t *mctx, const char *filename, isc_boolean_t write, 542 isc_boolean_t create, dns_journal_t **journalp) { 543 FILE *fp = NULL; 544 isc_result_t result; 545 journal_rawheader_t rawheader; 546 dns_journal_t *j; 547 548 INSIST(journalp != NULL && *journalp == NULL); 549 j = isc_mem_get(mctx, sizeof(*j)); 550 if (j == NULL) 551 return (ISC_R_NOMEMORY); 552 553 j->mctx = mctx; 554 j->state = JOURNAL_STATE_INVALID; 555 j->fp = NULL; 556 j->filename = filename; 557 j->index = NULL; 
558 j->rawindex = NULL; 559 560 result = isc_stdio_open(j->filename, write ? "rb+" : "rb", &fp); 561 562 if (result == ISC_R_FILENOTFOUND) { 563 if (create) { 564 isc_log_write(JOURNAL_COMMON_LOGARGS, 565 ISC_LOG_INFO, 566 "journal file %s does not exist, " 567 "creating it", 568 j->filename); 569 CHECK(journal_file_create(mctx, filename)); 570 /* 571 * Retry. 572 */ 573 result = isc_stdio_open(j->filename, "rb+", &fp); 574 } else { 575 FAIL(ISC_R_NOTFOUND); 576 } 577 } 578 if (result != ISC_R_SUCCESS) { 579 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 580 "%s: open: %s", 581 j->filename, isc_result_totext(result)); 582 FAIL(ISC_R_UNEXPECTED); 583 } 584 585 j->fp = fp; 586 587 /* 588 * Set magic early so that seek/read can succeed. 589 */ 590 j->magic = DNS_JOURNAL_MAGIC; 591 592 CHECK(journal_seek(j, 0)); 593 CHECK(journal_read(j, &rawheader, sizeof(rawheader))); 594 595 if (memcmp(rawheader.h.format, initial_journal_header.format, 596 sizeof(initial_journal_header.format)) != 0) { 597 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 598 "%s: journal format not recognized", 599 j->filename); 600 FAIL(ISC_R_UNEXPECTED); 601 } 602 journal_header_decode(&rawheader, &j->header); 603 604 /* 605 * If there is an index, read the raw index into a dynamically 606 * allocated buffer and then convert it into a cooked index. 
607 */ 608 if (j->header.index_size != 0) { 609 unsigned int i; 610 unsigned int rawbytes; 611 unsigned char *p; 612 613 rawbytes = j->header.index_size * sizeof(journal_rawpos_t); 614 j->rawindex = isc_mem_get(mctx, rawbytes); 615 if (j->rawindex == NULL) 616 FAIL(ISC_R_NOMEMORY); 617 618 CHECK(journal_read(j, j->rawindex, rawbytes)); 619 620 j->index = isc_mem_get(mctx, j->header.index_size * 621 sizeof(journal_pos_t)); 622 if (j->index == NULL) 623 FAIL(ISC_R_NOMEMORY); 624 625 p = j->rawindex; 626 for (i = 0; i < j->header.index_size; i++) { 627 j->index[i].serial = decode_uint32(p); 628 p += 4; 629 j->index[i].offset = decode_uint32(p); 630 p += 4; 631 } 632 INSIST(p == j->rawindex + rawbytes); 633 } 634 j->offset = -1; /* Invalid, must seek explicitly. */ 635 636 /* 637 * Initialize the iterator. 638 */ 639 dns_name_init(&j->it.name, NULL); 640 dns_rdata_init(&j->it.rdata); 641 642 /* 643 * Set up empty initial buffers for uncheched and checked 644 * wire format RR data. They will be reallocated 645 * later. 646 */ 647 isc_buffer_init(&j->it.source, NULL, 0); 648 isc_buffer_init(&j->it.target, NULL, 0); 649 dns_decompress_init(&j->it.dctx, -1, DNS_DECOMPRESS_NONE); 650 651 j->state = 652 write ? JOURNAL_STATE_WRITE : JOURNAL_STATE_READ; 653 654 *journalp = j; 655 return (ISC_R_SUCCESS); 656 657 failure: 658 j->magic = 0; 659 if (j->index != NULL) { 660 isc_mem_put(j->mctx, j->index, j->header.index_size * 661 sizeof(journal_rawpos_t)); 662 j->index = NULL; 663 } 664 if (j->fp != NULL) 665 (void)isc_stdio_close(j->fp); 666 isc_mem_put(j->mctx, j, sizeof(*j)); 667 return (result); 668} 669 670isc_result_t 671dns_journal_open(isc_mem_t *mctx, const char *filename, isc_boolean_t write, 672 dns_journal_t **journalp) { 673 return (journal_open(mctx, filename, write, write, journalp)); 674} 675 676/* 677 * A comparison function defining the sorting order for 678 * entries in the IXFR-style journal file. 
679 * 680 * The IXFR format requires that deletions are sorted before 681 * additions, and within either one, SOA records are sorted 682 * before others. 683 * 684 * Also sort the non-SOA records by type as a courtesy to the 685 * server receiving the IXFR - it may help reduce the amount of 686 * rdataset merging it has to do. 687 */ 688static int 689ixfr_order(const void *av, const void *bv) { 690 dns_difftuple_t const * const *ap = av; 691 dns_difftuple_t const * const *bp = bv; 692 dns_difftuple_t const *a = *ap; 693 dns_difftuple_t const *b = *bp; 694 int r; 695 696 r = (b->op == DNS_DIFFOP_DEL) - (a->op == DNS_DIFFOP_DEL); 697 if (r != 0) 698 return (r); 699 700 r = (b->rdata.type == dns_rdatatype_soa) - 701 (a->rdata.type == dns_rdatatype_soa); 702 if (r != 0) 703 return (r); 704 705 r = (a->rdata.type - b->rdata.type); 706 return (r); 707} 708 709/* 710 * Advance '*pos' to the next journal transaction. 711 * 712 * Requires: 713 * *pos refers to a valid journal transaction. 714 * 715 * Ensures: 716 * When ISC_R_SUCCESS is returned, 717 * *pos refers to the next journal transaction. 718 * 719 * Returns one of: 720 * 721 * ISC_R_SUCCESS 722 * ISC_R_NOMORE *pos pointed at the last transaction 723 * Other results due to file errors are possible. 724 */ 725static isc_result_t 726journal_next(dns_journal_t *j, journal_pos_t *pos) { 727 isc_result_t result; 728 journal_xhdr_t xhdr; 729 REQUIRE(DNS_JOURNAL_VALID(j)); 730 731 result = journal_seek(j, pos->offset); 732 if (result != ISC_R_SUCCESS) 733 return (result); 734 735 if (pos->serial == j->header.end.serial) 736 return (ISC_R_NOMORE); 737 /* 738 * Read the header of the current transaction. 739 * This will return ISC_R_NOMORE if we are at EOF. 740 */ 741 result = journal_read_xhdr(j, &xhdr); 742 if (result != ISC_R_SUCCESS) 743 return (result); 744 745 /* 746 * Check serial number consistency. 
747 */ 748 if (xhdr.serial0 != pos->serial) { 749 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 750 "%s: journal file corrupt: " 751 "expected serial %u, got %u", 752 j->filename, pos->serial, xhdr.serial0); 753 return (ISC_R_UNEXPECTED); 754 } 755 756 /* 757 * Check for offset wraparound. 758 */ 759 if ((isc_offset_t)(pos->offset + sizeof(journal_rawxhdr_t) + xhdr.size) 760 < pos->offset) { 761 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 762 "%s: offset too large", j->filename); 763 return (ISC_R_UNEXPECTED); 764 } 765 766 pos->offset += sizeof(journal_rawxhdr_t) + xhdr.size; 767 pos->serial = xhdr.serial1; 768 return (ISC_R_SUCCESS); 769} 770 771/* 772 * If the index of the journal 'j' contains an entry "better" 773 * than '*best_guess', replace '*best_guess' with it. 774 * 775 * "Better" means having a serial number closer to 'serial' 776 * but not greater than 'serial'. 777 */ 778static void 779index_find(dns_journal_t *j, isc_uint32_t serial, journal_pos_t *best_guess) { 780 unsigned int i; 781 if (j->index == NULL) 782 return; 783 for (i = 0; i < j->header.index_size; i++) { 784 if (POS_VALID(j->index[i]) && 785 DNS_SERIAL_GE(serial, j->index[i].serial) && 786 DNS_SERIAL_GT(j->index[i].serial, best_guess->serial)) 787 *best_guess = j->index[i]; 788 } 789} 790 791/* 792 * Add a new index entry. If there is no room, make room by removing 793 * the odd-numbered entries and compacting the others into the first 794 * half of the index. This decimates old index entries exponentially 795 * over time, so that the index always contains a much larger fraction 796 * of recent serial numbers than of old ones. This is deliberate - 797 * most index searches are for outgoing IXFR, and IXFR tends to request 798 * recent versions more often than old ones. 799 */ 800static void 801index_add(dns_journal_t *j, journal_pos_t *pos) { 802 unsigned int i; 803 if (j->index == NULL) 804 return; 805 /* 806 * Search for a vacant position. 
807 */ 808 for (i = 0; i < j->header.index_size; i++) { 809 if (! POS_VALID(j->index[i])) 810 break; 811 } 812 if (i == j->header.index_size) { 813 unsigned int k = 0; 814 /* 815 * Found no vacant position. Make some room. 816 */ 817 for (i = 0; i < j->header.index_size; i += 2) { 818 j->index[k++] = j->index[i]; 819 } 820 i = k; /* 'i' identifies the first vacant position. */ 821 while (k < j->header.index_size) { 822 POS_INVALIDATE(j->index[k]); 823 k++; 824 } 825 } 826 INSIST(i < j->header.index_size); 827 INSIST(! POS_VALID(j->index[i])); 828 829 /* 830 * Store the new index entry. 831 */ 832 j->index[i] = *pos; 833} 834 835/* 836 * Invalidate any existing index entries that could become 837 * ambiguous when a new transaction with number 'serial' is added. 838 */ 839static void 840index_invalidate(dns_journal_t *j, isc_uint32_t serial) { 841 unsigned int i; 842 if (j->index == NULL) 843 return; 844 for (i = 0; i < j->header.index_size; i++) { 845 if (! DNS_SERIAL_GT(serial, j->index[i].serial)) 846 POS_INVALIDATE(j->index[i]); 847 } 848} 849 850/* 851 * Try to find a transaction with initial serial number 'serial' 852 * in the journal 'j'. 853 * 854 * If found, store its position at '*pos' and return ISC_R_SUCCESS. 855 * 856 * If 'serial' is current (= the ending serial number of the 857 * last transaction in the journal), set '*pos' to 858 * the position immediately following the last transaction and 859 * return ISC_R_SUCCESS. 860 * 861 * If 'serial' is within the range of addressable serial numbers 862 * covered by the journal but that particular serial number is missing 863 * (from the journal, not just from the index), return ISC_R_NOTFOUND. 864 * 865 * If 'serial' is outside the range of addressable serial numbers 866 * covered by the journal, return ISC_R_RANGE. 
867 * 868 */ 869static isc_result_t 870journal_find(dns_journal_t *j, isc_uint32_t serial, journal_pos_t *pos) { 871 isc_result_t result; 872 journal_pos_t current_pos; 873 REQUIRE(DNS_JOURNAL_VALID(j)); 874 875 if (DNS_SERIAL_GT(j->header.begin.serial, serial)) 876 return (ISC_R_RANGE); 877 if (DNS_SERIAL_GT(serial, j->header.end.serial)) 878 return (ISC_R_RANGE); 879 if (serial == j->header.end.serial) { 880 *pos = j->header.end; 881 return (ISC_R_SUCCESS); 882 } 883 884 current_pos = j->header.begin; 885 index_find(j, serial, ¤t_pos); 886 887 while (current_pos.serial != serial) { 888 if (DNS_SERIAL_GT(current_pos.serial, serial)) 889 return (ISC_R_NOTFOUND); 890 result = journal_next(j, ¤t_pos); 891 if (result != ISC_R_SUCCESS) 892 return (result); 893 } 894 *pos = current_pos; 895 return (ISC_R_SUCCESS); 896} 897 898isc_result_t 899dns_journal_begin_transaction(dns_journal_t *j) { 900 isc_uint32_t offset; 901 isc_result_t result; 902 journal_rawxhdr_t hdr; 903 904 REQUIRE(DNS_JOURNAL_VALID(j)); 905 REQUIRE(j->state == JOURNAL_STATE_WRITE); 906 907 /* 908 * Find the file offset where the new transaction should 909 * be written, and seek there. 910 */ 911 if (JOURNAL_EMPTY(&j->header)) { 912 offset = sizeof(journal_rawheader_t) + 913 j->header.index_size * sizeof(journal_rawpos_t); 914 } else { 915 offset = j->header.end.offset; 916 } 917 j->x.pos[0].offset = offset; 918 j->x.pos[1].offset = offset; /* Initial value, will be incremented. */ 919 j->x.n_soa = 0; 920 921 CHECK(journal_seek(j, offset)); 922 923 /* 924 * Write a dummy transaction header of all zeroes to reserve 925 * space. It will be filled in when the transaction is 926 * finished. 
927 */ 928 memset(&hdr, 0, sizeof(hdr)); 929 CHECK(journal_write(j, &hdr, sizeof(hdr))); 930 j->x.pos[1].offset = j->offset; 931 932 j->state = JOURNAL_STATE_TRANSACTION; 933 result = ISC_R_SUCCESS; 934 failure: 935 return (result); 936} 937 938isc_result_t 939dns_journal_writediff(dns_journal_t *j, dns_diff_t *diff) { 940 dns_difftuple_t *t; 941 isc_buffer_t buffer; 942 void *mem = NULL; 943 unsigned int size; 944 isc_result_t result; 945 isc_region_t used; 946 947 REQUIRE(DNS_DIFF_VALID(diff)); 948 REQUIRE(j->state == JOURNAL_STATE_TRANSACTION); 949 950 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "writing to journal"); 951 (void)dns_diff_print(diff, NULL); 952 953 /* 954 * Pass 1: determine the buffer size needed, and 955 * keep track of SOA serial numbers. 956 */ 957 size = 0; 958 for (t = ISC_LIST_HEAD(diff->tuples); t != NULL; 959 t = ISC_LIST_NEXT(t, link)) 960 { 961 if (t->rdata.type == dns_rdatatype_soa) { 962 if (j->x.n_soa < 2) 963 j->x.pos[j->x.n_soa].serial = 964 dns_soa_getserial(&t->rdata); 965 j->x.n_soa++; 966 } 967 size += sizeof(journal_rawrrhdr_t); 968 size += t->name.length; /* XXX should have access macro? */ 969 size += 10; 970 size += t->rdata.length; 971 } 972 973 mem = isc_mem_get(j->mctx, size); 974 if (mem == NULL) 975 return (ISC_R_NOMEMORY); 976 977 isc_buffer_init(&buffer, mem, size); 978 979 /* 980 * Pass 2. Write RRs to buffer. 981 */ 982 for (t = ISC_LIST_HEAD(diff->tuples); t != NULL; 983 t = ISC_LIST_NEXT(t, link)) 984 { 985 /* 986 * Write the RR header. 987 */ 988 isc_buffer_putuint32(&buffer, t->name.length + 10 + 989 t->rdata.length); 990 /* 991 * Write the owner name, RR header, and RR data. 
		 */
		isc_buffer_putmem(&buffer, t->name.ndata, t->name.length);
		isc_buffer_putuint16(&buffer, t->rdata.type);
		isc_buffer_putuint16(&buffer, t->rdata.rdclass);
		isc_buffer_putuint32(&buffer, t->ttl);
		INSIST(t->rdata.length < 65536);
		isc_buffer_putuint16(&buffer, (isc_uint16_t)t->rdata.length);
		INSIST(isc_buffer_availablelength(&buffer) >= t->rdata.length);
		isc_buffer_putmem(&buffer, t->rdata.data, t->rdata.length);
	}

	isc_buffer_usedregion(&buffer, &used);
	INSIST(used.length == size);

	j->x.pos[1].offset += used.length;

	/*
	 * Write the buffer contents to the journal file.
	 */
	CHECK(journal_write(j, used.base, used.length));

	result = ISC_R_SUCCESS;

 failure:
	if (mem != NULL)
		isc_mem_put(j->mctx, mem, size);
	return (result);

}

/*
 * Commit the transaction currently open on journal 'j'.
 *
 * The transaction is validated (exactly two SOAs, serial must not
 * decrease, and its first serial must match the journal's last serial
 * unless the journal is empty), then made durable in this order:
 * fsync the transaction data, rewrite the transaction header, rewrite
 * the journal header, update and write the index, fsync again.
 * On success the journal state becomes JOURNAL_STATE_WRITE.
 *
 * Requires:
 *	'j' is a valid journal in state JOURNAL_STATE_TRANSACTION.
 *
 * Returns:
 *	ISC_R_SUCCESS, ISC_R_UNEXPECTED if a consistency check fails,
 *	or the result of the first helper that fails under CHECK().
 */
isc_result_t
dns_journal_commit(dns_journal_t *j) {
	isc_result_t result;
	journal_rawheader_t rawheader;

	REQUIRE(DNS_JOURNAL_VALID(j));
	REQUIRE(j->state == JOURNAL_STATE_TRANSACTION);

	/*
	 * Perform some basic consistency checks.
	 */
	if (j->x.n_soa != 2) {
		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
			      "%s: malformed transaction: %d SOAs",
			      j->filename, j->x.n_soa);
		return (ISC_R_UNEXPECTED);
	}
	/*
	 * An unchanged serial is tolerated only when bind8_compat is set.
	 */
	if (! (DNS_SERIAL_GT(j->x.pos[1].serial, j->x.pos[0].serial) ||
	       (bind8_compat &&
		j->x.pos[1].serial == j->x.pos[0].serial)))
	{
		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
			      "%s: malformed transaction: serial number "
			      "would decrease", j->filename);
		return (ISC_R_UNEXPECTED);
	}
	/*
	 * The new transaction must start where the journal currently ends.
	 */
	if (! JOURNAL_EMPTY(&j->header)) {
		if (j->x.pos[0].serial != j->header.end.serial) {
			isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
					 "malformed transaction: "
					 "%s last serial %u != "
					 "transaction first serial %u",
					 j->filename,
					 j->header.end.serial,
					 j->x.pos[0].serial);
			return (ISC_R_UNEXPECTED);
		}
	}

	/*
	 * Some old journal entries may become non-addressable
	 * when we increment the current serial number.  Purge them
	 * by stepping header.begin forward to the first addressable
	 * transaction.  Also purge them from the index.
	 */
	if (! JOURNAL_EMPTY(&j->header)) {
		while (! DNS_SERIAL_GT(j->x.pos[1].serial,
				       j->header.begin.serial)) {
			CHECK(journal_next(j, &j->header.begin));
		}
		index_invalidate(j, j->x.pos[1].serial);
	}
#ifdef notyet
	if (DNS_SERIAL_GT(last_dumped_serial, j->x.pos[1].serial)) {
		force_dump(...);
	}
#endif

	/*
	 * Commit the transaction data to stable storage.
	 */
	CHECK(journal_fsync(j));

	/*
	 * Update the transaction header.  The size written excludes
	 * the transaction header itself.
	 */
	CHECK(journal_seek(j, j->x.pos[0].offset));
	CHECK(journal_write_xhdr(j, (j->x.pos[1].offset - j->x.pos[0].offset) -
				 sizeof(journal_rawxhdr_t),
				 j->x.pos[0].serial, j->x.pos[1].serial));

	/*
	 * Update the journal header.
	 */
	if (JOURNAL_EMPTY(&j->header)) {
		j->header.begin = j->x.pos[0];
	}
	j->header.end = j->x.pos[1];
	journal_header_encode(&j->header, &rawheader);
	CHECK(journal_seek(j, 0));
	CHECK(journal_write(j, &rawheader, sizeof(rawheader)));

	/*
	 * Update the index.
	 */
	index_add(j, &j->x.pos[0]);

	/*
	 * Convert the index into on-disk format and write
	 * it to disk.
	 */
	CHECK(index_to_disk(j));

	/*
	 * Commit the header to stable storage.
	 */
	CHECK(journal_fsync(j));

	/*
	 * We no longer have a transaction open.
	 */
	j->state = JOURNAL_STATE_WRITE;

	result = ISC_R_SUCCESS;

 failure:
	return (result);
}

/*
 * Write 'diff' to journal 'j' as one complete transaction:
 * sort it with ixfr_order, begin a transaction, write the diff,
 * and commit.  The first step that fails determines the result.
 */
isc_result_t
dns_journal_write_transaction(dns_journal_t *j, dns_diff_t *diff) {
	isc_result_t result;
	CHECK(dns_diff_sort(diff, ixfr_order));
	CHECK(dns_journal_begin_transaction(j));
	CHECK(dns_journal_writediff(j, diff));
	CHECK(dns_journal_commit(j));
	result = ISC_R_SUCCESS;
 failure:
	return (result);
}

/*
 * Destroy the journal '*journalp': invalidate the iterator state,
 * release the raw and decoded index arrays and the iterator buffers,
 * close the file if it is open, free the journal structure itself,
 * and set '*journalp' to NULL.
 *
 * Requires:
 *	'*journalp' is a valid journal.
 */
void
dns_journal_destroy(dns_journal_t **journalp) {
	dns_journal_t *j = *journalp;
	REQUIRE(DNS_JOURNAL_VALID(j));

	j->it.result = ISC_R_FAILURE;
	dns_name_invalidate(&j->it.name);
	dns_decompress_invalidate(&j->it.dctx);
	if (j->rawindex != NULL)
		isc_mem_put(j->mctx, j->rawindex, j->header.index_size *
			    sizeof(journal_rawpos_t));
	if (j->index != NULL)
		isc_mem_put(j->mctx, j->index, j->header.index_size *
			    sizeof(journal_pos_t));
	if (j->it.target.base != NULL)
		isc_mem_put(j->mctx, j->it.target.base, j->it.target.length);
	if (j->it.source.base != NULL)
		isc_mem_put(j->mctx, j->it.source.base, j->it.source.length);

	/* A close error is deliberately ignored here. */
	if (j->fp != NULL)
		(void)isc_stdio_close(j->fp);
	j->magic = 0;
	isc_mem_put(j->mctx, j, sizeof(*j));
	*journalp = NULL;
}

/*
 * Roll the open journal 'j' into the database 'db'.
 * A new database version will be created.
 */

/* XXX Share code with incoming IXFR?
*/ 1175 1176static isc_result_t 1177roll_forward(dns_journal_t *j, dns_db_t *db) { 1178 isc_buffer_t source; /* Transaction data from disk */ 1179 isc_buffer_t target; /* Ditto after _fromwire check */ 1180 isc_uint32_t db_serial; /* Database SOA serial */ 1181 isc_uint32_t end_serial; /* Last journal SOA serial */ 1182 isc_result_t result; 1183 dns_dbversion_t *ver = NULL; 1184 journal_pos_t pos; 1185 dns_diff_t diff; 1186 unsigned int n_soa = 0; 1187 unsigned int n_put = 0; 1188 1189 REQUIRE(DNS_JOURNAL_VALID(j)); 1190 REQUIRE(DNS_DB_VALID(db)); 1191 1192 dns_diff_init(j->mctx, &diff); 1193 1194 /* 1195 * Set up empty initial buffers for uncheched and checked 1196 * wire format transaction data. They will be reallocated 1197 * later. 1198 */ 1199 isc_buffer_init(&source, NULL, 0); 1200 isc_buffer_init(&target, NULL, 0); 1201 1202 /* 1203 * Create the new database version. 1204 */ 1205 CHECK(dns_db_newversion(db, &ver)); 1206 1207 /* 1208 * Get the current database SOA serial number. 1209 */ 1210 CHECK(dns_db_getsoaserial(db, ver, &db_serial)); 1211 1212 /* 1213 * Locate a journal entry for the current database serial. 1214 */ 1215 CHECK(journal_find(j, db_serial, &pos)); 1216 /* 1217 * XXX do more drastic things, like marking zone stale, 1218 * if this fails? 1219 */ 1220 /* 1221 * XXXRTH The zone code should probably mark the zone as bad and 1222 * scream loudly into the log if this is a dynamic update 1223 * log reply that failed. 
1224 */ 1225 1226 end_serial = dns_journal_last_serial(j); 1227 if (db_serial == end_serial) 1228 CHECK(DNS_R_UPTODATE); 1229 1230 CHECK(dns_journal_iter_init(j, db_serial, end_serial)); 1231 1232 for (result = dns_journal_first_rr(j); 1233 result == ISC_R_SUCCESS; 1234 result = dns_journal_next_rr(j)) 1235 { 1236 dns_name_t *name; 1237 isc_uint32_t ttl; 1238 dns_rdata_t *rdata; 1239 dns_difftuple_t *tuple = NULL; 1240 1241 name = NULL; 1242 rdata = NULL; 1243 dns_journal_current_rr(j, &name, &ttl, &rdata); 1244 1245 if (rdata->type == dns_rdatatype_soa) { 1246 n_soa++; 1247 if (n_soa == 2) 1248 db_serial = j->it.current_serial; 1249 } 1250 1251 if (n_soa == 3) 1252 n_soa = 1; 1253 if (n_soa == 0) { 1254 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1255 "%s: journal file corrupt: missing " 1256 "initial SOA", j->filename); 1257 FAIL(ISC_R_UNEXPECTED); 1258 } 1259 CHECK(dns_difftuple_create(diff.mctx, n_soa == 1 ? 1260 DNS_DIFFOP_DEL : DNS_DIFFOP_ADD, 1261 name, ttl, rdata, &tuple)); 1262 dns_diff_append(&diff, &tuple); 1263 1264 if (++n_put > 100) { 1265 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), 1266 "%s: applying diff to database (%u)", 1267 j->filename, db_serial); 1268 (void)dns_diff_print(&diff, NULL); 1269 CHECK(dns_diff_apply(&diff, db, ver)); 1270 dns_diff_clear(&diff); 1271 n_put = 0; 1272 } 1273 } 1274 if (result == ISC_R_NOMORE) 1275 result = ISC_R_SUCCESS; 1276 CHECK(result); 1277 1278 if (n_put != 0) { 1279 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), 1280 "%s: applying final diff to database (%u)", 1281 j->filename, db_serial); 1282 (void)dns_diff_print(&diff, NULL); 1283 CHECK(dns_diff_apply(&diff, db, ver)); 1284 dns_diff_clear(&diff); 1285 } 1286 1287 failure: 1288 if (ver != NULL) 1289 dns_db_closeversion(db, &ver, result == ISC_R_SUCCESS ? 
1290 ISC_TRUE : ISC_FALSE); 1291 1292 if (source.base != NULL) 1293 isc_mem_put(j->mctx, source.base, source.length); 1294 if (target.base != NULL) 1295 isc_mem_put(j->mctx, target.base, target.length); 1296 1297 dns_diff_clear(&diff); 1298 1299 return (result); 1300} 1301 1302isc_result_t 1303dns_journal_rollforward(isc_mem_t *mctx, dns_db_t *db, const char *filename) { 1304 dns_journal_t *j; 1305 isc_result_t result; 1306 1307 REQUIRE(DNS_DB_VALID(db)); 1308 REQUIRE(filename != NULL); 1309 1310 j = NULL; 1311 result = dns_journal_open(mctx, filename, ISC_FALSE, &j); 1312 if (result == ISC_R_NOTFOUND) { 1313 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), 1314 "no journal file, but that's OK"); 1315 return (DNS_R_NOJOURNAL); 1316 } 1317 if (result != ISC_R_SUCCESS) 1318 return (result); 1319 if (JOURNAL_EMPTY(&j->header)) 1320 result = DNS_R_UPTODATE; 1321 else 1322 result = roll_forward(j, db); 1323 1324 dns_journal_destroy(&j); 1325 1326 return (result); 1327} 1328 1329isc_result_t 1330dns_journal_print(isc_mem_t *mctx, const char *filename, FILE *file) { 1331 dns_journal_t *j; 1332 isc_buffer_t source; /* Transaction data from disk */ 1333 isc_buffer_t target; /* Ditto after _fromwire check */ 1334 isc_uint32_t start_serial; /* Database SOA serial */ 1335 isc_uint32_t end_serial; /* Last journal SOA serial */ 1336 isc_result_t result; 1337 dns_diff_t diff; 1338 unsigned int n_soa = 0; 1339 unsigned int n_put = 0; 1340 1341 REQUIRE(filename != NULL); 1342 1343 j = NULL; 1344 result = dns_journal_open(mctx, filename, ISC_FALSE, &j); 1345 if (result == ISC_R_NOTFOUND) { 1346 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "no journal file"); 1347 return (DNS_R_NOJOURNAL); 1348 } 1349 1350 if (result != ISC_R_SUCCESS) { 1351 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1352 "journal open failure: %s: %s", 1353 isc_result_totext(result), j->filename); 1354 return (result); 1355 } 1356 1357 dns_diff_init(j->mctx, &diff); 1358 1359 /* 1360 * Set up empty initial buffers for 
uncheched and checked 1361 * wire format transaction data. They will be reallocated 1362 * later. 1363 */ 1364 isc_buffer_init(&source, NULL, 0); 1365 isc_buffer_init(&target, NULL, 0); 1366 1367 start_serial = dns_journal_first_serial(j); 1368 end_serial = dns_journal_last_serial(j); 1369 1370 CHECK(dns_journal_iter_init(j, start_serial, end_serial)); 1371 1372 for (result = dns_journal_first_rr(j); 1373 result == ISC_R_SUCCESS; 1374 result = dns_journal_next_rr(j)) 1375 { 1376 dns_name_t *name; 1377 isc_uint32_t ttl; 1378 dns_rdata_t *rdata; 1379 dns_difftuple_t *tuple = NULL; 1380 1381 name = NULL; 1382 rdata = NULL; 1383 dns_journal_current_rr(j, &name, &ttl, &rdata); 1384 1385 if (rdata->type == dns_rdatatype_soa) 1386 n_soa++; 1387 1388 if (n_soa == 3) 1389 n_soa = 1; 1390 if (n_soa == 0) { 1391 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1392 "%s: journal file corrupt: missing " 1393 "initial SOA", j->filename); 1394 FAIL(ISC_R_UNEXPECTED); 1395 } 1396 CHECK(dns_difftuple_create(diff.mctx, n_soa == 1 ? 
1397 DNS_DIFFOP_DEL : DNS_DIFFOP_ADD, 1398 name, ttl, rdata, &tuple)); 1399 dns_diff_append(&diff, &tuple); 1400 1401 if (++n_put > 100) { 1402 result = dns_diff_print(&diff, file); 1403 dns_diff_clear(&diff); 1404 n_put = 0; 1405 if (result != ISC_R_SUCCESS) 1406 break; 1407 } 1408 } 1409 if (result == ISC_R_NOMORE) 1410 result = ISC_R_SUCCESS; 1411 CHECK(result); 1412 1413 if (n_put != 0) { 1414 result = dns_diff_print(&diff, file); 1415 dns_diff_clear(&diff); 1416 } 1417 goto cleanup; 1418 1419 failure: 1420 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1421 "%s: cannot print: journal file corrupt", j->filename); 1422 1423 cleanup: 1424 if (source.base != NULL) 1425 isc_mem_put(j->mctx, source.base, source.length); 1426 if (target.base != NULL) 1427 isc_mem_put(j->mctx, target.base, target.length); 1428 1429 dns_diff_clear(&diff); 1430 dns_journal_destroy(&j); 1431 1432 return (result); 1433} 1434 1435/**************************************************************************/ 1436/* 1437 * Miscellaneous accessors. 1438 */ 1439isc_uint32_t dns_journal_first_serial(dns_journal_t *j) { 1440 return (j->header.begin.serial); 1441} 1442 1443isc_uint32_t dns_journal_last_serial(dns_journal_t *j) { 1444 return (j->header.end.serial); 1445} 1446 1447/**************************************************************************/ 1448/* 1449 * Iteration support. 1450 * 1451 * When serving an outgoing IXFR, we transmit a part the journal starting 1452 * at the serial number in the IXFR request and ending at the serial 1453 * number that is current when the IXFR request arrives. The ending 1454 * serial number is not necessarily at the end of the journal: 1455 * the journal may grow while the IXFR is in progress, but we stop 1456 * when we reach the serial number that was current when the IXFR started. 
1457 */ 1458 1459static isc_result_t read_one_rr(dns_journal_t *j); 1460 1461/* 1462 * Make sure the buffer 'b' is has at least 'size' bytes 1463 * allocated, and clear it. 1464 * 1465 * Requires: 1466 * Either b->base is NULL, or it points to b->length bytes of memory 1467 * previously allocated by isc_mem_get(). 1468 */ 1469 1470static isc_result_t 1471size_buffer(isc_mem_t *mctx, isc_buffer_t *b, unsigned size) { 1472 if (b->length < size) { 1473 void *mem = isc_mem_get(mctx, size); 1474 if (mem == NULL) 1475 return (ISC_R_NOMEMORY); 1476 if (b->base != NULL) 1477 isc_mem_put(mctx, b->base, b->length); 1478 b->base = mem; 1479 b->length = size; 1480 } 1481 isc_buffer_clear(b); 1482 return (ISC_R_SUCCESS); 1483} 1484 1485isc_result_t 1486dns_journal_iter_init(dns_journal_t *j, 1487 isc_uint32_t begin_serial, isc_uint32_t end_serial) 1488{ 1489 isc_result_t result; 1490 1491 CHECK(journal_find(j, begin_serial, &j->it.bpos)); 1492 INSIST(j->it.bpos.serial == begin_serial); 1493 1494 CHECK(journal_find(j, end_serial, &j->it.epos)); 1495 INSIST(j->it.epos.serial == end_serial); 1496 1497 result = ISC_R_SUCCESS; 1498 failure: 1499 j->it.result = result; 1500 return (j->it.result); 1501} 1502 1503 1504isc_result_t 1505dns_journal_first_rr(dns_journal_t *j) { 1506 isc_result_t result; 1507 1508 /* 1509 * Seek to the beginning of the first transaction we are 1510 * interested in. 1511 */ 1512 CHECK(journal_seek(j, j->it.bpos.offset)); 1513 j->it.current_serial = j->it.bpos.serial; 1514 1515 j->it.xsize = 0; /* We have no transaction data yet... */ 1516 j->it.xpos = 0; /* ...and haven't used any of it. 
*/ 1517 1518 return (read_one_rr(j)); 1519 1520 failure: 1521 return (result); 1522} 1523 1524static isc_result_t 1525read_one_rr(dns_journal_t *j) { 1526 isc_result_t result; 1527 1528 dns_rdatatype_t rdtype; 1529 dns_rdataclass_t rdclass; 1530 unsigned int rdlen; 1531 isc_uint32_t ttl; 1532 journal_xhdr_t xhdr; 1533 journal_rrhdr_t rrhdr; 1534 1535 INSIST(j->offset <= j->it.epos.offset); 1536 if (j->offset == j->it.epos.offset) 1537 return (ISC_R_NOMORE); 1538 if (j->it.xpos == j->it.xsize) { 1539 /* 1540 * We are at a transaction boundary. 1541 * Read another transaction header. 1542 */ 1543 CHECK(journal_read_xhdr(j, &xhdr)); 1544 if (xhdr.size == 0) { 1545 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1546 "%s: journal corrupt: empty transaction", 1547 j->filename); 1548 FAIL(ISC_R_UNEXPECTED); 1549 } 1550 if (xhdr.serial0 != j->it.current_serial) { 1551 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1552 "%s: journal file corrupt: " 1553 "expected serial %u, got %u", 1554 j->filename, 1555 j->it.current_serial, xhdr.serial0); 1556 FAIL(ISC_R_UNEXPECTED); 1557 } 1558 j->it.xsize = xhdr.size; 1559 j->it.xpos = 0; 1560 } 1561 /* 1562 * Read an RR. 1563 */ 1564 CHECK(journal_read_rrhdr(j, &rrhdr)); 1565 /* 1566 * Perform a sanity check on the journal RR size. 1567 * The smallest possible RR has a 1-byte owner name 1568 * and a 10-byte header. The largest possible 1569 * RR has 65535 bytes of data, a header, and a maximum- 1570 * size owner name, well below 70 k total. 
1571 */ 1572 if (rrhdr.size < 1+10 || rrhdr.size > 70000) { 1573 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, 1574 "%s: journal corrupt: impossible RR size " 1575 "(%d bytes)", j->filename, rrhdr.size); 1576 FAIL(ISC_R_UNEXPECTED); 1577 } 1578 1579 CHECK(size_buffer(j->mctx, &j->it.source, rrhdr.size)); 1580 CHECK(journal_read(j, j->it.source.base, rrhdr.size)); 1581 isc_buffer_add(&j->it.source, rrhdr.size); 1582 1583 /* 1584 * The target buffer is made the same size 1585 * as the source buffer, with the assumption that when 1586 * no compression in present, the output of dns_*_fromwire() 1587 * is no larger than the input. 1588 */ 1589 CHECK(size_buffer(j->mctx, &j->it.target, rrhdr.size)); 1590 1591 /* 1592 * Parse the owner name. We don't know where it 1593 * ends yet, so we make the entire "remaining" 1594 * part of the buffer "active". 1595 */ 1596 isc_buffer_setactive(&j->it.source, 1597 j->it.source.used - j->it.source.current); 1598 CHECK(dns_name_fromwire(&j->it.name, &j->it.source, 1599 &j->it.dctx, 0, &j->it.target)); 1600 1601 /* 1602 * Check that the RR header is there, and parse it. 1603 */ 1604 if (isc_buffer_remaininglength(&j->it.source) < 10) 1605 FAIL(DNS_R_FORMERR); 1606 1607 rdtype = isc_buffer_getuint16(&j->it.source); 1608 rdclass = isc_buffer_getuint16(&j->it.source); 1609 ttl = isc_buffer_getuint32(&j->it.source); 1610 rdlen = isc_buffer_getuint16(&j->it.source); 1611 1612 /* 1613 * Parse the rdata. 
1614 */ 1615 isc_buffer_setactive(&j->it.source, rdlen); 1616 dns_rdata_reset(&j->it.rdata); 1617 CHECK(dns_rdata_fromwire(&j->it.rdata, rdclass, 1618 rdtype, &j->it.source, &j->it.dctx, 1619 0, &j->it.target)); 1620 j->it.ttl = ttl; 1621 1622 j->it.xpos += sizeof(journal_rawrrhdr_t) + rrhdr.size; 1623 if (rdtype == dns_rdatatype_soa) { 1624 /* XXX could do additional consistency checks here */ 1625 j->it.current_serial = dns_soa_getserial(&j->it.rdata); 1626 } 1627 1628 result = ISC_R_SUCCESS; 1629 1630 failure: 1631 j->it.result = result; 1632 return (result); 1633} 1634 1635isc_result_t 1636dns_journal_next_rr(dns_journal_t *j) { 1637 j->it.result = read_one_rr(j); 1638 return (j->it.result); 1639} 1640 1641void 1642dns_journal_current_rr(dns_journal_t *j, dns_name_t **name, isc_uint32_t *ttl, 1643 dns_rdata_t **rdata) 1644{ 1645 REQUIRE(j->it.result == ISC_R_SUCCESS); 1646 *name = &j->it.name; 1647 *ttl = j->it.ttl; 1648 *rdata = &j->it.rdata; 1649} 1650 1651/**************************************************************************/ 1652/* 1653 * Generating diffs from databases 1654 */ 1655 1656/* 1657 * Construct a diff containing all the RRs at the current name of the 1658 * database iterator 'dbit' in database 'db', version 'ver'. 1659 * Set '*name' to the current name, and append the diff to 'diff'. 1660 * All new tuples will have the operation 'op'. 1661 * 1662 * Requires: 'name' must have buffer large enough to hold the name. 1663 * Typically, a dns_fixedname_t would be used. 
1664 */ 1665static isc_result_t 1666get_name_diff(dns_db_t *db, dns_dbversion_t *ver, isc_stdtime_t now, 1667 dns_dbiterator_t *dbit, dns_name_t *name, dns_diffop_t op, 1668 dns_diff_t *diff) 1669{ 1670 isc_result_t result; 1671 dns_dbnode_t *node = NULL; 1672 dns_rdatasetiter_t *rdsiter = NULL; 1673 dns_difftuple_t *tuple = NULL; 1674 1675 result = dns_dbiterator_current(dbit, &node, name); 1676 if (result != ISC_R_SUCCESS) 1677 return (result); 1678 1679 result = dns_db_allrdatasets(db, node, ver, now, &rdsiter); 1680 if (result != ISC_R_SUCCESS) 1681 goto cleanup_node; 1682 1683 for (result = dns_rdatasetiter_first(rdsiter); 1684 result == ISC_R_SUCCESS; 1685 result = dns_rdatasetiter_next(rdsiter)) 1686 { 1687 dns_rdataset_t rdataset; 1688 1689 dns_rdataset_init(&rdataset); 1690 dns_rdatasetiter_current(rdsiter, &rdataset); 1691 1692 for (result = dns_rdataset_first(&rdataset); 1693 result == ISC_R_SUCCESS; 1694 result = dns_rdataset_next(&rdataset)) 1695 { 1696 dns_rdata_t rdata = DNS_RDATA_INIT; 1697 dns_rdataset_current(&rdataset, &rdata); 1698 result = dns_difftuple_create(diff->mctx, op, name, 1699 rdataset.ttl, &rdata, 1700 &tuple); 1701 if (result != ISC_R_SUCCESS) { 1702 dns_rdataset_disassociate(&rdataset); 1703 goto cleanup_iterator; 1704 } 1705 dns_diff_append(diff, &tuple); 1706 } 1707 dns_rdataset_disassociate(&rdataset); 1708 if (result != ISC_R_NOMORE) 1709 goto cleanup_iterator; 1710 } 1711 if (result != ISC_R_NOMORE) 1712 goto cleanup_iterator; 1713 1714 result = ISC_R_SUCCESS; 1715 1716 cleanup_iterator: 1717 dns_rdatasetiter_destroy(&rdsiter); 1718 1719 cleanup_node: 1720 dns_db_detachnode(db, &node); 1721 1722 return (result); 1723} 1724 1725/* 1726 * Comparison function for use by dns_diff_subtract when sorting 1727 * the diffs to be subtracted. The sort keys are the rdata type 1728 * and the rdata itself. The owner name is ignored, because 1729 * it is known to be the same for all tuples. 
1730 */ 1731static int 1732rdata_order(const void *av, const void *bv) { 1733 dns_difftuple_t const * const *ap = av; 1734 dns_difftuple_t const * const *bp = bv; 1735 dns_difftuple_t const *a = *ap; 1736 dns_difftuple_t const *b = *bp; 1737 int r; 1738 r = (b->rdata.type - a->rdata.type); 1739 if (r != 0) 1740 return (r); 1741 r = dns_rdata_compare(&a->rdata, &b->rdata); 1742 return (r); 1743} 1744 1745static isc_result_t 1746dns_diff_subtract(dns_diff_t diff[2], dns_diff_t *r) { 1747 isc_result_t result; 1748 dns_difftuple_t *p[2]; 1749 int i, t; 1750 isc_boolean_t append; 1751 1752 CHECK(dns_diff_sort(&diff[0], rdata_order)); 1753 CHECK(dns_diff_sort(&diff[1], rdata_order)); 1754 1755 for (;;) { 1756 p[0] = ISC_LIST_HEAD(diff[0].tuples); 1757 p[1] = ISC_LIST_HEAD(diff[1].tuples); 1758 if (p[0] == NULL && p[1] == NULL) 1759 break; 1760 1761 for (i = 0; i < 2; i++) 1762 if (p[!i] == NULL) { 1763 ISC_LIST_UNLINK(diff[i].tuples, p[i], link); 1764 ISC_LIST_APPEND(r->tuples, p[i], link); 1765 goto next; 1766 } 1767 t = rdata_order(&p[0], &p[1]); 1768 if (t < 0) { 1769 ISC_LIST_UNLINK(diff[0].tuples, p[0], link); 1770 ISC_LIST_APPEND(r->tuples, p[0], link); 1771 goto next; 1772 } 1773 if (t > 0) { 1774 ISC_LIST_UNLINK(diff[1].tuples, p[1], link); 1775 ISC_LIST_APPEND(r->tuples, p[1], link); 1776 goto next; 1777 } 1778 INSIST(t == 0); 1779 /* 1780 * Identical RRs in both databases; skip them both 1781 * if the ttl differs. 1782 */ 1783 append = ISC_TF(p[0]->ttl != p[1]->ttl); 1784 for (i = 0; i < 2; i++) { 1785 ISC_LIST_UNLINK(diff[i].tuples, p[i], link); 1786 if (append) { 1787 ISC_LIST_APPEND(r->tuples, p[i], link); 1788 } else { 1789 dns_difftuple_free(&p[i]); 1790 } 1791 } 1792 next: ; 1793 } 1794 result = ISC_R_SUCCESS; 1795 failure: 1796 return (result); 1797} 1798 1799/* 1800 * Compare the databases 'dba' and 'dbb' and generate a journal 1801 * entry containing the changes to make 'dba' from 'dbb' (note 1802 * the order). 
This journal entry will consist of a single, 1803 * possibly very large transaction. 1804 */ 1805 1806isc_result_t 1807dns_db_diff(isc_mem_t *mctx, 1808 dns_db_t *dba, dns_dbversion_t *dbvera, 1809 dns_db_t *dbb, dns_dbversion_t *dbverb, 1810 const char *journal_filename) 1811{ 1812 dns_db_t *db[2]; 1813 dns_dbversion_t *ver[2]; 1814 dns_dbiterator_t *dbit[2] = { NULL, NULL }; 1815 isc_boolean_t have[2] = { ISC_FALSE, ISC_FALSE }; 1816 dns_fixedname_t fixname[2]; 1817 isc_result_t result, itresult[2]; 1818 dns_diff_t diff[2], resultdiff; 1819 int i, t; 1820 dns_journal_t *journal = NULL; 1821 1822 db[0] = dba, db[1] = dbb; 1823 ver[0] = dbvera, ver[1] = dbverb; 1824 1825 dns_diff_init(mctx, &diff[0]); 1826 dns_diff_init(mctx, &diff[1]); 1827 dns_diff_init(mctx, &resultdiff); 1828 1829 dns_fixedname_init(&fixname[0]); 1830 dns_fixedname_init(&fixname[1]); 1831 1832 result = dns_journal_open(mctx, journal_filename, ISC_TRUE, &journal); 1833 if (result != ISC_R_SUCCESS) 1834 return (result); 1835 1836 result = dns_db_createiterator(db[0], ISC_FALSE, &dbit[0]); 1837 if (result != ISC_R_SUCCESS) 1838 goto cleanup_journal; 1839 result = dns_db_createiterator(db[1], ISC_FALSE, &dbit[1]); 1840 if (result != ISC_R_SUCCESS) 1841 goto cleanup_interator0; 1842 1843 itresult[0] = dns_dbiterator_first(dbit[0]); 1844 itresult[1] = dns_dbiterator_first(dbit[1]); 1845 1846 for (;;) { 1847 for (i = 0; i < 2; i++) { 1848 if (! have[i] && itresult[i] == ISC_R_SUCCESS) { 1849 CHECK(get_name_diff(db[i], ver[i], 0, dbit[i], 1850 dns_fixedname_name(&fixname[i]), 1851 i == 0 ? 1852 DNS_DIFFOP_ADD : 1853 DNS_DIFFOP_DEL, 1854 &diff[i])); 1855 itresult[i] = dns_dbiterator_next(dbit[i]); 1856 have[i] = ISC_TRUE; 1857 } 1858 } 1859 1860 if (! have[0] && ! have[1]) { 1861 INSIST(ISC_LIST_EMPTY(diff[0].tuples)); 1862 INSIST(ISC_LIST_EMPTY(diff[1].tuples)); 1863 break; 1864 } 1865 1866 for (i = 0; i < 2; i++) { 1867 if (! 
have[!i]) { 1868 ISC_LIST_APPENDLIST(resultdiff.tuples, 1869 diff[i].tuples, link); 1870 INSIST(ISC_LIST_EMPTY(diff[i].tuples)); 1871 have[i] = ISC_FALSE; 1872 goto next; 1873 } 1874 } 1875 1876 t = dns_name_compare(dns_fixedname_name(&fixname[0]), 1877 dns_fixedname_name(&fixname[1])); 1878 if (t < 0) { 1879 ISC_LIST_APPENDLIST(resultdiff.tuples, 1880 diff[0].tuples, link); 1881 INSIST(ISC_LIST_EMPTY(diff[0].tuples)); 1882 have[0] = ISC_FALSE; 1883 continue; 1884 } 1885 if (t > 0) { 1886 ISC_LIST_APPENDLIST(resultdiff.tuples, 1887 diff[1].tuples, link); 1888 INSIST(ISC_LIST_EMPTY(diff[1].tuples)); 1889 have[1] = ISC_FALSE; 1890 continue; 1891 } 1892 INSIST(t == 0); 1893 CHECK(dns_diff_subtract(diff, &resultdiff)); 1894 INSIST(ISC_LIST_EMPTY(diff[0].tuples)); 1895 INSIST(ISC_LIST_EMPTY(diff[1].tuples)); 1896 have[0] = have[1] = ISC_FALSE; 1897 next: ; 1898 } 1899 if (itresult[0] != ISC_R_NOMORE) 1900 FAIL(itresult[0]); 1901 if (itresult[1] != ISC_R_NOMORE) 1902 FAIL(itresult[1]); 1903 1904 if (ISC_LIST_EMPTY(resultdiff.tuples)) { 1905 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "no changes"); 1906 } else { 1907 CHECK(dns_journal_write_transaction(journal, &resultdiff)); 1908 } 1909 INSIST(ISC_LIST_EMPTY(diff[0].tuples)); 1910 INSIST(ISC_LIST_EMPTY(diff[1].tuples)); 1911 1912 failure: 1913 dns_diff_clear(&resultdiff); 1914 dns_dbiterator_destroy(&dbit[1]); 1915 cleanup_interator0: 1916 dns_dbiterator_destroy(&dbit[0]); 1917 cleanup_journal: 1918 dns_journal_destroy(&journal); 1919 return (result); 1920} 1921 1922isc_result_t 1923dns_journal_compact(isc_mem_t *mctx, char *filename, isc_uint32_t serial, 1924 isc_uint32_t target_size) 1925{ 1926 unsigned int i; 1927 journal_pos_t best_guess; 1928 journal_pos_t current_pos; 1929 dns_journal_t *j = NULL; 1930 journal_rawheader_t rawheader; 1931 unsigned int copy_length; 1932 unsigned int len; 1933 char *buf = NULL; 1934 unsigned int size = 0; 1935 isc_result_t result; 1936 unsigned int indexend; 1937 1938 
CHECK(journal_open(mctx, filename, ISC_TRUE, ISC_FALSE, &j)); 1939 1940 if (JOURNAL_EMPTY(&j->header)) { 1941 dns_journal_destroy(&j); 1942 return (ISC_R_SUCCESS); 1943 } 1944 1945 if (DNS_SERIAL_GT(j->header.begin.serial, serial) || 1946 DNS_SERIAL_GT(serial, j->header.end.serial)) { 1947 dns_journal_destroy(&j); 1948 return (ISC_R_RANGE); 1949 } 1950 1951 /* 1952 * Cope with very small target sizes. 1953 */ 1954 indexend = sizeof(journal_rawheader_t) + 1955 j->header.index_size * sizeof(journal_rawpos_t); 1956 if (target_size < indexend * 2) 1957 target_size = target_size/2 + indexend; 1958 1959 /* 1960 * See if there is any work to do. 1961 */ 1962 if ((isc_uint32_t) j->header.end.offset < target_size) { 1963 dns_journal_destroy(&j); 1964 return (ISC_R_SUCCESS); 1965 } 1966 1967 /* 1968 * Remove overhead so space test below can succeed. 1969 */ 1970 if (target_size >= indexend) 1971 target_size -= indexend; 1972 1973 /* 1974 * Find if we can create enough free space. 1975 */ 1976 best_guess = j->header.begin; 1977 for (i = 0; i < j->header.index_size; i++) { 1978 if (POS_VALID(j->index[i]) && 1979 DNS_SERIAL_GE(serial, j->index[i].serial) && 1980 ((isc_uint32_t)(j->header.end.offset - j->index[i].offset) 1981 >= target_size / 2) && 1982 j->index[i].offset > best_guess.offset) 1983 best_guess = j->index[i]; 1984 } 1985 1986 current_pos = best_guess; 1987 while (current_pos.serial != serial) { 1988 CHECK(journal_next(j, ¤t_pos)); 1989 if (current_pos.serial == j->header.end.serial) 1990 break; 1991 1992 if (DNS_SERIAL_GE(serial, current_pos.serial) && 1993 ((isc_uint32_t)(j->header.end.offset - current_pos.offset) 1994 >= (target_size / 2)) && 1995 current_pos.offset > best_guess.offset) 1996 best_guess = current_pos; 1997 else 1998 break; 1999 } 2000 2001 INSIST(best_guess.serial != j->header.end.serial); 2002 if (best_guess.serial != serial) 2003 CHECK(journal_next(j, &best_guess)); 2004 2005 /* 2006 * Enough space to proceed? 
2007 */ 2008 if ((isc_uint32_t) (j->header.end.offset - best_guess.offset) > 2009 (isc_uint32_t) (best_guess.offset - indexend)) { 2010 dns_journal_destroy(&j); 2011 return (ISC_R_NOSPACE); 2012 } 2013 2014 copy_length = j->header.end.offset - best_guess.offset; 2015 2016 /* 2017 * Invalidate entire index, will be rebuilt at end. 2018 */ 2019 for (i = 0; i < j->header.index_size; i++) { 2020 if (POS_VALID(j->index[i])) 2021 POS_INVALIDATE(j->index[i]); 2022 } 2023 2024 /* 2025 * Convert the index into on-disk format and write 2026 * it to disk. 2027 */ 2028 CHECK(index_to_disk(j)); 2029 CHECK(journal_fsync(j)); 2030 2031 /* 2032 * Update the journal header. 2033 */ 2034 if (copy_length == 0) { 2035 j->header.begin.serial = 0; 2036 j->header.end.serial = 0; 2037 j->header.begin.offset = 0; 2038 j->header.end.offset = 0; 2039 } else { 2040 j->header.begin = best_guess; 2041 } 2042 journal_header_encode(&j->header, &rawheader); 2043 CHECK(journal_seek(j, 0)); 2044 CHECK(journal_write(j, &rawheader, sizeof(rawheader))); 2045 CHECK(journal_fsync(j)); 2046 2047 if (copy_length != 0) { 2048 /* 2049 * Copy best_guess to end into space just freed. 2050 */ 2051 size = 64*1024; 2052 if (copy_length < size) 2053 size = copy_length; 2054 buf = isc_mem_get(mctx, size); 2055 if (buf == NULL) { 2056 result = ISC_R_NOMEMORY; 2057 goto failure; 2058 } 2059 2060 for (i = 0; i < copy_length; i += size) { 2061 len = (copy_length - i) > size ? size : 2062 (copy_length - i); 2063 CHECK(journal_seek(j, best_guess.offset + i)); 2064 CHECK(journal_read(j, buf, len)); 2065 CHECK(journal_seek(j, indexend + i)); 2066 CHECK(journal_write(j, buf, len)); 2067 } 2068 2069 CHECK(journal_fsync(j)); 2070 2071 /* 2072 * Compute new header. 2073 */ 2074 j->header.begin.offset = indexend; 2075 j->header.end.offset = indexend + copy_length; 2076 /* 2077 * Update the journal header. 
2078 */ 2079 journal_header_encode(&j->header, &rawheader); 2080 CHECK(journal_seek(j, 0)); 2081 CHECK(journal_write(j, &rawheader, sizeof(rawheader))); 2082 CHECK(journal_fsync(j)); 2083 2084 /* 2085 * Build new index. 2086 */ 2087 current_pos = j->header.begin; 2088 while (current_pos.serial != j->header.end.serial) { 2089 index_add(j, ¤t_pos); 2090 CHECK(journal_next(j, ¤t_pos)); 2091 } 2092 2093 /* 2094 * Write index. 2095 */ 2096 CHECK(index_to_disk(j)); 2097 CHECK(journal_fsync(j)); 2098 2099 indexend = j->header.end.offset; 2100 } 2101 dns_journal_destroy(&j); 2102 (void)isc_file_truncate(filename, (isc_offset_t)indexend); 2103 result = ISC_R_SUCCESS; 2104 2105 failure: 2106 if (buf != NULL) 2107 isc_mem_put(mctx, buf, size); 2108 if (j != NULL) 2109 dns_journal_destroy(&j); 2110 return (result); 2111} 2112 2113static isc_result_t 2114index_to_disk(dns_journal_t *j) { 2115 isc_result_t result = ISC_R_SUCCESS; 2116 2117 if (j->header.index_size != 0) { 2118 unsigned int i; 2119 unsigned char *p; 2120 unsigned int rawbytes; 2121 2122 rawbytes = j->header.index_size * sizeof(journal_rawpos_t); 2123 2124 p = j->rawindex; 2125 for (i = 0; i < j->header.index_size; i++) { 2126 encode_uint32(j->index[i].serial, p); 2127 p += 4; 2128 encode_uint32(j->index[i].offset, p); 2129 p += 4; 2130 } 2131 INSIST(p == j->rawindex + rawbytes); 2132 2133 CHECK(journal_seek(j, sizeof(journal_rawheader_t))); 2134 CHECK(journal_write(j, j->rawindex, rawbytes)); 2135 } 2136failure: 2137 return (result); 2138} 2139