svndiff.c revision 289166
1/* 2 * svndiff.c -- Encoding and decoding svndiff-format deltas. 3 * 4 * ==================================================================== 5 * Licensed to the Apache Software Foundation (ASF) under one 6 * or more contributor license agreements. See the NOTICE file 7 * distributed with this work for additional information 8 * regarding copyright ownership. The ASF licenses this file 9 * to you under the Apache License, Version 2.0 (the 10 * "License"); you may not use this file except in compliance 11 * with the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, 16 * software distributed under the License is distributed on an 17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 18 * KIND, either express or implied. See the License for the 19 * specific language governing permissions and limitations 20 * under the License. 21 * ==================================================================== 22 */ 23 24 25#include <assert.h> 26#include <string.h> 27#include "svn_delta.h" 28#include "svn_io.h" 29#include "delta.h" 30#include "svn_pools.h" 31#include "svn_private_config.h" 32#include <zlib.h> 33 34#include "private/svn_error_private.h" 35#include "private/svn_delta_private.h" 36 37/* The zlib compressBound function was not exported until 1.2.0. */ 38#if ZLIB_VERNUM >= 0x1200 39#define svnCompressBound(LEN) compressBound(LEN) 40#else 41#define svnCompressBound(LEN) ((LEN) + ((LEN) >> 12) + ((LEN) >> 14) + 11) 42#endif 43 44/* For svndiff1, address/instruction/new data under this size will not 45 be compressed using zlib as a secondary compressor. */ 46#define MIN_COMPRESS_SIZE 512 47 48/* ----- Text delta to svndiff ----- */ 49 50/* We make one of these and get it passed back to us in calls to the 51 window handler. We only use it to record the write function and 52 baton passed to svn_txdelta_to_svndiff3(). */ 53struct encoder_baton { 54 svn_stream_t *output; 55 svn_boolean_t header_done; 56 int version; 57 int compression_level; 58 apr_pool_t *pool; 59}; 60 61/* This is at least as big as the largest size of an integer that 62 encode_int can generate; it is sufficient for creating buffers for 63 it to write into. This assumes that integers are at most 64 bits, 64 and so 10 bytes (with 7 bits of information each) are sufficient to 65 represent them. */ 66#define MAX_ENCODED_INT_LEN 10 67/* This is at least as big as the largest size for a single instruction. */ 68#define MAX_INSTRUCTION_LEN (2*MAX_ENCODED_INT_LEN+1) 69/* This is at least as big as the largest possible instructions 70 section: in theory, the instructions could be SVN_DELTA_WINDOW_SIZE 71 1-byte copy-from-source instructions (though this is very unlikely). */ 72#define MAX_INSTRUCTION_SECTION_LEN (SVN_DELTA_WINDOW_SIZE*MAX_INSTRUCTION_LEN) 73 74/* Encode VAL into the buffer P using the variable-length svndiff 75 integer format. Return the incremented value of P after the 76 encoded bytes have been written. P must point to a buffer of size 77 at least MAX_ENCODED_INT_LEN. 78 79 This encoding uses the high bit of each byte as a continuation bit 80 and the other seven bits as data bits. High-order data bits are 81 encoded first, followed by lower-order bits, so the value can be 82 reconstructed by concatenating the data bits from left to right and 83 interpreting the result as a binary number. Examples (brackets 84 denote byte boundaries, spaces are for clarity only): 85 86 1 encodes as [0 0000001] 87 33 encodes as [0 0100001] 88 129 encodes as [1 0000001] [0 0000001] 89 2000 encodes as [1 0001111] [0 1010000] 90*/ 91static unsigned char * 92encode_int(unsigned char *p, svn_filesize_t val) 93{ 94 int n; 95 svn_filesize_t v; 96 unsigned char cont; 97 98 SVN_ERR_ASSERT_NO_RETURN(val >= 0); 99 100 /* Figure out how many bytes we'll need. */ 101 v = val >> 7; 102 n = 1; 103 while (v > 0) 104 { 105 v = v >> 7; 106 n++; 107 } 108 109 SVN_ERR_ASSERT_NO_RETURN(n <= MAX_ENCODED_INT_LEN); 110 111 /* Encode the remaining bytes; n is always the number of bytes 112 coming after the one we're encoding. */ 113 while (--n >= 0) 114 { 115 cont = ((n > 0) ? 0x1 : 0x0) << 7; 116 *p++ = (unsigned char)(((val >> (n * 7)) & 0x7f) | cont); 117 } 118 119 return p; 120} 121 122 123/* Append an encoded integer to a string. */ 124static void 125append_encoded_int(svn_stringbuf_t *header, svn_filesize_t val) 126{ 127 unsigned char buf[MAX_ENCODED_INT_LEN], *p; 128 129 p = encode_int(buf, val); 130 svn_stringbuf_appendbytes(header, (const char *)buf, p - buf); 131} 132 133/* If IN is a string that is >= MIN_COMPRESS_SIZE and the COMPRESSION_LEVEL 134 is not SVN_DELTA_COMPRESSION_LEVEL_NONE, zlib compress it and places the 135 result in OUT, with an integer prepended specifying the original size. 136 If IN is < MIN_COMPRESS_SIZE, or if the compressed version of IN was no 137 smaller than the original IN, OUT will be a copy of IN with the size 138 prepended as an integer. */ 139static svn_error_t * 140zlib_encode(const char *data, 141 apr_size_t len, 142 svn_stringbuf_t *out, 143 int compression_level) 144{ 145 unsigned long endlen; 146 apr_size_t intlen; 147 148 svn_stringbuf_setempty(out); 149 append_encoded_int(out, len); 150 intlen = out->len; 151 152 /* Compression initialization overhead is considered to large for 153 short buffers. Also, if we don't actually want to compress data, 154 ZLIB will produce an output no shorter than the input. Hence, 155 the DATA would directly appended to OUT, so we can do that directly 156 without calling ZLIB before. */ 157 if ( (len < MIN_COMPRESS_SIZE) 158 || (compression_level == SVN_DELTA_COMPRESSION_LEVEL_NONE)) 159 { 160 svn_stringbuf_appendbytes(out, data, len); 161 } 162 else 163 { 164 int zerr; 165 166 svn_stringbuf_ensure(out, svnCompressBound(len) + intlen); 167 endlen = out->blocksize; 168 169 zerr = compress2((unsigned char *)out->data + intlen, &endlen, 170 (const unsigned char *)data, len, 171 compression_level); 172 if (zerr != Z_OK) 173 return svn_error_trace(svn_error__wrap_zlib( 174 zerr, "compress2", 175 _("Compression of svndiff data failed"))); 176 177 /* Compression didn't help :(, just append the original text */ 178 if (endlen >= len) 179 { 180 svn_stringbuf_appendbytes(out, data, len); 181 return SVN_NO_ERROR; 182 } 183 out->len = endlen + intlen; 184 out->data[out->len] = 0; 185 } 186 return SVN_NO_ERROR; 187} 188 189static svn_error_t * 190send_simple_insertion_window(svn_txdelta_window_t *window, 191 struct encoder_baton *eb) 192{ 193 unsigned char headers[4 + 5 * MAX_ENCODED_INT_LEN + MAX_INSTRUCTION_LEN]; 194 unsigned char ibuf[MAX_INSTRUCTION_LEN]; 195 unsigned char *header_current; 196 apr_size_t header_len; 197 apr_size_t ip_len, i; 198 apr_size_t len = window->new_data->len; 199 200 /* there is only one target copy op. It must span the whole window */ 201 assert(window->ops[0].action_code == svn_txdelta_new); 202 assert(window->ops[0].length == window->tview_len); 203 assert(window->ops[0].offset == 0); 204 205 /* write stream header if necessary */ 206 if (!eb->header_done) 207 { 208 eb->header_done = TRUE; 209 headers[0] = 'S'; 210 headers[1] = 'V'; 211 headers[2] = 'N'; 212 headers[3] = (unsigned char)eb->version; 213 header_current = headers + 4; 214 } 215 else 216 { 217 header_current = headers; 218 } 219 220 /* Encode the action code and length. */ 221 if (window->tview_len >> 6 == 0) 222 { 223 ibuf[0] = (unsigned char)(window->tview_len + (0x2 << 6)); 224 ip_len = 1; 225 } 226 else 227 { 228 ibuf[0] = (0x2 << 6); 229 ip_len = encode_int(ibuf + 1, window->tview_len) - ibuf; 230 } 231 232 /* encode the window header. Please note that the source window may 233 * have content despite not being used for deltification. */ 234 header_current = encode_int(header_current, window->sview_offset); 235 header_current = encode_int(header_current, window->sview_len); 236 header_current = encode_int(header_current, window->tview_len); 237 header_current[0] = (unsigned char)ip_len; /* 1 instruction */ 238 header_current = encode_int(&header_current[1], len); 239 240 /* append instructions (1 to a handful of bytes) */ 241 for (i = 0; i < ip_len; ++i) 242 header_current[i] = ibuf[i]; 243 244 header_len = header_current - headers + ip_len; 245 246 /* Write out the window. */ 247 SVN_ERR(svn_stream_write(eb->output, (const char *)headers, &header_len)); 248 if (len) 249 SVN_ERR(svn_stream_write(eb->output, window->new_data->data, &len)); 250 251 return SVN_NO_ERROR; 252} 253 254static svn_error_t * 255window_handler(svn_txdelta_window_t *window, void *baton) 256{ 257 struct encoder_baton *eb = baton; 258 apr_pool_t *pool; 259 svn_stringbuf_t *instructions; 260 svn_stringbuf_t *i1; 261 svn_stringbuf_t *header; 262 const svn_string_t *newdata; 263 unsigned char ibuf[MAX_INSTRUCTION_LEN], *ip; 264 const svn_txdelta_op_t *op; 265 apr_size_t len; 266 267 /* use specialized code if there is no source */ 268 if (window && !window->src_ops && window->num_ops == 1 && !eb->version) 269 return svn_error_trace(send_simple_insertion_window(window, eb)); 270 271 /* Make sure we write the header. */ 272 if (!eb->header_done) 273 { 274 char svnver[4] = {'S','V','N','\0'}; 275 len = 4; 276 svnver[3] = (char)eb->version; 277 SVN_ERR(svn_stream_write(eb->output, svnver, &len)); 278 eb->header_done = TRUE; 279 } 280 281 if (window == NULL) 282 { 283 svn_stream_t *output = eb->output; 284 285 /* We're done; clean up. 286 287 We clean our pool first. Given that the output stream was passed 288 TO us, we'll assume it has a longer lifetime, and that it will not 289 be affected by our pool destruction. 290 291 The contrary point of view (close the stream first): that could 292 tell our user that everything related to the output stream is done, 293 and a cleanup of the user pool should occur. However, that user 294 pool could include the subpool we created for our work (eb->pool), 295 which would then make our call to svn_pool_destroy() puke. 296 */ 297 svn_pool_destroy(eb->pool); 298 299 return svn_stream_close(output); 300 } 301 302 /* create the necessary data buffers */ 303 pool = svn_pool_create(eb->pool); 304 instructions = svn_stringbuf_create_empty(pool); 305 i1 = svn_stringbuf_create_empty(pool); 306 header = svn_stringbuf_create_empty(pool); 307 308 /* Encode the instructions. */ 309 for (op = window->ops; op < window->ops + window->num_ops; op++) 310 { 311 /* Encode the action code and length. */ 312 ip = ibuf; 313 switch (op->action_code) 314 { 315 case svn_txdelta_source: *ip = 0; break; 316 case svn_txdelta_target: *ip = (0x1 << 6); break; 317 case svn_txdelta_new: *ip = (0x2 << 6); break; 318 } 319 if (op->length >> 6 == 0) 320 *ip++ |= (unsigned char)op->length; 321 else 322 ip = encode_int(ip + 1, op->length); 323 if (op->action_code != svn_txdelta_new) 324 ip = encode_int(ip, op->offset); 325 svn_stringbuf_appendbytes(instructions, (const char *)ibuf, ip - ibuf); 326 } 327 328 /* Encode the header. */ 329 append_encoded_int(header, window->sview_offset); 330 append_encoded_int(header, window->sview_len); 331 append_encoded_int(header, window->tview_len); 332 if (eb->version == 1) 333 { 334 SVN_ERR(zlib_encode(instructions->data, instructions->len, 335 i1, eb->compression_level)); 336 instructions = i1; 337 } 338 append_encoded_int(header, instructions->len); 339 if (eb->version == 1) 340 { 341 svn_stringbuf_t *temp = svn_stringbuf_create_empty(pool); 342 svn_string_t *tempstr = svn_string_create_empty(pool); 343 SVN_ERR(zlib_encode(window->new_data->data, window->new_data->len, 344 temp, eb->compression_level)); 345 tempstr->data = temp->data; 346 tempstr->len = temp->len; 347 newdata = tempstr; 348 } 349 else 350 newdata = window->new_data; 351 352 append_encoded_int(header, newdata->len); 353 354 /* Write out the window. */ 355 len = header->len; 356 SVN_ERR(svn_stream_write(eb->output, header->data, &len)); 357 if (instructions->len > 0) 358 { 359 len = instructions->len; 360 SVN_ERR(svn_stream_write(eb->output, instructions->data, &len)); 361 } 362 if (newdata->len > 0) 363 { 364 len = newdata->len; 365 SVN_ERR(svn_stream_write(eb->output, newdata->data, &len)); 366 } 367 368 svn_pool_destroy(pool); 369 return SVN_NO_ERROR; 370} 371 372void 373svn_txdelta_to_svndiff3(svn_txdelta_window_handler_t *handler, 374 void **handler_baton, 375 svn_stream_t *output, 376 int svndiff_version, 377 int compression_level, 378 apr_pool_t *pool) 379{ 380 apr_pool_t *subpool = svn_pool_create(pool); 381 struct encoder_baton *eb; 382 383 eb = apr_palloc(subpool, sizeof(*eb)); 384 eb->output = output; 385 eb->header_done = FALSE; 386 eb->pool = subpool; 387 eb->version = svndiff_version; 388 eb->compression_level = compression_level; 389 390 *handler = window_handler; 391 *handler_baton = eb; 392} 393 394void 395svn_txdelta_to_svndiff2(svn_txdelta_window_handler_t *handler, 396 void **handler_baton, 397 svn_stream_t *output, 398 int svndiff_version, 399 apr_pool_t *pool) 400{ 401 svn_txdelta_to_svndiff3(handler, handler_baton, output, svndiff_version, 402 SVN_DELTA_COMPRESSION_LEVEL_DEFAULT, pool); 403} 404 405void 406svn_txdelta_to_svndiff(svn_stream_t *output, 407 apr_pool_t *pool, 408 svn_txdelta_window_handler_t *handler, 409 void **handler_baton) 410{ 411 svn_txdelta_to_svndiff3(handler, handler_baton, output, 0, 412 SVN_DELTA_COMPRESSION_LEVEL_DEFAULT, pool); 413} 414 415 416/* ----- svndiff to text delta ----- */ 417 418/* An svndiff parser object. */ 419struct decode_baton 420{ 421 /* Once the svndiff parser has enough data buffered to create a 422 "window", it passes this window to the caller's consumer routine. */ 423 svn_txdelta_window_handler_t consumer_func; 424 void *consumer_baton; 425 426 /* Pool to create subpools from; each developing window will be a 427 subpool. */ 428 apr_pool_t *pool; 429 430 /* The current subpool which contains our current window-buffer. */ 431 apr_pool_t *subpool; 432 433 /* The actual svndiff data buffer, living within subpool. */ 434 svn_stringbuf_t *buffer; 435 436 /* The offset and size of the last source view, so that we can check 437 to make sure the next one isn't sliding backwards. */ 438 svn_filesize_t last_sview_offset; 439 apr_size_t last_sview_len; 440 441 /* We have to discard four bytes at the beginning for the header. 442 This field keeps track of how many of those bytes we have read. */ 443 apr_size_t header_bytes; 444 445 /* Do we want an error to occur when we close the stream that 446 indicates we didn't send the whole svndiff data? If you plan to 447 not transmit the whole svndiff data stream, you will want this to 448 be FALSE. */ 449 svn_boolean_t error_on_early_close; 450 451 /* svndiff version in use by delta. */ 452 unsigned char version; 453}; 454 455 456/* Decode an svndiff-encoded integer into *VAL and return a pointer to 457 the byte after the integer. The bytes to be decoded live in the 458 range [P..END-1]. If these bytes do not contain a whole encoded 459 integer, return NULL; in this case *VAL is undefined. 460 461 See the comment for encode_int() earlier in this file for more detail on 462 the encoding format. */ 463static const unsigned char * 464decode_file_offset(svn_filesize_t *val, 465 const unsigned char *p, 466 const unsigned char *end) 467{ 468 svn_filesize_t temp = 0; 469 470 if (p + MAX_ENCODED_INT_LEN < end) 471 end = p + MAX_ENCODED_INT_LEN; 472 /* Decode bytes until we're done. */ 473 while (p < end) 474 { 475 /* Don't use svn_filesize_t here, because this might be 64 bits 476 * on 32 bit targets. Optimizing compilers may or may not be 477 * able to reduce that to the effective code below. */ 478 unsigned int c = *p++; 479 480 temp = (temp << 7) | (c & 0x7f); 481 if (c < 0x80) 482 { 483 *val = temp; 484 return p; 485 } 486 } 487 488 return NULL; 489} 490 491 492/* Same as above, only decode into a size variable. */ 493static const unsigned char * 494decode_size(apr_size_t *val, 495 const unsigned char *p, 496 const unsigned char *end) 497{ 498 apr_size_t temp = 0; 499 500 if (p + MAX_ENCODED_INT_LEN < end) 501 end = p + MAX_ENCODED_INT_LEN; 502 /* Decode bytes until we're done. */ 503 while (p < end) 504 { 505 apr_size_t c = *p++; 506 507 temp = (temp << 7) | (c & 0x7f); 508 if (c < 0x80) 509 { 510 *val = temp; 511 return p; 512 } 513 } 514 515 return NULL; 516} 517 518/* Decode the possibly-zlib compressed string of length INLEN that is in 519 IN, into OUT. We expect an integer is prepended to IN that specifies 520 the original size, and that if encoded size == original size, that the 521 remaining data is not compressed. 522 In that case, we will simply return pointer into IN as data pointer for 523 OUT, COPYLESS_ALLOWED has been set. The, the caller is expected not to 524 modify the contents of OUT. 525 An error is returned if the decoded length exceeds the given LIMIT. 526 */ 527static svn_error_t * 528zlib_decode(const unsigned char *in, apr_size_t inLen, svn_stringbuf_t *out, 529 apr_size_t limit) 530{ 531 apr_size_t len; 532 const unsigned char *oldplace = in; 533 534 /* First thing in the string is the original length. */ 535 in = decode_size(&len, in, in + inLen); 536 if (in == NULL) 537 return svn_error_create(SVN_ERR_SVNDIFF_INVALID_COMPRESSED_DATA, NULL, 538 _("Decompression of svndiff data failed: no size")); 539 if (len > limit) 540 return svn_error_create(SVN_ERR_SVNDIFF_INVALID_COMPRESSED_DATA, NULL, 541 _("Decompression of svndiff data failed: " 542 "size too large")); 543 /* We need to subtract the size of the encoded original length off the 544 * still remaining input length. */ 545 inLen -= (in - oldplace); 546 if (inLen == len) 547 { 548 svn_stringbuf_ensure(out, len); 549 memcpy(out->data, in, len); 550 out->data[len] = 0; 551 out->len = len; 552 553 return SVN_NO_ERROR; 554 } 555 else 556 { 557 unsigned long zlen = len; 558 int zerr; 559 560 svn_stringbuf_ensure(out, len); 561 zerr = uncompress((unsigned char *)out->data, &zlen, in, inLen); 562 if (zerr != Z_OK) 563 return svn_error_trace(svn_error__wrap_zlib( 564 zerr, "uncompress", 565 _("Decompression of svndiff data failed"))); 566 567 /* Zlib should not produce something that has a different size than the 568 original length we stored. */ 569 if (zlen != len) 570 return svn_error_create(SVN_ERR_SVNDIFF_INVALID_COMPRESSED_DATA, 571 NULL, 572 _("Size of uncompressed data " 573 "does not match stored original length")); 574 out->data[zlen] = 0; 575 out->len = zlen; 576 } 577 return SVN_NO_ERROR; 578} 579 580/* Decode an instruction into OP, returning a pointer to the text 581 after the instruction. Note that if the action code is 582 svn_txdelta_new, the offset field of *OP will not be set. */ 583static const unsigned char * 584decode_instruction(svn_txdelta_op_t *op, 585 const unsigned char *p, 586 const unsigned char *end) 587{ 588 apr_size_t c; 589 apr_size_t action; 590 591 if (p == end) 592 return NULL; 593 594 /* We need this more than once */ 595 c = *p++; 596 597 /* Decode the instruction selector. */ 598 action = (c >> 6) & 0x3; 599 if (action >= 0x3) 600 return NULL; 601 602 /* This relies on enum svn_delta_action values to match and never to be 603 redefined. */ 604 op->action_code = (enum svn_delta_action)(action); 605 606 /* Decode the length and offset. */ 607 op->length = c & 0x3f; 608 if (op->length == 0) 609 { 610 p = decode_size(&op->length, p, end); 611 if (p == NULL) 612 return NULL; 613 } 614 if (action != svn_txdelta_new) 615 { 616 p = decode_size(&op->offset, p, end); 617 if (p == NULL) 618 return NULL; 619 } 620 621 return p; 622} 623 624/* Count the instructions in the range [P..END-1] and make sure they 625 are valid for the given window lengths. Return an error if the 626 instructions are invalid; otherwise set *NINST to the number of 627 instructions. */ 628static svn_error_t * 629count_and_verify_instructions(int *ninst, 630 const unsigned char *p, 631 const unsigned char *end, 632 apr_size_t sview_len, 633 apr_size_t tview_len, 634 apr_size_t new_len) 635{ 636 int n = 0; 637 svn_txdelta_op_t op; 638 apr_size_t tpos = 0, npos = 0; 639 640 while (p < end) 641 { 642 p = decode_instruction(&op, p, end); 643 644 /* Detect any malformed operations from the instruction stream. */ 645 if (p == NULL) 646 return svn_error_createf 647 (SVN_ERR_SVNDIFF_INVALID_OPS, NULL, 648 _("Invalid diff stream: insn %d cannot be decoded"), n); 649 else if (op.length == 0) 650 return svn_error_createf 651 (SVN_ERR_SVNDIFF_INVALID_OPS, NULL, 652 _("Invalid diff stream: insn %d has length zero"), n); 653 else if (op.length > tview_len - tpos) 654 return svn_error_createf 655 (SVN_ERR_SVNDIFF_INVALID_OPS, NULL, 656 _("Invalid diff stream: insn %d overflows the target view"), n); 657 658 switch (op.action_code) 659 { 660 case svn_txdelta_source: 661 if (op.length > sview_len - op.offset || 662 op.offset > sview_len) 663 return svn_error_createf 664 (SVN_ERR_SVNDIFF_INVALID_OPS, NULL, 665 _("Invalid diff stream: " 666 "[src] insn %d overflows the source view"), n); 667 break; 668 case svn_txdelta_target: 669 if (op.offset >= tpos) 670 return svn_error_createf 671 (SVN_ERR_SVNDIFF_INVALID_OPS, NULL, 672 _("Invalid diff stream: " 673 "[tgt] insn %d starts beyond the target view position"), n); 674 break; 675 case svn_txdelta_new: 676 if (op.length > new_len - npos) 677 return svn_error_createf 678 (SVN_ERR_SVNDIFF_INVALID_OPS, NULL, 679 _("Invalid diff stream: " 680 "[new] insn %d overflows the new data section"), n); 681 npos += op.length; 682 break; 683 } 684 tpos += op.length; 685 n++; 686 } 687 if (tpos != tview_len) 688 return svn_error_create(SVN_ERR_SVNDIFF_INVALID_OPS, NULL, 689 _("Delta does not fill the target window")); 690 if (npos != new_len) 691 return svn_error_create(SVN_ERR_SVNDIFF_INVALID_OPS, NULL, 692 _("Delta does not contain enough new data")); 693 694 *ninst = n; 695 return SVN_NO_ERROR; 696} 697 698/* Given the five integer fields of a window header and a pointer to 699 the remainder of the window contents, fill in a delta window 700 structure *WINDOW. New allocations will be performed in POOL; 701 the new_data field of *WINDOW will refer directly to memory pointed 702 to by DATA. */ 703static svn_error_t * 704decode_window(svn_txdelta_window_t *window, svn_filesize_t sview_offset, 705 apr_size_t sview_len, apr_size_t tview_len, apr_size_t inslen, 706 apr_size_t newlen, const unsigned char *data, apr_pool_t *pool, 707 unsigned int version) 708{ 709 const unsigned char *insend; 710 int ninst; 711 apr_size_t npos; 712 svn_txdelta_op_t *ops, *op; 713 svn_string_t *new_data = apr_palloc(pool, sizeof(*new_data)); 714 715 window->sview_offset = sview_offset; 716 window->sview_len = sview_len; 717 window->tview_len = tview_len; 718 719 insend = data + inslen; 720 721 if (version == 1) 722 { 723 svn_stringbuf_t *instout = svn_stringbuf_create_empty(pool); 724 svn_stringbuf_t *ndout = svn_stringbuf_create_empty(pool); 725 726 SVN_ERR(zlib_decode(insend, newlen, ndout, 727 SVN_DELTA_WINDOW_SIZE)); 728 SVN_ERR(zlib_decode(data, insend - data, instout, 729 MAX_INSTRUCTION_SECTION_LEN)); 730 731 newlen = ndout->len; 732 data = (unsigned char *)instout->data; 733 insend = (unsigned char *)instout->data + instout->len; 734 735 new_data->data = (const char *) ndout->data; 736 new_data->len = newlen; 737 } 738 else 739 { 740 /* Copy the data because an svn_string_t must have the invariant 741 data[len]=='\0'. */ 742 char *buf = apr_palloc(pool, newlen + 1); 743 744 memcpy(buf, insend, newlen); 745 buf[newlen] = '\0'; 746 new_data->data = buf; 747 new_data->len = newlen; 748 } 749 750 /* Count the instructions and make sure they are all valid. */ 751 SVN_ERR(count_and_verify_instructions(&ninst, data, insend, 752 sview_len, tview_len, newlen)); 753 754 /* Allocate a buffer for the instructions and decode them. */ 755 ops = apr_palloc(pool, ninst * sizeof(*ops)); 756 npos = 0; 757 window->src_ops = 0; 758 for (op = ops; op < ops + ninst; op++) 759 { 760 data = decode_instruction(op, data, insend); 761 if (op->action_code == svn_txdelta_source) 762 ++window->src_ops; 763 else if (op->action_code == svn_txdelta_new) 764 { 765 op->offset = npos; 766 npos += op->length; 767 } 768 } 769 SVN_ERR_ASSERT(data == insend); 770 771 window->ops = ops; 772 window->num_ops = ninst; 773 window->new_data = new_data; 774 775 return SVN_NO_ERROR; 776} 777 778static svn_error_t * 779write_handler(void *baton, 780 const char *buffer, 781 apr_size_t *len) 782{ 783 struct decode_baton *db = (struct decode_baton *) baton; 784 const unsigned char *p, *end; 785 svn_filesize_t sview_offset; 786 apr_size_t sview_len, tview_len, inslen, newlen, remaining; 787 apr_size_t buflen = *len; 788 789 /* Chew up four bytes at the beginning for the header. */ 790 if (db->header_bytes < 4) 791 { 792 apr_size_t nheader = 4 - db->header_bytes; 793 if (nheader > buflen) 794 nheader = buflen; 795 if (memcmp(buffer, "SVN\0" + db->header_bytes, nheader) == 0) 796 db->version = 0; 797 else if (memcmp(buffer, "SVN\1" + db->header_bytes, nheader) == 0) 798 db->version = 1; 799 else 800 return svn_error_create(SVN_ERR_SVNDIFF_INVALID_HEADER, NULL, 801 _("Svndiff has invalid header")); 802 buflen -= nheader; 803 buffer += nheader; 804 db->header_bytes += nheader; 805 } 806 807 /* Concatenate the old with the new. */ 808 svn_stringbuf_appendbytes(db->buffer, buffer, buflen); 809 810 /* We have a buffer of svndiff data that might be good for: 811 812 a) an integral number of windows' worth of data - this is a 813 trivial case. Make windows from our data and ship them off. 814 815 b) a non-integral number of windows' worth of data - we shall 816 consume the integral portion of the window data, and then 817 somewhere in the following loop the decoding of the svndiff 818 data will run out of stuff to decode, and will simply return 819 SVN_NO_ERROR, anxiously awaiting more data. 820 */ 821 822 while (1) 823 { 824 apr_pool_t *newpool; 825 svn_txdelta_window_t window; 826 827 /* Read the header, if we have enough bytes for that. */ 828 p = (const unsigned char *) db->buffer->data; 829 end = (const unsigned char *) db->buffer->data + db->buffer->len; 830 831 p = decode_file_offset(&sview_offset, p, end); 832 if (p == NULL) 833 break; 834 835 p = decode_size(&sview_len, p, end); 836 if (p == NULL) 837 break; 838 839 p = decode_size(&tview_len, p, end); 840 if (p == NULL) 841 break; 842 843 p = decode_size(&inslen, p, end); 844 if (p == NULL) 845 break; 846 847 p = decode_size(&newlen, p, end); 848 if (p == NULL) 849 break; 850 851 if (tview_len > SVN_DELTA_WINDOW_SIZE || 852 sview_len > SVN_DELTA_WINDOW_SIZE || 853 /* for svndiff1, newlen includes the original length */ 854 newlen > SVN_DELTA_WINDOW_SIZE + MAX_ENCODED_INT_LEN || 855 inslen > MAX_INSTRUCTION_SECTION_LEN) 856 return svn_error_create(SVN_ERR_SVNDIFF_CORRUPT_WINDOW, NULL, 857 _("Svndiff contains a too-large window")); 858 859 /* Check for integer overflow. */ 860 if (sview_offset < 0 || inslen + newlen < inslen 861 || sview_len + tview_len < sview_len 862 || (apr_size_t)sview_offset + sview_len < (apr_size_t)sview_offset) 863 return svn_error_create(SVN_ERR_SVNDIFF_CORRUPT_WINDOW, NULL, 864 _("Svndiff contains corrupt window header")); 865 866 /* Check for source windows which slide backwards. */ 867 if (sview_len > 0 868 && (sview_offset < db->last_sview_offset 869 || (sview_offset + sview_len 870 < db->last_sview_offset + db->last_sview_len))) 871 return svn_error_create 872 (SVN_ERR_SVNDIFF_BACKWARD_VIEW, NULL, 873 _("Svndiff has backwards-sliding source views")); 874 875 /* Wait for more data if we don't have enough bytes for the 876 whole window. */ 877 if ((apr_size_t) (end - p) < inslen + newlen) 878 return SVN_NO_ERROR; 879 880 /* Decode the window and send it off. */ 881 SVN_ERR(decode_window(&window, sview_offset, sview_len, tview_len, 882 inslen, newlen, p, db->subpool, 883 db->version)); 884 SVN_ERR(db->consumer_func(&window, db->consumer_baton)); 885 886 /* Make a new subpool and buffer, saving aside the remaining 887 data in the old buffer. */ 888 newpool = svn_pool_create(db->pool); 889 p += inslen + newlen; 890 remaining = db->buffer->data + db->buffer->len - (const char *) p; 891 db->buffer = 892 svn_stringbuf_ncreate((const char *) p, remaining, newpool); 893 894 /* Remember the offset and length of the source view for next time. */ 895 db->last_sview_offset = sview_offset; 896 db->last_sview_len = sview_len; 897 898 /* We've copied stuff out of the old pool. Toss that pool and use 899 our new pool. 900 ### might be nice to avoid the copy and just use svn_pool_clear 901 ### to get rid of whatever the "other stuff" is. future project... 902 */ 903 svn_pool_destroy(db->subpool); 904 db->subpool = newpool; 905 } 906 907 /* At this point we processed all integral windows and DB->BUFFER is empty 908 or contains partially read window header. 909 Check that unprocessed data is not larger that theoretical maximum 910 window header size. */ 911 if (db->buffer->len > 5 * MAX_ENCODED_INT_LEN) 912 return svn_error_create(SVN_ERR_SVNDIFF_CORRUPT_WINDOW, NULL, 913 _("Svndiff contains a too-large window header")); 914 915 return SVN_NO_ERROR; 916} 917 918/* Minimal svn_stream_t write handler, doing nothing */ 919static svn_error_t * 920noop_write_handler(void *baton, 921 const char *buffer, 922 apr_size_t *len) 923{ 924 return SVN_NO_ERROR; 925} 926 927static svn_error_t * 928close_handler(void *baton) 929{ 930 struct decode_baton *db = (struct decode_baton *) baton; 931 svn_error_t *err; 932 933 /* Make sure that we're at a plausible end of stream, returning an 934 error if we are expected to do so. */ 935 if ((db->error_on_early_close) 936 && (db->header_bytes < 4 || db->buffer->len != 0)) 937 return svn_error_create(SVN_ERR_SVNDIFF_UNEXPECTED_END, NULL, 938 _("Unexpected end of svndiff input")); 939 940 /* Tell the window consumer that we're done, and clean up. */ 941 err = db->consumer_func(NULL, db->consumer_baton); 942 svn_pool_destroy(db->pool); 943 return err; 944} 945 946 947svn_stream_t * 948svn_txdelta_parse_svndiff(svn_txdelta_window_handler_t handler, 949 void *handler_baton, 950 svn_boolean_t error_on_early_close, 951 apr_pool_t *pool) 952{ 953 apr_pool_t *subpool = svn_pool_create(pool); 954 struct decode_baton *db = apr_palloc(pool, sizeof(*db)); 955 svn_stream_t *stream; 956 957 db->consumer_func = handler; 958 db->consumer_baton = handler_baton; 959 db->pool = subpool; 960 db->subpool = svn_pool_create(subpool); 961 db->buffer = svn_stringbuf_create_empty(db->subpool); 962 db->last_sview_offset = 0; 963 db->last_sview_len = 0; 964 db->header_bytes = 0; 965 db->error_on_early_close = error_on_early_close; 966 stream = svn_stream_create(db, pool); 967 968 if (handler != svn_delta_noop_window_handler) 969 { 970 svn_stream_set_write(stream, write_handler); 971 svn_stream_set_close(stream, close_handler); 972 } 973 else 974 { 975 /* And else we just ignore everything as efficiently as we can. 976 by only hooking a no-op handler */ 977 svn_stream_set_write(stream, noop_write_handler); 978 } 979 return stream; 980} 981 982 983/* Routines for reading one svndiff window at a time. */ 984 985/* Read one byte from STREAM into *BYTE. */ 986static svn_error_t * 987read_one_byte(unsigned char *byte, svn_stream_t *stream) 988{ 989 char c; 990 apr_size_t len = 1; 991 992 SVN_ERR(svn_stream_read(stream, &c, &len)); 993 if (len == 0) 994 return svn_error_create(SVN_ERR_SVNDIFF_UNEXPECTED_END, NULL, 995 _("Unexpected end of svndiff input")); 996 *byte = (unsigned char) c; 997 return SVN_NO_ERROR; 998} 999 1000/* Read and decode one integer from STREAM into *SIZE. */ 1001static svn_error_t * 1002read_one_size(apr_size_t *size, svn_stream_t *stream) 1003{ 1004 unsigned char c; 1005 1006 *size = 0; 1007 while (1) 1008 { 1009 SVN_ERR(read_one_byte(&c, stream)); 1010 *size = (*size << 7) | (c & 0x7f); 1011 if (!(c & 0x80)) 1012 break; 1013 } 1014 return SVN_NO_ERROR; 1015} 1016 1017/* Read a window header from STREAM and check it for integer overflow. */ 1018static svn_error_t * 1019read_window_header(svn_stream_t *stream, svn_filesize_t *sview_offset, 1020 apr_size_t *sview_len, apr_size_t *tview_len, 1021 apr_size_t *inslen, apr_size_t *newlen) 1022{ 1023 unsigned char c; 1024 1025 /* Read the source view offset by hand, since it's not an apr_size_t. */ 1026 *sview_offset = 0; 1027 while (1) 1028 { 1029 SVN_ERR(read_one_byte(&c, stream)); 1030 *sview_offset = (*sview_offset << 7) | (c & 0x7f); 1031 if (!(c & 0x80)) 1032 break; 1033 } 1034 1035 /* Read the four size fields. */ 1036 SVN_ERR(read_one_size(sview_len, stream)); 1037 SVN_ERR(read_one_size(tview_len, stream)); 1038 SVN_ERR(read_one_size(inslen, stream)); 1039 SVN_ERR(read_one_size(newlen, stream)); 1040 1041 if (*tview_len > SVN_DELTA_WINDOW_SIZE || 1042 *sview_len > SVN_DELTA_WINDOW_SIZE || 1043 /* for svndiff1, newlen includes the original length */ 1044 *newlen > SVN_DELTA_WINDOW_SIZE + MAX_ENCODED_INT_LEN || 1045 *inslen > MAX_INSTRUCTION_SECTION_LEN) 1046 return svn_error_create(SVN_ERR_SVNDIFF_CORRUPT_WINDOW, NULL, 1047 _("Svndiff contains a too-large window")); 1048 1049 /* Check for integer overflow. */ 1050 if (*sview_offset < 0 || *inslen + *newlen < *inslen 1051 || *sview_len + *tview_len < *sview_len 1052 || (apr_size_t)*sview_offset + *sview_len < (apr_size_t)*sview_offset) 1053 return svn_error_create(SVN_ERR_SVNDIFF_CORRUPT_WINDOW, NULL, 1054 _("Svndiff contains corrupt window header")); 1055 1056 return SVN_NO_ERROR; 1057} 1058 1059svn_error_t * 1060svn_txdelta_read_svndiff_window(svn_txdelta_window_t **window, 1061 svn_stream_t *stream, 1062 int svndiff_version, 1063 apr_pool_t *pool) 1064{ 1065 svn_filesize_t sview_offset; 1066 apr_size_t sview_len, tview_len, inslen, newlen, len; 1067 unsigned char *buf; 1068 1069 SVN_ERR(read_window_header(stream, &sview_offset, &sview_len, &tview_len, 1070 &inslen, &newlen)); 1071 len = inslen + newlen; 1072 buf = apr_palloc(pool, len); 1073 SVN_ERR(svn_stream_read(stream, (char*)buf, &len)); 1074 if (len < inslen + newlen) 1075 return svn_error_create(SVN_ERR_SVNDIFF_UNEXPECTED_END, NULL, 1076 _("Unexpected end of svndiff input")); 1077 *window = apr_palloc(pool, sizeof(**window)); 1078 return decode_window(*window, sview_offset, sview_len, tview_len, inslen, 1079 newlen, buf, pool, svndiff_version); 1080} 1081 1082 1083svn_error_t * 1084svn_txdelta_skip_svndiff_window(apr_file_t *file, 1085 int svndiff_version, 1086 apr_pool_t *pool) 1087{ 1088 svn_stream_t *stream = svn_stream_from_aprfile2(file, TRUE, pool); 1089 svn_filesize_t sview_offset; 1090 apr_size_t sview_len, tview_len, inslen, newlen; 1091 apr_off_t offset; 1092 1093 SVN_ERR(read_window_header(stream, &sview_offset, &sview_len, &tview_len, 1094 &inslen, &newlen)); 1095 1096 offset = inslen + newlen; 1097 return svn_io_file_seek(file, APR_CUR, &offset, pool); 1098} 1099 1100 1101svn_error_t * 1102svn__compress(svn_string_t *in, 1103 svn_stringbuf_t *out, 1104 int compression_level) 1105{ 1106 return zlib_encode(in->data, in->len, out, compression_level); 1107} 1108 1109svn_error_t * 1110svn__decompress(svn_string_t *in, 1111 svn_stringbuf_t *out, 1112 apr_size_t limit) 1113{ 1114 return zlib_decode((const unsigned char*)in->data, in->len, out, limit); 1115} 1116