parse-diff.c revision 362181
1/* 2 * parse-diff.c: functions for parsing diff files 3 * 4 * ==================================================================== 5 * Licensed to the Apache Software Foundation (ASF) under one 6 * or more contributor license agreements. See the NOTICE file 7 * distributed with this work for additional information 8 * regarding copyright ownership. The ASF licenses this file 9 * to you under the Apache License, Version 2.0 (the 10 * "License"); you may not use this file except in compliance 11 * with the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, 16 * software distributed under the License is distributed on an 17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 18 * KIND, either express or implied. See the License for the 19 * specific language governing permissions and limitations 20 * under the License. 21 * ==================================================================== 22 */ 23 24#include <stdlib.h> 25#include <stddef.h> 26#include <string.h> 27 28#include "svn_hash.h" 29#include "svn_types.h" 30#include "svn_error.h" 31#include "svn_io.h" 32#include "svn_pools.h" 33#include "svn_props.h" 34#include "svn_string.h" 35#include "svn_utf.h" 36#include "svn_dirent_uri.h" 37#include "svn_diff.h" 38#include "svn_ctype.h" 39#include "svn_mergeinfo.h" 40 41#include "private/svn_eol_private.h" 42#include "private/svn_dep_compat.h" 43#include "private/svn_diff_private.h" 44#include "private/svn_sorts_private.h" 45 46#include "diff.h" 47 48#include "svn_private_config.h" 49 50/* Helper macro for readability */ 51#define starts_with(str, start) \ 52 (strncmp((str), (start), strlen(start)) == 0) 53 54/* Like strlen() but for string literals. */ 55#define STRLEN_LITERAL(str) (sizeof(str) - 1) 56 57/* This struct describes a range within a file, as well as the 58 * current cursor position within the range. All numbers are in bytes. */ 59struct svn_diff__hunk_range { 60 apr_off_t start; 61 apr_off_t end; 62 apr_off_t current; 63}; 64 65struct svn_diff_hunk_t { 66 /* The patch this hunk belongs to. */ 67 const svn_patch_t *patch; 68 69 /* APR file handle to the patch file this hunk came from. */ 70 apr_file_t *apr_file; 71 72 /* Whether the hunk was interpreted as pretty-print mergeinfo. If so, 73 the hunk content is in PATCH and the rest of this hunk object is 74 mostly uninitialized. */ 75 svn_boolean_t is_pretty_print_mergeinfo; 76 77 /* Ranges used to keep track of this hunk's texts positions within 78 * the patch file. */ 79 struct svn_diff__hunk_range diff_text_range; 80 struct svn_diff__hunk_range original_text_range; 81 struct svn_diff__hunk_range modified_text_range; 82 83 /* Hunk ranges as they appeared in the patch file. 84 * All numbers are lines, not bytes. */ 85 svn_linenum_t original_start; 86 svn_linenum_t original_length; 87 svn_linenum_t modified_start; 88 svn_linenum_t modified_length; 89 90 /* Number of lines of leading and trailing hunk context. */ 91 svn_linenum_t leading_context; 92 svn_linenum_t trailing_context; 93 94 /* Did we see a 'file does not end with eol' marker in this hunk? */ 95 svn_boolean_t original_no_final_eol; 96 svn_boolean_t modified_no_final_eol; 97 98 /* Fuzz penalty, triggered by bad patch targets */ 99 svn_linenum_t original_fuzz; 100 svn_linenum_t modified_fuzz; 101}; 102 103struct svn_diff_binary_patch_t { 104 /* The patch this hunk belongs to. */ 105 const svn_patch_t *patch; 106 107 /* APR file handle to the patch file this hunk came from. */ 108 apr_file_t *apr_file; 109 110 /* Offsets inside APR_FILE representing the location of the patch */ 111 apr_off_t src_start; 112 apr_off_t src_end; 113 svn_filesize_t src_filesize; /* Expanded/final size */ 114 115 /* Offsets inside APR_FILE representing the location of the patch */ 116 apr_off_t dst_start; 117 apr_off_t dst_end; 118 svn_filesize_t dst_filesize; /* Expanded/final size */ 119}; 120 121/* Common guts of svn_diff_hunk__create_adds_single_line() and 122 * svn_diff_hunk__create_deletes_single_line(). 123 * 124 * ADD is TRUE if adding and FALSE if deleting. 125 */ 126static svn_error_t * 127add_or_delete_single_line(svn_diff_hunk_t **hunk_out, 128 const char *line, 129 const svn_patch_t *patch, 130 svn_boolean_t add, 131 apr_pool_t *result_pool, 132 apr_pool_t *scratch_pool) 133{ 134 svn_diff_hunk_t *hunk = apr_pcalloc(result_pool, sizeof(*hunk)); 135 static const char *hunk_header[] = { "@@ -1 +0,0 @@\n", "@@ -0,0 +1 @@\n" }; 136 const apr_size_t header_len = strlen(hunk_header[add]); 137 const apr_size_t len = strlen(line); 138 const apr_size_t end = header_len + (1 + len); /* The +1 is for the \n. */ 139 svn_stringbuf_t *buf = svn_stringbuf_create_ensure(end + 1, scratch_pool); 140 141 hunk->patch = patch; 142 143 /* hunk->apr_file is created below. */ 144 145 hunk->diff_text_range.start = header_len; 146 hunk->diff_text_range.current = header_len; 147 148 if (add) 149 { 150 hunk->original_text_range.start = 0; /* There's no "original" text. */ 151 hunk->original_text_range.current = 0; 152 hunk->original_text_range.end = 0; 153 hunk->original_no_final_eol = FALSE; 154 155 hunk->modified_text_range.start = header_len; 156 hunk->modified_text_range.current = header_len; 157 hunk->modified_text_range.end = end; 158 hunk->modified_no_final_eol = TRUE; 159 160 hunk->original_start = 0; 161 hunk->original_length = 0; 162 163 hunk->modified_start = 1; 164 hunk->modified_length = 1; 165 } 166 else /* delete */ 167 { 168 hunk->original_text_range.start = header_len; 169 hunk->original_text_range.current = header_len; 170 hunk->original_text_range.end = end; 171 hunk->original_no_final_eol = TRUE; 172 173 hunk->modified_text_range.start = 0; /* There's no "original" text. */ 174 hunk->modified_text_range.current = 0; 175 hunk->modified_text_range.end = 0; 176 hunk->modified_no_final_eol = FALSE; 177 178 hunk->original_start = 1; 179 hunk->original_length = 1; 180 181 hunk->modified_start = 0; 182 hunk->modified_length = 0; /* setting to '1' works too */ 183 } 184 185 hunk->leading_context = 0; 186 hunk->trailing_context = 0; 187 188 /* Create APR_FILE and put just a hunk in it (without a diff header). 189 * Save the offset of the last byte of the diff line. */ 190 svn_stringbuf_appendbytes(buf, hunk_header[add], header_len); 191 svn_stringbuf_appendbyte(buf, add ? '+' : '-'); 192 svn_stringbuf_appendbytes(buf, line, len); 193 svn_stringbuf_appendbyte(buf, '\n'); 194 svn_stringbuf_appendcstr(buf, "\\ No newline at end of hunk\n"); 195 196 hunk->diff_text_range.end = buf->len; 197 198 SVN_ERR(svn_io_open_unique_file3(&hunk->apr_file, NULL /* filename */, 199 NULL /* system tempdir */, 200 svn_io_file_del_on_pool_cleanup, 201 result_pool, scratch_pool)); 202 SVN_ERR(svn_io_file_write_full(hunk->apr_file, 203 buf->data, buf->len, 204 NULL, scratch_pool)); 205 /* No need to seek. */ 206 207 *hunk_out = hunk; 208 return SVN_NO_ERROR; 209} 210 211svn_error_t * 212svn_diff_hunk__create_adds_single_line(svn_diff_hunk_t **hunk_out, 213 const char *line, 214 const svn_patch_t *patch, 215 apr_pool_t *result_pool, 216 apr_pool_t *scratch_pool) 217{ 218 SVN_ERR(add_or_delete_single_line(hunk_out, line, patch, 219 (!patch->reverse), 220 result_pool, scratch_pool)); 221 return SVN_NO_ERROR; 222} 223 224svn_error_t * 225svn_diff_hunk__create_deletes_single_line(svn_diff_hunk_t **hunk_out, 226 const char *line, 227 const svn_patch_t *patch, 228 apr_pool_t *result_pool, 229 apr_pool_t *scratch_pool) 230{ 231 SVN_ERR(add_or_delete_single_line(hunk_out, line, patch, 232 patch->reverse, 233 result_pool, scratch_pool)); 234 return SVN_NO_ERROR; 235} 236 237void 238svn_diff_hunk_reset_diff_text(svn_diff_hunk_t *hunk) 239{ 240 hunk->diff_text_range.current = hunk->diff_text_range.start; 241} 242 243void 244svn_diff_hunk_reset_original_text(svn_diff_hunk_t *hunk) 245{ 246 if (hunk->patch->reverse) 247 hunk->modified_text_range.current = hunk->modified_text_range.start; 248 else 249 hunk->original_text_range.current = hunk->original_text_range.start; 250} 251 252void 253svn_diff_hunk_reset_modified_text(svn_diff_hunk_t *hunk) 254{ 255 if (hunk->patch->reverse) 256 hunk->original_text_range.current = hunk->original_text_range.start; 257 else 258 hunk->modified_text_range.current = hunk->modified_text_range.start; 259} 260 261svn_linenum_t 262svn_diff_hunk_get_original_start(const svn_diff_hunk_t *hunk) 263{ 264 return hunk->patch->reverse ? hunk->modified_start : hunk->original_start; 265} 266 267svn_linenum_t 268svn_diff_hunk_get_original_length(const svn_diff_hunk_t *hunk) 269{ 270 return hunk->patch->reverse ? hunk->modified_length : hunk->original_length; 271} 272 273svn_linenum_t 274svn_diff_hunk_get_modified_start(const svn_diff_hunk_t *hunk) 275{ 276 return hunk->patch->reverse ? hunk->original_start : hunk->modified_start; 277} 278 279svn_linenum_t 280svn_diff_hunk_get_modified_length(const svn_diff_hunk_t *hunk) 281{ 282 return hunk->patch->reverse ? hunk->original_length : hunk->modified_length; 283} 284 285svn_linenum_t 286svn_diff_hunk_get_leading_context(const svn_diff_hunk_t *hunk) 287{ 288 return hunk->leading_context; 289} 290 291svn_linenum_t 292svn_diff_hunk_get_trailing_context(const svn_diff_hunk_t *hunk) 293{ 294 return hunk->trailing_context; 295} 296 297svn_linenum_t 298svn_diff_hunk__get_fuzz_penalty(const svn_diff_hunk_t *hunk) 299{ 300 return hunk->patch->reverse ? hunk->original_fuzz : hunk->modified_fuzz; 301} 302 303/* Baton for the base85 stream implementation */ 304struct base85_baton_t 305{ 306 apr_file_t *file; 307 apr_pool_t *iterpool; 308 char buffer[52]; /* Bytes on current line */ 309 apr_off_t next_pos; /* Start position of next line */ 310 apr_off_t end_pos; /* Position after last line */ 311 apr_size_t buf_size; /* Bytes available (52 unless at eof) */ 312 apr_size_t buf_pos; /* Bytes in linebuffer */ 313 svn_boolean_t done; /* At eof? */ 314}; 315 316/* Implements svn_read_fn_t for the base85 read stream */ 317static svn_error_t * 318read_handler_base85(void *baton, char *buffer, apr_size_t *len) 319{ 320 struct base85_baton_t *b85b = baton; 321 apr_pool_t *iterpool = b85b->iterpool; 322 apr_size_t remaining = *len; 323 char *dest = buffer; 324 325 svn_pool_clear(iterpool); 326 327 if (b85b->done) 328 { 329 *len = 0; 330 return SVN_NO_ERROR; 331 } 332 333 while (remaining && (b85b->buf_size > b85b->buf_pos 334 || b85b->next_pos < b85b->end_pos)) 335 { 336 svn_stringbuf_t *line; 337 svn_boolean_t at_eof; 338 339 apr_size_t available = b85b->buf_size - b85b->buf_pos; 340 if (available) 341 { 342 apr_size_t n = (remaining < available) ? remaining : available; 343 344 memcpy(dest, b85b->buffer + b85b->buf_pos, n); 345 dest += n; 346 remaining -= n; 347 b85b->buf_pos += n; 348 349 if (!remaining) 350 return SVN_NO_ERROR; /* *len = OK */ 351 } 352 353 if (b85b->next_pos >= b85b->end_pos) 354 break; /* At EOF */ 355 SVN_ERR(svn_io_file_seek(b85b->file, APR_SET, &b85b->next_pos, 356 iterpool)); 357 SVN_ERR(svn_io_file_readline(b85b->file, &line, NULL, &at_eof, 358 APR_SIZE_MAX, iterpool, iterpool)); 359 if (at_eof) 360 b85b->next_pos = b85b->end_pos; 361 else 362 { 363 SVN_ERR(svn_io_file_get_offset(&b85b->next_pos, b85b->file, 364 iterpool)); 365 } 366 367 if (line->len && line->data[0] >= 'A' && line->data[0] <= 'Z') 368 b85b->buf_size = line->data[0] - 'A' + 1; 369 else if (line->len && line->data[0] >= 'a' && line->data[0] <= 'z') 370 b85b->buf_size = line->data[0] - 'a' + 26 + 1; 371 else 372 return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL, 373 _("Unexpected data in base85 section")); 374 375 if (b85b->buf_size < 52) 376 b85b->next_pos = b85b->end_pos; /* Handle as EOF */ 377 378 SVN_ERR(svn_diff__base85_decode_line(b85b->buffer, b85b->buf_size, 379 line->data + 1, line->len - 1, 380 iterpool)); 381 b85b->buf_pos = 0; 382 } 383 384 *len -= remaining; 385 b85b->done = TRUE; 386 387 return SVN_NO_ERROR; 388} 389 390/* Implements svn_close_fn_t for the base85 read stream */ 391static svn_error_t * 392close_handler_base85(void *baton) 393{ 394 struct base85_baton_t *b85b = baton; 395 396 svn_pool_destroy(b85b->iterpool); 397 398 return SVN_NO_ERROR; 399} 400 401/* Gets a stream that reads decoded base85 data from a segment of a file. 402 The current implementation might assume that both start_pos and end_pos 403 are located at line boundaries. */ 404static svn_stream_t * 405get_base85_data_stream(apr_file_t *file, 406 apr_off_t start_pos, 407 apr_off_t end_pos, 408 apr_pool_t *result_pool) 409{ 410 struct base85_baton_t *b85b = apr_pcalloc(result_pool, sizeof(*b85b)); 411 svn_stream_t *base85s = svn_stream_create(b85b, result_pool); 412 413 b85b->file = file; 414 b85b->iterpool = svn_pool_create(result_pool); 415 b85b->next_pos = start_pos; 416 b85b->end_pos = end_pos; 417 418 svn_stream_set_read2(base85s, NULL /* only full read support */, 419 read_handler_base85); 420 svn_stream_set_close(base85s, close_handler_base85); 421 return base85s; 422} 423 424/* Baton for the length verification stream functions */ 425struct length_verify_baton_t 426{ 427 svn_stream_t *inner; 428 svn_filesize_t remaining; 429}; 430 431/* Implements svn_read_fn_t for the length verification stream */ 432static svn_error_t * 433read_handler_length_verify(void *baton, char *buffer, apr_size_t *len) 434{ 435 struct length_verify_baton_t *lvb = baton; 436 apr_size_t requested_len = *len; 437 438 SVN_ERR(svn_stream_read_full(lvb->inner, buffer, len)); 439 440 if (*len > lvb->remaining) 441 return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL, 442 _("Base85 data expands to longer than declared " 443 "filesize")); 444 else if (requested_len > *len && *len != lvb->remaining) 445 return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL, 446 _("Base85 data expands to smaller than declared " 447 "filesize")); 448 449 lvb->remaining -= *len; 450 451 return SVN_NO_ERROR; 452} 453 454/* Implements svn_close_fn_t for the length verification stream */ 455static svn_error_t * 456close_handler_length_verify(void *baton) 457{ 458 struct length_verify_baton_t *lvb = baton; 459 460 return svn_error_trace(svn_stream_close(lvb->inner)); 461} 462 463/* Gets a stream that verifies on reads that the inner stream is exactly 464 of the specified length */ 465static svn_stream_t * 466get_verify_length_stream(svn_stream_t *inner, 467 svn_filesize_t expected_size, 468 apr_pool_t *result_pool) 469{ 470 struct length_verify_baton_t *lvb = apr_palloc(result_pool, sizeof(*lvb)); 471 svn_stream_t *len_stream = svn_stream_create(lvb, result_pool); 472 473 lvb->inner = inner; 474 lvb->remaining = expected_size; 475 476 svn_stream_set_read2(len_stream, NULL /* only full read support */, 477 read_handler_length_verify); 478 svn_stream_set_close(len_stream, close_handler_length_verify); 479 480 return len_stream; 481} 482 483svn_stream_t * 484svn_diff_get_binary_diff_original_stream(const svn_diff_binary_patch_t *bpatch, 485 apr_pool_t *result_pool) 486{ 487 svn_stream_t *s = get_base85_data_stream(bpatch->apr_file, bpatch->src_start, 488 bpatch->src_end, result_pool); 489 490 s = svn_stream_compressed(s, result_pool); 491 492 /* ### If we (ever) want to support the DELTA format, then we should hook the 493 undelta handling here */ 494 495 return get_verify_length_stream(s, bpatch->src_filesize, result_pool); 496} 497 498svn_stream_t * 499svn_diff_get_binary_diff_result_stream(const svn_diff_binary_patch_t *bpatch, 500 apr_pool_t *result_pool) 501{ 502 svn_stream_t *s = get_base85_data_stream(bpatch->apr_file, bpatch->dst_start, 503 bpatch->dst_end, result_pool); 504 505 s = svn_stream_compressed(s, result_pool); 506 507 /* ### If we (ever) want to support the DELTA format, then we should hook the 508 undelta handling here */ 509 510 return get_verify_length_stream(s, bpatch->dst_filesize, result_pool); 511} 512 513/* Try to parse a positive number from a decimal number encoded 514 * in the string NUMBER. Return parsed number in OFFSET, and return 515 * TRUE if parsing was successful. */ 516static svn_boolean_t 517parse_offset(svn_linenum_t *offset, const char *number) 518{ 519 svn_error_t *err; 520 apr_uint64_t val; 521 522 err = svn_cstring_strtoui64(&val, number, 0, SVN_LINENUM_MAX_VALUE, 10); 523 if (err) 524 { 525 svn_error_clear(err); 526 return FALSE; 527 } 528 529 *offset = (svn_linenum_t)val; 530 531 return TRUE; 532} 533 534/* Try to parse a hunk range specification from the string RANGE. 535 * Return parsed information in *START and *LENGTH, and return TRUE 536 * if the range parsed correctly. Note: This function may modify the 537 * input value RANGE. */ 538static svn_boolean_t 539parse_range(svn_linenum_t *start, svn_linenum_t *length, char *range) 540{ 541 char *comma; 542 543 if (*range == 0) 544 return FALSE; 545 546 comma = strstr(range, ","); 547 if (comma) 548 { 549 if (strlen(comma + 1) > 0) 550 { 551 /* Try to parse the length. */ 552 if (! parse_offset(length, comma + 1)) 553 return FALSE; 554 555 /* Snip off the end of the string, 556 * so we can comfortably parse the line 557 * number the hunk starts at. */ 558 *comma = '\0'; 559 } 560 else 561 /* A comma but no length? */ 562 return FALSE; 563 } 564 else 565 { 566 *length = 1; 567 } 568 569 /* Try to parse the line number the hunk starts at. */ 570 return parse_offset(start, range); 571} 572 573/* Try to parse a hunk header in string HEADER, putting parsed information 574 * into HUNK. Return TRUE if the header parsed correctly. ATAT is the 575 * character string used to delimit the hunk header. 576 * Do all allocations in POOL. */ 577static svn_boolean_t 578parse_hunk_header(const char *header, svn_diff_hunk_t *hunk, 579 const char *atat, apr_pool_t *pool) 580{ 581 const char *p; 582 const char *start; 583 svn_stringbuf_t *range; 584 585 p = header + strlen(atat); 586 if (*p != ' ') 587 /* No. */ 588 return FALSE; 589 p++; 590 if (*p != '-') 591 /* Nah... */ 592 return FALSE; 593 /* OK, this may be worth allocating some memory for... */ 594 range = svn_stringbuf_create_ensure(31, pool); 595 start = ++p; 596 while (*p && *p != ' ') 597 { 598 p++; 599 } 600 601 if (*p != ' ') 602 /* No no no... */ 603 return FALSE; 604 605 svn_stringbuf_appendbytes(range, start, p - start); 606 607 /* Try to parse the first range. */ 608 if (! parse_range(&hunk->original_start, &hunk->original_length, range->data)) 609 return FALSE; 610 611 /* Clear the stringbuf so we can reuse it for the second range. */ 612 svn_stringbuf_setempty(range); 613 p++; 614 if (*p != '+') 615 /* Eeek! */ 616 return FALSE; 617 /* OK, this may be worth copying... */ 618 start = ++p; 619 while (*p && *p != ' ') 620 { 621 p++; 622 } 623 if (*p != ' ') 624 /* No no no... */ 625 return FALSE; 626 627 svn_stringbuf_appendbytes(range, start, p - start); 628 629 /* Check for trailing @@ */ 630 p++; 631 if (! starts_with(p, atat)) 632 return FALSE; 633 634 /* There may be stuff like C-function names after the trailing @@, 635 * but we ignore that. */ 636 637 /* Try to parse the second range. */ 638 if (! parse_range(&hunk->modified_start, &hunk->modified_length, range->data)) 639 return FALSE; 640 641 /* Hunk header is good. */ 642 return TRUE; 643} 644 645/* Read a line of original or modified hunk text from the specified 646 * RANGE within FILE. FILE is expected to contain unidiff text. 647 * Leading unidiff symbols ('+', '-', and ' ') are removed from the line, 648 * Any lines commencing with the VERBOTEN character are discarded. 649 * VERBOTEN should be '+' or '-', depending on which form of hunk text 650 * is being read. NO_FINAL_EOL declares if the hunk contains a no final 651 * EOL marker. 652 * 653 * All other parameters are as in svn_diff_hunk_readline_original_text() 654 * and svn_diff_hunk_readline_modified_text(). 655 */ 656static svn_error_t * 657hunk_readline_original_or_modified(apr_file_t *file, 658 struct svn_diff__hunk_range *range, 659 svn_stringbuf_t **stringbuf, 660 const char **eol, 661 svn_boolean_t *eof, 662 char verboten, 663 svn_boolean_t no_final_eol, 664 apr_pool_t *result_pool, 665 apr_pool_t *scratch_pool) 666{ 667 apr_size_t max_len; 668 svn_boolean_t filtered; 669 apr_off_t pos; 670 svn_stringbuf_t *str; 671 const char *eol_p; 672 apr_pool_t *last_pool; 673 674 if (!eol) 675 eol = &eol_p; 676 677 if (range->current >= range->end) 678 { 679 /* We're past the range. Indicate that no bytes can be read. */ 680 *eof = TRUE; 681 *eol = NULL; 682 *stringbuf = svn_stringbuf_create_empty(result_pool); 683 return SVN_NO_ERROR; 684 } 685 686 SVN_ERR(svn_io_file_get_offset(&pos, file, scratch_pool)); 687 SVN_ERR(svn_io_file_seek(file, APR_SET, &range->current, scratch_pool)); 688 689 /* It's not ITERPOOL because we use data allocated in LAST_POOL out 690 of the loop. */ 691 last_pool = svn_pool_create(scratch_pool); 692 do 693 { 694 svn_pool_clear(last_pool); 695 696 max_len = range->end - range->current; 697 SVN_ERR(svn_io_file_readline(file, &str, eol, eof, max_len, 698 last_pool, last_pool)); 699 SVN_ERR(svn_io_file_get_offset(&range->current, file, last_pool)); 700 filtered = (str->data[0] == verboten || str->data[0] == '\\'); 701 } 702 while (filtered && ! *eof); 703 704 if (filtered) 705 { 706 /* EOF, return an empty string. */ 707 *stringbuf = svn_stringbuf_create_ensure(0, result_pool); 708 *eol = NULL; 709 } 710 else if (str->data[0] == '+' || str->data[0] == '-' || str->data[0] == ' ') 711 { 712 /* Shave off leading unidiff symbols. */ 713 *stringbuf = svn_stringbuf_create(str->data + 1, result_pool); 714 } 715 else 716 { 717 /* Return the line as-is. Handle as a chopped leading spaces */ 718 *stringbuf = svn_stringbuf_dup(str, result_pool); 719 } 720 721 if (!filtered && *eof && !*eol && *str->data) 722 { 723 /* Ok, we miss a final EOL in the patch file, but didn't see a 724 no eol marker line. 725 726 We should report that we had an EOL or the patch code will 727 misbehave (and it knows nothing about no eol markers) */ 728 729 if (!no_final_eol && eol != &eol_p) 730 { 731 apr_off_t start = 0; 732 733 SVN_ERR(svn_io_file_seek(file, APR_SET, &start, scratch_pool)); 734 735 SVN_ERR(svn_io_file_readline(file, &str, eol, NULL, APR_SIZE_MAX, 736 scratch_pool, scratch_pool)); 737 738 /* Every patch file that has hunks has at least one EOL*/ 739 SVN_ERR_ASSERT(*eol != NULL); 740 } 741 742 *eof = FALSE; 743 /* Fall through to seek back to the right location */ 744 } 745 SVN_ERR(svn_io_file_seek(file, APR_SET, &pos, scratch_pool)); 746 747 svn_pool_destroy(last_pool); 748 return SVN_NO_ERROR; 749} 750 751svn_error_t * 752svn_diff_hunk_readline_original_text(svn_diff_hunk_t *hunk, 753 svn_stringbuf_t **stringbuf, 754 const char **eol, 755 svn_boolean_t *eof, 756 apr_pool_t *result_pool, 757 apr_pool_t *scratch_pool) 758{ 759 return svn_error_trace( 760 hunk_readline_original_or_modified(hunk->apr_file, 761 hunk->patch->reverse ? 762 &hunk->modified_text_range : 763 &hunk->original_text_range, 764 stringbuf, eol, eof, 765 hunk->patch->reverse ? '-' : '+', 766 hunk->patch->reverse 767 ? hunk->modified_no_final_eol 768 : hunk->original_no_final_eol, 769 result_pool, scratch_pool)); 770} 771 772svn_error_t * 773svn_diff_hunk_readline_modified_text(svn_diff_hunk_t *hunk, 774 svn_stringbuf_t **stringbuf, 775 const char **eol, 776 svn_boolean_t *eof, 777 apr_pool_t *result_pool, 778 apr_pool_t *scratch_pool) 779{ 780 return svn_error_trace( 781 hunk_readline_original_or_modified(hunk->apr_file, 782 hunk->patch->reverse ? 783 &hunk->original_text_range : 784 &hunk->modified_text_range, 785 stringbuf, eol, eof, 786 hunk->patch->reverse ? '+' : '-', 787 hunk->patch->reverse 788 ? hunk->original_no_final_eol 789 : hunk->modified_no_final_eol, 790 result_pool, scratch_pool)); 791} 792 793svn_error_t * 794svn_diff_hunk_readline_diff_text(svn_diff_hunk_t *hunk, 795 svn_stringbuf_t **stringbuf, 796 const char **eol, 797 svn_boolean_t *eof, 798 apr_pool_t *result_pool, 799 apr_pool_t *scratch_pool) 800{ 801 svn_stringbuf_t *line; 802 apr_size_t max_len; 803 apr_off_t pos; 804 const char *eol_p; 805 806 if (!eol) 807 eol = &eol_p; 808 809 if (hunk->diff_text_range.current >= hunk->diff_text_range.end) 810 { 811 /* We're past the range. Indicate that no bytes can be read. */ 812 *eof = TRUE; 813 *eol = NULL; 814 *stringbuf = svn_stringbuf_create_empty(result_pool); 815 return SVN_NO_ERROR; 816 } 817 818 SVN_ERR(svn_io_file_get_offset(&pos, hunk->apr_file, scratch_pool)); 819 SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET, 820 &hunk->diff_text_range.current, scratch_pool)); 821 max_len = hunk->diff_text_range.end - hunk->diff_text_range.current; 822 SVN_ERR(svn_io_file_readline(hunk->apr_file, &line, eol, eof, max_len, 823 result_pool, 824 scratch_pool)); 825 SVN_ERR(svn_io_file_get_offset(&hunk->diff_text_range.current, 826 hunk->apr_file, scratch_pool)); 827 828 if (*eof && !*eol && *line->data) 829 { 830 /* Ok, we miss a final EOL in the patch file, but didn't see a 831 no eol marker line. 832 833 We should report that we had an EOL or the patch code will 834 misbehave (and it knows nothing about no eol markers) */ 835 836 if (eol != &eol_p) 837 { 838 /* Lets pick the first eol we find in our patch file */ 839 apr_off_t start = 0; 840 svn_stringbuf_t *str; 841 842 SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET, &start, 843 scratch_pool)); 844 845 SVN_ERR(svn_io_file_readline(hunk->apr_file, &str, eol, NULL, 846 APR_SIZE_MAX, 847 scratch_pool, scratch_pool)); 848 849 /* Every patch file that has hunks has at least one EOL*/ 850 SVN_ERR_ASSERT(*eol != NULL); 851 } 852 853 *eof = FALSE; 854 855 /* Fall through to seek back to the right location */ 856 } 857 858 SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET, &pos, scratch_pool)); 859 860 if (hunk->patch->reverse) 861 { 862 if (line->data[0] == '+') 863 line->data[0] = '-'; 864 else if (line->data[0] == '-') 865 line->data[0] = '+'; 866 } 867 868 *stringbuf = line; 869 870 return SVN_NO_ERROR; 871} 872 873/* Parse *PROP_NAME from HEADER as the part after the INDICATOR line. 874 * Allocate *PROP_NAME in RESULT_POOL. 875 * Set *PROP_NAME to NULL if no valid property name was found. */ 876static svn_error_t * 877parse_prop_name(const char **prop_name, const char *header, 878 const char *indicator, apr_pool_t *result_pool) 879{ 880 SVN_ERR(svn_utf_cstring_to_utf8(prop_name, 881 header + strlen(indicator), 882 result_pool)); 883 if (**prop_name == '\0') 884 *prop_name = NULL; 885 else if (! svn_prop_name_is_valid(*prop_name)) 886 { 887 svn_stringbuf_t *buf = svn_stringbuf_create(*prop_name, result_pool); 888 svn_stringbuf_strip_whitespace(buf); 889 *prop_name = (svn_prop_name_is_valid(buf->data) ? buf->data : NULL); 890 } 891 892 return SVN_NO_ERROR; 893} 894 895 896/* A helper function to parse svn:mergeinfo diffs. 897 * 898 * These diffs use a special pretty-print format, for instance: 899 * 900 * Added: svn:mergeinfo 901 * ## -0,0 +0,1 ## 902 * Merged /trunk:r2-3 903 * 904 * The hunk header has the following format: 905 * ## -0,NUMBER_OF_REVERSE_MERGES +0,NUMBER_OF_FORWARD_MERGES ## 906 * 907 * The header is followed by a list of mergeinfo, one path per line. 908 * This function parses such lines. Lines describing reverse merges 909 * appear first, and then all lines describing forward merges appear. 910 * 911 * Parts of the line are affected by i18n. The words 'Merged' 912 * and 'Reverse-merged' can appear in any language and at any 913 * position within the line. We can only assume that a leading 914 * '/' starts the merge source path, the path is followed by 915 * ":r", which in turn is followed by a mergeinfo revision range, 916 * which is terminated by whitespace or end-of-string. 917 * 918 * *NUMBER_OF_REVERSE_MERGES and *NUMBER_OF_FORWARD_MERGES are the 919 * numbers of reverse and forward merges remaining to be read. This 920 * function decrements *NUMBER_OF_REVERSE_MERGES for each LINE 921 * parsed until that is zero, then *NUMBER_OF_FORWARD_MERGES for 922 * each LINE parsed until that is zero. If both are zero, it parses 923 * and discards LINE. 924 * 925 * If LINE is successfully parsed, *FOUND_MERGEINFO is set to TRUE, 926 * otherwise to FALSE. 927 * 928 * If LINE is successfully parsed and counted, the resulting mergeinfo 929 * is added to PATCH->mergeinfo or PATCH->reverse_mergeinfo. 930 */ 931static svn_error_t * 932parse_pretty_mergeinfo_line(svn_boolean_t *found_mergeinfo, 933 svn_linenum_t *number_of_reverse_merges, 934 svn_linenum_t *number_of_forward_merges, 935 svn_stringbuf_t *line, 936 svn_patch_t *patch, 937 apr_pool_t *result_pool, 938 apr_pool_t *scratch_pool) 939{ 940 char *slash = strchr(line->data, '/'); 941 char *colon = strrchr(line->data, ':'); 942 943 *found_mergeinfo = FALSE; 944 945 if (slash && colon && colon[1] == 'r' && slash < colon) 946 { 947 svn_stringbuf_t *input; 948 svn_mergeinfo_t mergeinfo = NULL; 949 char *s; 950 svn_error_t *err; 951 952 input = svn_stringbuf_create_ensure(line->len, scratch_pool); 953 954 /* Copy the merge source path + colon */ 955 s = slash; 956 while (s <= colon) 957 { 958 svn_stringbuf_appendbyte(input, *s); 959 s++; 960 } 961 962 /* skip 'r' after colon */ 963 s++; 964 965 /* Copy the revision range. */ 966 while (s < line->data + line->len) 967 { 968 if (svn_ctype_isspace(*s)) 969 break; 970 svn_stringbuf_appendbyte(input, *s); 971 s++; 972 } 973 974 err = svn_mergeinfo_parse(&mergeinfo, input->data, result_pool); 975 if (err && err->apr_err == SVN_ERR_MERGEINFO_PARSE_ERROR) 976 { 977 svn_error_clear(err); 978 mergeinfo = NULL; 979 } 980 else 981 SVN_ERR(err); 982 983 if (mergeinfo) 984 { 985 if (*number_of_reverse_merges > 0) /* reverse merges */ 986 { 987 if (patch->reverse) 988 { 989 if (patch->mergeinfo == NULL) 990 patch->mergeinfo = mergeinfo; 991 else 992 SVN_ERR(svn_mergeinfo_merge2(patch->mergeinfo, 993 mergeinfo, 994 result_pool, 995 scratch_pool)); 996 } 997 else 998 { 999 if (patch->reverse_mergeinfo == NULL) 1000 patch->reverse_mergeinfo = mergeinfo; 1001 else 1002 SVN_ERR(svn_mergeinfo_merge2(patch->reverse_mergeinfo, 1003 mergeinfo, 1004 result_pool, 1005 scratch_pool)); 1006 } 1007 (*number_of_reverse_merges)--; 1008 } 1009 else if (number_of_forward_merges > 0) /* forward merges */ 1010 { 1011 if (patch->reverse) 1012 { 1013 if (patch->reverse_mergeinfo == NULL) 1014 patch->reverse_mergeinfo = mergeinfo; 1015 else 1016 SVN_ERR(svn_mergeinfo_merge2(patch->reverse_mergeinfo, 1017 mergeinfo, 1018 result_pool, 1019 scratch_pool)); 1020 } 1021 else 1022 { 1023 if (patch->mergeinfo == NULL) 1024 patch->mergeinfo = mergeinfo; 1025 else 1026 SVN_ERR(svn_mergeinfo_merge2(patch->mergeinfo, 1027 mergeinfo, 1028 result_pool, 1029 scratch_pool)); 1030 } 1031 (*number_of_forward_merges)--; 1032 } 1033 1034 *found_mergeinfo = TRUE; 1035 } 1036 } 1037 1038 return SVN_NO_ERROR; 1039} 1040 1041/* Return the next *HUNK from a PATCH in APR_FILE. 1042 * If no hunk can be found, set *HUNK to NULL. 1043 * Set IS_PROPERTY to TRUE if we have a property hunk. If the returned HUNK 1044 * is the first belonging to a certain property, then PROP_NAME and 1045 * PROP_OPERATION will be set too. If we have a text hunk, PROP_NAME will be 1046 * NULL. If IGNORE_WHITESPACE is TRUE, lines without leading spaces will be 1047 * treated as context lines. Allocate results in RESULT_POOL. 1048 * Use SCRATCH_POOL for all other allocations. */ 1049static svn_error_t * 1050parse_next_hunk(svn_diff_hunk_t **hunk, 1051 svn_boolean_t *is_property, 1052 const char **prop_name, 1053 svn_diff_operation_kind_t *prop_operation, 1054 svn_patch_t *patch, 1055 apr_file_t *apr_file, 1056 svn_boolean_t ignore_whitespace, 1057 apr_pool_t *result_pool, 1058 apr_pool_t *scratch_pool) 1059{ 1060 static const char * const minus = "--- "; 1061 static const char * const text_atat = "@@"; 1062 static const char * const prop_atat = "##"; 1063 svn_stringbuf_t *line; 1064 svn_boolean_t eof, in_hunk, hunk_seen; 1065 apr_off_t pos, last_line; 1066 apr_off_t start, end; 1067 apr_off_t original_end; 1068 apr_off_t modified_end; 1069 svn_boolean_t original_no_final_eol = FALSE; 1070 svn_boolean_t modified_no_final_eol = FALSE; 1071 svn_linenum_t original_lines; 1072 svn_linenum_t modified_lines; 1073 svn_linenum_t leading_context; 1074 svn_linenum_t trailing_context; 1075 svn_boolean_t changed_line_seen; 1076 enum { 1077 noise_line, 1078 original_line, 1079 modified_line, 1080 context_line 1081 } last_line_type; 1082 apr_pool_t *iterpool; 1083 1084 *prop_operation = svn_diff_op_unchanged; 1085 1086 /* We only set this if we have a property hunk header. */ 1087 *prop_name = NULL; 1088 *is_property = FALSE; 1089 1090 if (apr_file_eof(apr_file) == APR_EOF) 1091 { 1092 /* No more hunks here. */ 1093 *hunk = NULL; 1094 return SVN_NO_ERROR; 1095 } 1096 1097 in_hunk = FALSE; 1098 hunk_seen = FALSE; 1099 leading_context = 0; 1100 trailing_context = 0; 1101 changed_line_seen = FALSE; 1102 original_end = 0; 1103 modified_end = 0; 1104 *hunk = apr_pcalloc(result_pool, sizeof(**hunk)); 1105 1106 /* Get current seek position. */ 1107 SVN_ERR(svn_io_file_get_offset(&pos, apr_file, scratch_pool)); 1108 1109 /* Start out assuming noise. */ 1110 last_line_type = noise_line; 1111 1112 iterpool = svn_pool_create(scratch_pool); 1113 do 1114 { 1115 1116 svn_pool_clear(iterpool); 1117 1118 /* Remember the current line's offset, and read the line. */ 1119 last_line = pos; 1120 SVN_ERR(svn_io_file_readline(apr_file, &line, NULL, &eof, APR_SIZE_MAX, 1121 iterpool, iterpool)); 1122 1123 /* Update line offset for next iteration. */ 1124 SVN_ERR(svn_io_file_get_offset(&pos, apr_file, iterpool)); 1125 1126 /* Lines starting with a backslash indicate a missing EOL: 1127 * "\ No newline at end of file" or "end of property". */ 1128 if (line->data[0] == '\\') 1129 { 1130 if (in_hunk) 1131 { 1132 char eolbuf[2]; 1133 apr_size_t len; 1134 apr_off_t off; 1135 apr_off_t hunk_text_end; 1136 1137 /* Comment terminates the hunk text and says the hunk text 1138 * has no trailing EOL. Snip off trailing EOL which is part 1139 * of the patch file but not part of the hunk text. */ 1140 off = last_line - 2; 1141 SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &off, iterpool)); 1142 len = sizeof(eolbuf); 1143 SVN_ERR(svn_io_file_read_full2(apr_file, eolbuf, len, &len, 1144 &eof, iterpool)); 1145 if (eolbuf[0] == '\r' && eolbuf[1] == '\n') 1146 hunk_text_end = last_line - 2; 1147 else if (eolbuf[1] == '\n' || eolbuf[1] == '\r') 1148 hunk_text_end = last_line - 1; 1149 else 1150 hunk_text_end = last_line; 1151 1152 if (last_line_type == original_line && original_end == 0) 1153 original_end = hunk_text_end; 1154 else if (last_line_type == modified_line && modified_end == 0) 1155 modified_end = hunk_text_end; 1156 else if (last_line_type == context_line) 1157 { 1158 if (original_end == 0) 1159 original_end = hunk_text_end; 1160 if (modified_end == 0) 1161 modified_end = hunk_text_end; 1162 } 1163 1164 SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &pos, iterpool)); 1165 /* Set for the type and context by using != the other type */ 1166 if (last_line_type != modified_line) 1167 original_no_final_eol = TRUE; 1168 if (last_line_type != original_line) 1169 modified_no_final_eol = TRUE; 1170 } 1171 1172 continue; 1173 } 1174 1175 if (in_hunk && *is_property && *prop_name && 1176 strcmp(*prop_name, SVN_PROP_MERGEINFO) == 0) 1177 { 1178 svn_boolean_t found_pretty_mergeinfo_line; 1179 1180 if (! hunk_seen) 1181 { 1182 /* We're reading the first line of the hunk, so the start 1183 * of the line just read is the hunk text's byte offset. */ 1184 start = last_line; 1185 } 1186 1187 SVN_ERR(parse_pretty_mergeinfo_line(&found_pretty_mergeinfo_line, 1188 &original_lines, &modified_lines, 1189 line, patch, 1190 result_pool, iterpool)); 1191 if (found_pretty_mergeinfo_line) 1192 { 1193 hunk_seen = TRUE; 1194 (*hunk)->is_pretty_print_mergeinfo = TRUE; 1195 continue; /* Proceed to the next line in the svn:mergeinfo hunk. */ 1196 } 1197 1198 if ((*hunk)->is_pretty_print_mergeinfo) 1199 { 1200 /* We have reached the end of the pretty-print-mergeinfo hunk. 1201 (This format uses only one hunk.) */ 1202 if (eof) 1203 { 1204 /* The hunk ends at EOF. */ 1205 end = pos; 1206 } 1207 else 1208 { 1209 /* The start of the current line marks the first byte 1210 * after the hunk text. */ 1211 end = last_line; 1212 } 1213 original_end = end; 1214 modified_end = end; 1215 break; 1216 } 1217 1218 /* Otherwise, this is a property diff in the 1219 regular format so fall through to normal processing. */ 1220 } 1221 1222 if (in_hunk) 1223 { 1224 char c; 1225 static const char add = '+'; 1226 static const char del = '-'; 1227 1228 if (! hunk_seen) 1229 { 1230 /* We're reading the first line of the hunk, so the start 1231 * of the line just read is the hunk text's byte offset. */ 1232 start = last_line; 1233 } 1234 1235 c = line->data[0]; 1236 if (c == ' ' 1237 || ((original_lines > 0 && modified_lines > 0) 1238 && ( 1239 /* Tolerate chopped leading spaces on empty lines. */ 1240 (! eof && line->len == 0) 1241 /* Maybe tolerate chopped leading spaces on non-empty lines. */ 1242 || (ignore_whitespace && c != del && c != add)))) 1243 { 1244 /* It's a "context" line in the hunk. */ 1245 hunk_seen = TRUE; 1246 if (original_lines > 0) 1247 original_lines--; 1248 else 1249 { 1250 (*hunk)->original_length++; 1251 (*hunk)->original_fuzz++; 1252 } 1253 if (modified_lines > 0) 1254 modified_lines--; 1255 else 1256 { 1257 (*hunk)->modified_length++; 1258 (*hunk)->modified_fuzz++; 1259 } 1260 if (changed_line_seen) 1261 trailing_context++; 1262 else 1263 leading_context++; 1264 last_line_type = context_line; 1265 } 1266 else if (c == del 1267 && (original_lines > 0 || line->data[1] != del)) 1268 { 1269 /* It's a "deleted" line in the hunk. */ 1270 hunk_seen = TRUE; 1271 changed_line_seen = TRUE; 1272 1273 /* A hunk may have context in the middle. We only want 1274 trailing lines of context. */ 1275 if (trailing_context > 0) 1276 trailing_context = 0; 1277 1278 if (original_lines > 0) 1279 original_lines--; 1280 else 1281 { 1282 (*hunk)->original_length++; 1283 (*hunk)->original_fuzz++; 1284 } 1285 last_line_type = original_line; 1286 } 1287 else if (c == add 1288 && (modified_lines > 0 || line->data[1] != add)) 1289 { 1290 /* It's an "added" line in the hunk. */ 1291 hunk_seen = TRUE; 1292 changed_line_seen = TRUE; 1293 1294 /* A hunk may have context in the middle. We only want 1295 trailing lines of context. */ 1296 if (trailing_context > 0) 1297 trailing_context = 0; 1298 1299 if (modified_lines > 0) 1300 modified_lines--; 1301 else 1302 { 1303 (*hunk)->modified_length++; 1304 (*hunk)->modified_fuzz++; 1305 } 1306 last_line_type = modified_line; 1307 } 1308 else 1309 { 1310 if (eof) 1311 { 1312 /* The hunk ends at EOF. */ 1313 end = pos; 1314 } 1315 else 1316 { 1317 /* The start of the current line marks the first byte 1318 * after the hunk text. */ 1319 end = last_line; 1320 } 1321 if (original_end == 0) 1322 original_end = end; 1323 if (modified_end == 0) 1324 modified_end = end; 1325 break; /* Hunk was empty or has been read. */ 1326 } 1327 } 1328 else 1329 { 1330 if (starts_with(line->data, text_atat)) 1331 { 1332 /* Looks like we have a hunk header, try to rip it apart. */ 1333 in_hunk = parse_hunk_header(line->data, *hunk, text_atat, 1334 iterpool); 1335 if (in_hunk) 1336 { 1337 original_lines = (*hunk)->original_length; 1338 modified_lines = (*hunk)->modified_length; 1339 *is_property = FALSE; 1340 } 1341 } 1342 else if (starts_with(line->data, prop_atat)) 1343 { 1344 /* Looks like we have a property hunk header, try to rip it 1345 * apart. */ 1346 in_hunk = parse_hunk_header(line->data, *hunk, prop_atat, 1347 iterpool); 1348 if (in_hunk) 1349 { 1350 original_lines = (*hunk)->original_length; 1351 modified_lines = (*hunk)->modified_length; 1352 *is_property = TRUE; 1353 } 1354 } 1355 else if (starts_with(line->data, "Added: ")) 1356 { 1357 SVN_ERR(parse_prop_name(prop_name, line->data, "Added: ", 1358 result_pool)); 1359 if (*prop_name) 1360 *prop_operation = (patch->reverse ? svn_diff_op_deleted 1361 : svn_diff_op_added); 1362 } 1363 else if (starts_with(line->data, "Deleted: ")) 1364 { 1365 SVN_ERR(parse_prop_name(prop_name, line->data, "Deleted: ", 1366 result_pool)); 1367 if (*prop_name) 1368 *prop_operation = (patch->reverse ? svn_diff_op_added 1369 : svn_diff_op_deleted); 1370 } 1371 else if (starts_with(line->data, "Modified: ")) 1372 { 1373 SVN_ERR(parse_prop_name(prop_name, line->data, "Modified: ", 1374 result_pool)); 1375 if (*prop_name) 1376 *prop_operation = svn_diff_op_modified; 1377 } 1378 else if (starts_with(line->data, minus) 1379 || starts_with(line->data, "diff --git ")) 1380 /* This could be a header of another patch. Bail out. */ 1381 break; 1382 } 1383 } 1384 /* Check for the line length since a file may not have a newline at the 1385 * end and we depend upon the last line to be an empty one. */ 1386 while (! eof || line->len > 0); 1387 svn_pool_destroy(iterpool); 1388 1389 if (! eof) 1390 /* Rewind to the start of the line just read, so subsequent calls 1391 * to this function or svn_diff_parse_next_patch() don't end 1392 * up skipping the line -- it may contain a patch or hunk header. */ 1393 SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &last_line, scratch_pool)); 1394 1395 if (hunk_seen && start < end) 1396 { 1397 /* Did we get the number of context lines announced in the header? 1398 1399 If not... let's limit the number from the header to what we 1400 actually have, and apply a fuzz penalty */ 1401 if (original_lines) 1402 { 1403 (*hunk)->original_length -= original_lines; 1404 (*hunk)->original_fuzz += original_lines; 1405 } 1406 if (modified_lines) 1407 { 1408 (*hunk)->modified_length -= modified_lines; 1409 (*hunk)->modified_fuzz += modified_lines; 1410 } 1411 1412 (*hunk)->patch = patch; 1413 (*hunk)->apr_file = apr_file; 1414 (*hunk)->leading_context = leading_context; 1415 (*hunk)->trailing_context = trailing_context; 1416 (*hunk)->diff_text_range.start = start; 1417 (*hunk)->diff_text_range.current = start; 1418 (*hunk)->diff_text_range.end = end; 1419 (*hunk)->original_text_range.start = start; 1420 (*hunk)->original_text_range.current = start; 1421 (*hunk)->original_text_range.end = original_end; 1422 (*hunk)->modified_text_range.start = start; 1423 (*hunk)->modified_text_range.current = start; 1424 (*hunk)->modified_text_range.end = modified_end; 1425 (*hunk)->original_no_final_eol = original_no_final_eol; 1426 (*hunk)->modified_no_final_eol = modified_no_final_eol; 1427 } 1428 else 1429 /* Something went wrong, just discard the result. */ 1430 *hunk = NULL; 1431 1432 return SVN_NO_ERROR; 1433} 1434 1435/* Compare function for sorting hunks after parsing. 1436 * We sort hunks by their original line offset. */ 1437static int 1438compare_hunks(const void *a, const void *b) 1439{ 1440 const svn_diff_hunk_t *ha = *((const svn_diff_hunk_t *const *)a); 1441 const svn_diff_hunk_t *hb = *((const svn_diff_hunk_t *const *)b); 1442 1443 if (ha->original_start < hb->original_start) 1444 return -1; 1445 if (ha->original_start > hb->original_start) 1446 return 1; 1447 return 0; 1448} 1449 1450/* Possible states of the diff header parser. */ 1451enum parse_state 1452{ 1453 state_start, /* initial */ 1454 state_git_diff_seen, /* diff --git */ 1455 state_git_tree_seen, /* a tree operation, rather than content change */ 1456 state_git_minus_seen, /* --- /dev/null; or --- a/ */ 1457 state_git_plus_seen, /* +++ /dev/null; or +++ a/ */ 1458 state_old_mode_seen, /* old mode 100644 */ 1459 state_git_mode_seen, /* new mode 100644 */ 1460 state_move_from_seen, /* rename from foo.c */ 1461 state_copy_from_seen, /* copy from foo.c */ 1462 state_minus_seen, /* --- foo.c */ 1463 state_unidiff_found, /* valid start of a regular unidiff header */ 1464 state_git_header_found, /* valid start of a --git diff header */ 1465 state_binary_patch_found /* valid start of binary patch */ 1466}; 1467 1468/* Data type describing a valid state transition of the parser. */ 1469struct transition 1470{ 1471 const char *expected_input; 1472 enum parse_state required_state; 1473 1474 /* A callback called upon each parser state transition. */ 1475 svn_error_t *(*fn)(enum parse_state *new_state, char *input, 1476 svn_patch_t *patch, apr_pool_t *result_pool, 1477 apr_pool_t *scratch_pool); 1478}; 1479 1480/* UTF-8 encode and canonicalize the content of LINE as FILE_NAME. */ 1481static svn_error_t * 1482grab_filename(const char **file_name, const char *line, apr_pool_t *result_pool, 1483 apr_pool_t *scratch_pool) 1484{ 1485 const char *utf8_path; 1486 const char *canon_path; 1487 1488 /* Grab the filename and encode it in UTF-8. */ 1489 /* TODO: Allow specifying the patch file's encoding. 1490 * For now, we assume its encoding is native. */ 1491 /* ### This can fail if the filename cannot be represented in the current 1492 * ### locale's encoding. */ 1493 SVN_ERR(svn_utf_cstring_to_utf8(&utf8_path, 1494 line, 1495 scratch_pool)); 1496 1497 /* Canonicalize the path name. */ 1498 canon_path = svn_dirent_canonicalize(utf8_path, scratch_pool); 1499 1500 *file_name = apr_pstrdup(result_pool, canon_path); 1501 1502 return SVN_NO_ERROR; 1503} 1504 1505/* Parse the '--- ' line of a regular unidiff. */ 1506static svn_error_t * 1507diff_minus(enum parse_state *new_state, char *line, svn_patch_t *patch, 1508 apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1509{ 1510 /* If we can find a tab, it separates the filename from 1511 * the rest of the line which we can discard. */ 1512 char *tab = strchr(line, '\t'); 1513 if (tab) 1514 *tab = '\0'; 1515 1516 SVN_ERR(grab_filename(&patch->old_filename, line + STRLEN_LITERAL("--- "), 1517 result_pool, scratch_pool)); 1518 1519 *new_state = state_minus_seen; 1520 1521 return SVN_NO_ERROR; 1522} 1523 1524/* Parse the '+++ ' line of a regular unidiff. */ 1525static svn_error_t * 1526diff_plus(enum parse_state *new_state, char *line, svn_patch_t *patch, 1527 apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1528{ 1529 /* If we can find a tab, it separates the filename from 1530 * the rest of the line which we can discard. */ 1531 char *tab = strchr(line, '\t'); 1532 if (tab) 1533 *tab = '\0'; 1534 1535 SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("+++ "), 1536 result_pool, scratch_pool)); 1537 1538 *new_state = state_unidiff_found; 1539 1540 return SVN_NO_ERROR; 1541} 1542 1543/* Parse the first line of a git extended unidiff. */ 1544static svn_error_t * 1545git_start(enum parse_state *new_state, char *line, svn_patch_t *patch, 1546 apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1547{ 1548 const char *old_path_start; 1549 char *old_path_end; 1550 const char *new_path_start; 1551 const char *new_path_end; 1552 char *new_path_marker; 1553 const char *old_path_marker; 1554 1555 /* ### Add handling of escaped paths 1556 * http://www.kernel.org/pub/software/scm/git/docs/git-diff.html: 1557 * 1558 * TAB, LF, double quote and backslash characters in pathnames are 1559 * represented as \t, \n, \" and \\, respectively. If there is need for 1560 * such substitution then the whole pathname is put in double quotes. 1561 */ 1562 1563 /* Our line should look like this: 'diff --git a/path b/path'. 1564 * 1565 * If we find any deviations from that format, we return with state reset 1566 * to start. 1567 */ 1568 old_path_marker = strstr(line, " a/"); 1569 1570 if (! old_path_marker) 1571 { 1572 *new_state = state_start; 1573 return SVN_NO_ERROR; 1574 } 1575 1576 if (! *(old_path_marker + 3)) 1577 { 1578 *new_state = state_start; 1579 return SVN_NO_ERROR; 1580 } 1581 1582 new_path_marker = strstr(old_path_marker, " b/"); 1583 1584 if (! new_path_marker) 1585 { 1586 *new_state = state_start; 1587 return SVN_NO_ERROR; 1588 } 1589 1590 if (! *(new_path_marker + 3)) 1591 { 1592 *new_state = state_start; 1593 return SVN_NO_ERROR; 1594 } 1595 1596 /* By now, we know that we have a line on the form '--git diff a/.+ b/.+' 1597 * We only need the filenames when we have deleted or added empty 1598 * files. In those cases the old_path and new_path is identical on the 1599 * 'diff --git' line. For all other cases we fetch the filenames from 1600 * other header lines. */ 1601 old_path_start = line + STRLEN_LITERAL("diff --git a/"); 1602 new_path_end = line + strlen(line); 1603 new_path_start = old_path_start; 1604 1605 while (TRUE) 1606 { 1607 ptrdiff_t len_old; 1608 ptrdiff_t len_new; 1609 1610 new_path_marker = strstr(new_path_start, " b/"); 1611 1612 /* No new path marker, bail out. */ 1613 if (! new_path_marker) 1614 break; 1615 1616 old_path_end = new_path_marker; 1617 new_path_start = new_path_marker + STRLEN_LITERAL(" b/"); 1618 1619 /* No path after the marker. */ 1620 if (! *new_path_start) 1621 break; 1622 1623 len_old = old_path_end - old_path_start; 1624 len_new = new_path_end - new_path_start; 1625 1626 /* Are the paths before and after the " b/" marker the same? */ 1627 if (len_old == len_new 1628 && ! strncmp(old_path_start, new_path_start, len_old)) 1629 { 1630 *old_path_end = '\0'; 1631 SVN_ERR(grab_filename(&patch->old_filename, old_path_start, 1632 result_pool, scratch_pool)); 1633 1634 SVN_ERR(grab_filename(&patch->new_filename, new_path_start, 1635 result_pool, scratch_pool)); 1636 break; 1637 } 1638 } 1639 1640 /* We assume that the path is only modified until we've found a 'tree' 1641 * header */ 1642 patch->operation = svn_diff_op_modified; 1643 1644 *new_state = state_git_diff_seen; 1645 return SVN_NO_ERROR; 1646} 1647 1648/* Parse the '--- ' line of a git extended unidiff. */ 1649static svn_error_t * 1650git_minus(enum parse_state *new_state, char *line, svn_patch_t *patch, 1651 apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1652{ 1653 /* If we can find a tab, it separates the filename from 1654 * the rest of the line which we can discard. */ 1655 char *tab = strchr(line, '\t'); 1656 if (tab) 1657 *tab = '\0'; 1658 1659 if (starts_with(line, "--- /dev/null")) 1660 SVN_ERR(grab_filename(&patch->old_filename, "/dev/null", 1661 result_pool, scratch_pool)); 1662 else 1663 SVN_ERR(grab_filename(&patch->old_filename, line + STRLEN_LITERAL("--- a/"), 1664 result_pool, scratch_pool)); 1665 1666 *new_state = state_git_minus_seen; 1667 return SVN_NO_ERROR; 1668} 1669 1670/* Parse the '+++ ' line of a git extended unidiff. */ 1671static svn_error_t * 1672git_plus(enum parse_state *new_state, char *line, svn_patch_t *patch, 1673 apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1674{ 1675 /* If we can find a tab, it separates the filename from 1676 * the rest of the line which we can discard. */ 1677 char *tab = strchr(line, '\t'); 1678 if (tab) 1679 *tab = '\0'; 1680 1681 if (starts_with(line, "+++ /dev/null")) 1682 SVN_ERR(grab_filename(&patch->new_filename, "/dev/null", 1683 result_pool, scratch_pool)); 1684 else 1685 SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("+++ b/"), 1686 result_pool, scratch_pool)); 1687 1688 *new_state = state_git_header_found; 1689 return SVN_NO_ERROR; 1690} 1691 1692/* Helper for git_old_mode() and git_new_mode(). Translate the git 1693 * file mode MODE_STR into a binary "executable?" and "symlink?" state. */ 1694static svn_error_t * 1695parse_git_mode_bits(svn_tristate_t *executable_p, 1696 svn_tristate_t *symlink_p, 1697 const char *mode_str) 1698{ 1699 apr_uint64_t mode; 1700 SVN_ERR(svn_cstring_strtoui64(&mode, mode_str, 1701 0 /* min */, 1702 0777777 /* max: six octal digits */, 1703 010 /* radix (octal) */)); 1704 1705 /* Note: 0644 and 0755 are the only modes that can occur for plain files. 1706 * We deliberately choose to parse only those values: we are strict in what 1707 * we accept _and_ in what we produce. 1708 * 1709 * (Having said that, though, we could consider relaxing the parser to also 1710 * map 1711 * (mode & 0111) == 0000 -> svn_tristate_false 1712 * (mode & 0111) == 0111 -> svn_tristate_true 1713 * [anything else] -> svn_tristate_unknown 1714 * .) 1715 */ 1716 1717 switch (mode & 0777) 1718 { 1719 case 0644: 1720 *executable_p = svn_tristate_false; 1721 break; 1722 1723 case 0755: 1724 *executable_p = svn_tristate_true; 1725 break; 1726 1727 default: 1728 /* Ignore unknown values. */ 1729 *executable_p = svn_tristate_unknown; 1730 break; 1731 } 1732 1733 switch (mode & 0170000 /* S_IFMT */) 1734 { 1735 case 0120000: /* S_IFLNK */ 1736 *symlink_p = svn_tristate_true; 1737 break; 1738 1739 case 0100000: /* S_IFREG */ 1740 case 0040000: /* S_IFDIR */ 1741 *symlink_p = svn_tristate_false; 1742 break; 1743 1744 default: 1745 /* Ignore unknown values. 1746 (Including those generated by Subversion <= 1.9) */ 1747 *symlink_p = svn_tristate_unknown; 1748 break; 1749 } 1750 1751 return SVN_NO_ERROR; 1752} 1753 1754/* Parse the 'old mode ' line of a git extended unidiff. */ 1755static svn_error_t * 1756git_old_mode(enum parse_state *new_state, char *line, svn_patch_t *patch, 1757 apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1758{ 1759 SVN_ERR(parse_git_mode_bits(&patch->old_executable_bit, 1760 &patch->old_symlink_bit, 1761 line + STRLEN_LITERAL("old mode "))); 1762 1763#ifdef SVN_DEBUG 1764 /* If this assert trips, the "old mode" is neither ...644 nor ...755 . */ 1765 SVN_ERR_ASSERT(patch->old_executable_bit != svn_tristate_unknown); 1766#endif 1767 1768 *new_state = state_old_mode_seen; 1769 return SVN_NO_ERROR; 1770} 1771 1772/* Parse the 'new mode ' line of a git extended unidiff. */ 1773static svn_error_t * 1774git_new_mode(enum parse_state *new_state, char *line, svn_patch_t *patch, 1775 apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1776{ 1777 SVN_ERR(parse_git_mode_bits(&patch->new_executable_bit, 1778 &patch->new_symlink_bit, 1779 line + STRLEN_LITERAL("new mode "))); 1780 1781#ifdef SVN_DEBUG 1782 /* If this assert trips, the "old mode" is neither ...644 nor ...755 . */ 1783 SVN_ERR_ASSERT(patch->new_executable_bit != svn_tristate_unknown); 1784#endif 1785 1786 /* Don't touch patch->operation. */ 1787 1788 *new_state = state_git_mode_seen; 1789 return SVN_NO_ERROR; 1790} 1791 1792static svn_error_t * 1793git_index(enum parse_state *new_state, char *line, svn_patch_t *patch, 1794 apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1795{ 1796 /* We either have something like "index 33e5b38..0000000" (which we just 1797 ignore as we are not interested in git specific shas) or something like 1798 "index 33e5b38..0000000 120000" which tells us the mode, that isn't 1799 changed by applying this patch. 1800 1801 If the mode would have changed then we would see 'old mode' and 'new mode' 1802 lines. 1803 */ 1804 line = strchr(line + STRLEN_LITERAL("index "), ' '); 1805 1806 if (line && patch->new_executable_bit == svn_tristate_unknown 1807 && patch->new_symlink_bit == svn_tristate_unknown 1808 && patch->operation != svn_diff_op_added 1809 && patch->operation != svn_diff_op_deleted) 1810 { 1811 SVN_ERR(parse_git_mode_bits(&patch->new_executable_bit, 1812 &patch->new_symlink_bit, 1813 line + 1)); 1814 1815 /* There is no change.. so set the old values to the new values */ 1816 patch->old_executable_bit = patch->new_executable_bit; 1817 patch->old_symlink_bit = patch->new_symlink_bit; 1818 } 1819 1820 /* This function doesn't change the state! */ 1821 /* *new_state = *new_state */ 1822 return SVN_NO_ERROR; 1823} 1824 1825/* Parse the 'rename from ' line of a git extended unidiff. */ 1826static svn_error_t * 1827git_move_from(enum parse_state *new_state, char *line, svn_patch_t *patch, 1828 apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1829{ 1830 SVN_ERR(grab_filename(&patch->old_filename, 1831 line + STRLEN_LITERAL("rename from "), 1832 result_pool, scratch_pool)); 1833 1834 *new_state = state_move_from_seen; 1835 return SVN_NO_ERROR; 1836} 1837 1838/* Parse the 'rename to ' line of a git extended unidiff. */ 1839static svn_error_t * 1840git_move_to(enum parse_state *new_state, char *line, svn_patch_t *patch, 1841 apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1842{ 1843 SVN_ERR(grab_filename(&patch->new_filename, 1844 line + STRLEN_LITERAL("rename to "), 1845 result_pool, scratch_pool)); 1846 1847 patch->operation = svn_diff_op_moved; 1848 1849 *new_state = state_git_tree_seen; 1850 return SVN_NO_ERROR; 1851} 1852 1853/* Parse the 'copy from ' line of a git extended unidiff. */ 1854static svn_error_t * 1855git_copy_from(enum parse_state *new_state, char *line, svn_patch_t *patch, 1856 apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1857{ 1858 SVN_ERR(grab_filename(&patch->old_filename, 1859 line + STRLEN_LITERAL("copy from "), 1860 result_pool, scratch_pool)); 1861 1862 *new_state = state_copy_from_seen; 1863 return SVN_NO_ERROR; 1864} 1865 1866/* Parse the 'copy to ' line of a git extended unidiff. */ 1867static svn_error_t * 1868git_copy_to(enum parse_state *new_state, char *line, svn_patch_t *patch, 1869 apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1870{ 1871 SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("copy to "), 1872 result_pool, scratch_pool)); 1873 1874 patch->operation = svn_diff_op_copied; 1875 1876 *new_state = state_git_tree_seen; 1877 return SVN_NO_ERROR; 1878} 1879 1880/* Parse the 'new file ' line of a git extended unidiff. */ 1881static svn_error_t * 1882git_new_file(enum parse_state *new_state, char *line, svn_patch_t *patch, 1883 apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1884{ 1885 SVN_ERR(parse_git_mode_bits(&patch->new_executable_bit, 1886 &patch->new_symlink_bit, 1887 line + STRLEN_LITERAL("new file mode "))); 1888 1889 patch->operation = svn_diff_op_added; 1890 1891 /* Filename already retrieved from diff --git header. */ 1892 1893 *new_state = state_git_tree_seen; 1894 return SVN_NO_ERROR; 1895} 1896 1897/* Parse the 'deleted file ' line of a git extended unidiff. */ 1898static svn_error_t * 1899git_deleted_file(enum parse_state *new_state, char *line, svn_patch_t *patch, 1900 apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1901{ 1902 SVN_ERR(parse_git_mode_bits(&patch->old_executable_bit, 1903 &patch->old_symlink_bit, 1904 line + STRLEN_LITERAL("deleted file mode "))); 1905 1906 patch->operation = svn_diff_op_deleted; 1907 1908 /* Filename already retrieved from diff --git header. */ 1909 1910 *new_state = state_git_tree_seen; 1911 return SVN_NO_ERROR; 1912} 1913 1914/* Parse the 'GIT binary patch' header */ 1915static svn_error_t * 1916binary_patch_start(enum parse_state *new_state, char *line, svn_patch_t *patch, 1917 apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1918{ 1919 *new_state = state_binary_patch_found; 1920 return SVN_NO_ERROR; 1921} 1922 1923 1924/* Add a HUNK associated with the property PROP_NAME to PATCH. */ 1925static svn_error_t * 1926add_property_hunk(svn_patch_t *patch, const char *prop_name, 1927 svn_diff_hunk_t *hunk, svn_diff_operation_kind_t operation, 1928 apr_pool_t *result_pool) 1929{ 1930 svn_prop_patch_t *prop_patch; 1931 1932 prop_patch = svn_hash_gets(patch->prop_patches, prop_name); 1933 1934 if (! prop_patch) 1935 { 1936 prop_patch = apr_palloc(result_pool, sizeof(svn_prop_patch_t)); 1937 prop_patch->name = prop_name; 1938 prop_patch->operation = operation; 1939 prop_patch->hunks = apr_array_make(result_pool, 1, 1940 sizeof(svn_diff_hunk_t *)); 1941 1942 svn_hash_sets(patch->prop_patches, prop_name, prop_patch); 1943 } 1944 1945 APR_ARRAY_PUSH(prop_patch->hunks, svn_diff_hunk_t *) = hunk; 1946 1947 return SVN_NO_ERROR; 1948} 1949 1950struct svn_patch_file_t 1951{ 1952 /* The APR file handle to the patch file. */ 1953 apr_file_t *apr_file; 1954 1955 /* The file offset at which the next patch is expected. */ 1956 apr_off_t next_patch_offset; 1957}; 1958 1959svn_error_t * 1960svn_diff_open_patch_file(svn_patch_file_t **patch_file, 1961 const char *local_abspath, 1962 apr_pool_t *result_pool) 1963{ 1964 svn_patch_file_t *p; 1965 1966 p = apr_palloc(result_pool, sizeof(*p)); 1967 SVN_ERR(svn_io_file_open(&p->apr_file, local_abspath, 1968 APR_READ | APR_BUFFERED, APR_OS_DEFAULT, 1969 result_pool)); 1970 p->next_patch_offset = 0; 1971 *patch_file = p; 1972 1973 return SVN_NO_ERROR; 1974} 1975 1976/* Parse hunks from APR_FILE and store them in PATCH->HUNKS. 1977 * Parsing stops if no valid next hunk can be found. 1978 * If IGNORE_WHITESPACE is TRUE, lines without 1979 * leading spaces will be treated as context lines. 1980 * Allocate results in RESULT_POOL. 1981 * Use SCRATCH_POOL for temporary allocations. */ 1982static svn_error_t * 1983parse_hunks(svn_patch_t *patch, apr_file_t *apr_file, 1984 svn_boolean_t ignore_whitespace, 1985 apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1986{ 1987 svn_diff_hunk_t *hunk; 1988 svn_boolean_t is_property; 1989 const char *last_prop_name; 1990 const char *prop_name; 1991 svn_diff_operation_kind_t prop_operation; 1992 apr_pool_t *iterpool; 1993 1994 last_prop_name = NULL; 1995 1996 patch->hunks = apr_array_make(result_pool, 10, sizeof(svn_diff_hunk_t *)); 1997 patch->prop_patches = apr_hash_make(result_pool); 1998 iterpool = svn_pool_create(scratch_pool); 1999 do 2000 { 2001 svn_pool_clear(iterpool); 2002 2003 SVN_ERR(parse_next_hunk(&hunk, &is_property, &prop_name, &prop_operation, 2004 patch, apr_file, ignore_whitespace, result_pool, 2005 iterpool)); 2006 2007 if (hunk && is_property) 2008 { 2009 if (! prop_name) 2010 prop_name = last_prop_name; 2011 else 2012 last_prop_name = prop_name; 2013 2014 /* Skip pretty-printed svn:mergeinfo property hunks. 2015 * Pretty-printed mergeinfo data cannot be represented as a hunk and 2016 * is therefore stored in PATCH itself. */ 2017 if (hunk->is_pretty_print_mergeinfo) 2018 continue; 2019 2020 SVN_ERR(add_property_hunk(patch, prop_name, hunk, prop_operation, 2021 result_pool)); 2022 } 2023 else if (hunk) 2024 { 2025 APR_ARRAY_PUSH(patch->hunks, svn_diff_hunk_t *) = hunk; 2026 last_prop_name = NULL; 2027 } 2028 2029 } 2030 while (hunk); 2031 svn_pool_destroy(iterpool); 2032 2033 return SVN_NO_ERROR; 2034} 2035 2036static svn_error_t * 2037parse_binary_patch(svn_patch_t *patch, apr_file_t *apr_file, 2038 svn_boolean_t reverse, 2039 apr_pool_t *result_pool, apr_pool_t *scratch_pool) 2040{ 2041 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 2042 apr_off_t pos, last_line; 2043 svn_stringbuf_t *line; 2044 svn_boolean_t eof = FALSE; 2045 svn_diff_binary_patch_t *bpatch = apr_pcalloc(result_pool, sizeof(*bpatch)); 2046 svn_boolean_t in_blob = FALSE; 2047 svn_boolean_t in_src = FALSE; 2048 2049 bpatch->apr_file = apr_file; 2050 2051 patch->prop_patches = apr_hash_make(result_pool); 2052 2053 SVN_ERR(svn_io_file_get_offset(&pos, apr_file, scratch_pool)); 2054 2055 while (!eof) 2056 { 2057 last_line = pos; 2058 SVN_ERR(svn_io_file_readline(apr_file, &line, NULL, &eof, APR_SIZE_MAX, 2059 iterpool, iterpool)); 2060 2061 /* Update line offset for next iteration. */ 2062 SVN_ERR(svn_io_file_get_offset(&pos, apr_file, iterpool)); 2063 2064 if (in_blob) 2065 { 2066 char c = line->data[0]; 2067 2068 /* 66 = len byte + (52/4*5) chars */ 2069 if (((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) 2070 && line->len <= 66 2071 && !strchr(line->data, ':') 2072 && !strchr(line->data, ' ')) 2073 { 2074 /* One more blop line */ 2075 if (in_src) 2076 bpatch->src_end = pos; 2077 else 2078 bpatch->dst_end = pos; 2079 } 2080 else if (svn_stringbuf_first_non_whitespace(line) < line->len 2081 && !(in_src && bpatch->src_start < last_line)) 2082 { 2083 break; /* Bad patch */ 2084 } 2085 else if (in_src) 2086 { 2087 patch->binary_patch = bpatch; /* SUCCESS! */ 2088 break; 2089 } 2090 else 2091 { 2092 in_blob = FALSE; 2093 in_src = TRUE; 2094 } 2095 } 2096 else if (starts_with(line->data, "literal ")) 2097 { 2098 apr_uint64_t expanded_size; 2099 svn_error_t *err = svn_cstring_strtoui64(&expanded_size, 2100 &line->data[8], 2101 0, APR_UINT64_MAX, 10); 2102 2103 if (err) 2104 { 2105 svn_error_clear(err); 2106 break; 2107 } 2108 2109 if (in_src) 2110 { 2111 bpatch->src_start = pos; 2112 bpatch->src_filesize = expanded_size; 2113 } 2114 else 2115 { 2116 bpatch->dst_start = pos; 2117 bpatch->dst_filesize = expanded_size; 2118 } 2119 in_blob = TRUE; 2120 } 2121 else 2122 break; /* We don't support GIT deltas (yet) */ 2123 } 2124 svn_pool_destroy(iterpool); 2125 2126 if (!eof) 2127 /* Rewind to the start of the line just read, so subsequent calls 2128 * don't end up skipping the line. It may contain a patch or hunk header.*/ 2129 SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &last_line, scratch_pool)); 2130 else if (in_src 2131 && ((bpatch->src_end > bpatch->src_start) || !bpatch->src_filesize)) 2132 { 2133 patch->binary_patch = bpatch; /* SUCCESS */ 2134 } 2135 2136 /* Reverse patch if requested */ 2137 if (reverse && patch->binary_patch) 2138 { 2139 apr_off_t tmp_start = bpatch->src_start; 2140 apr_off_t tmp_end = bpatch->src_end; 2141 svn_filesize_t tmp_filesize = bpatch->src_filesize; 2142 2143 bpatch->src_start = bpatch->dst_start; 2144 bpatch->src_end = bpatch->dst_end; 2145 bpatch->src_filesize = bpatch->dst_filesize; 2146 2147 bpatch->dst_start = tmp_start; 2148 bpatch->dst_end = tmp_end; 2149 bpatch->dst_filesize = tmp_filesize; 2150 } 2151 2152 return SVN_NO_ERROR; 2153} 2154 2155/* State machine for the diff header parser. 2156 * Expected Input Required state Function to call */ 2157static struct transition transitions[] = 2158{ 2159 {"--- ", state_start, diff_minus}, 2160 {"+++ ", state_minus_seen, diff_plus}, 2161 2162 {"diff --git", state_start, git_start}, 2163 {"--- a/", state_git_diff_seen, git_minus}, 2164 {"--- a/", state_git_mode_seen, git_minus}, 2165 {"--- a/", state_git_tree_seen, git_minus}, 2166 {"--- /dev/null", state_git_mode_seen, git_minus}, 2167 {"--- /dev/null", state_git_tree_seen, git_minus}, 2168 {"+++ b/", state_git_minus_seen, git_plus}, 2169 {"+++ /dev/null", state_git_minus_seen, git_plus}, 2170 2171 {"old mode ", state_git_diff_seen, git_old_mode}, 2172 {"new mode ", state_old_mode_seen, git_new_mode}, 2173 2174 {"rename from ", state_git_diff_seen, git_move_from}, 2175 {"rename from ", state_git_mode_seen, git_move_from}, 2176 {"rename to ", state_move_from_seen, git_move_to}, 2177 2178 {"copy from ", state_git_diff_seen, git_copy_from}, 2179 {"copy from ", state_git_mode_seen, git_copy_from}, 2180 {"copy to ", state_copy_from_seen, git_copy_to}, 2181 2182 {"new file ", state_git_diff_seen, git_new_file}, 2183 2184 {"deleted file ", state_git_diff_seen, git_deleted_file}, 2185 2186 {"index ", state_git_diff_seen, git_index}, 2187 {"index ", state_git_tree_seen, git_index}, 2188 {"index ", state_git_mode_seen, git_index}, 2189 2190 {"GIT binary patch", state_git_diff_seen, binary_patch_start}, 2191 {"GIT binary patch", state_git_tree_seen, binary_patch_start}, 2192 {"GIT binary patch", state_git_mode_seen, binary_patch_start}, 2193}; 2194 2195svn_error_t * 2196svn_diff_parse_next_patch(svn_patch_t **patch_p, 2197 svn_patch_file_t *patch_file, 2198 svn_boolean_t reverse, 2199 svn_boolean_t ignore_whitespace, 2200 apr_pool_t *result_pool, 2201 apr_pool_t *scratch_pool) 2202{ 2203 apr_off_t pos, last_line; 2204 svn_boolean_t eof; 2205 svn_boolean_t line_after_tree_header_read = FALSE; 2206 apr_pool_t *iterpool; 2207 svn_patch_t *patch; 2208 enum parse_state state = state_start; 2209 2210 if (apr_file_eof(patch_file->apr_file) == APR_EOF) 2211 { 2212 /* No more patches here. */ 2213 *patch_p = NULL; 2214 return SVN_NO_ERROR; 2215 } 2216 2217 patch = apr_pcalloc(result_pool, sizeof(*patch)); 2218 patch->old_executable_bit = svn_tristate_unknown; 2219 patch->new_executable_bit = svn_tristate_unknown; 2220 patch->old_symlink_bit = svn_tristate_unknown; 2221 patch->new_symlink_bit = svn_tristate_unknown; 2222 2223 pos = patch_file->next_patch_offset; 2224 SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &pos, scratch_pool)); 2225 2226 iterpool = svn_pool_create(scratch_pool); 2227 do 2228 { 2229 svn_stringbuf_t *line; 2230 svn_boolean_t valid_header_line = FALSE; 2231 int i; 2232 2233 svn_pool_clear(iterpool); 2234 2235 /* Remember the current line's offset, and read the line. */ 2236 last_line = pos; 2237 SVN_ERR(svn_io_file_readline(patch_file->apr_file, &line, NULL, &eof, 2238 APR_SIZE_MAX, iterpool, iterpool)); 2239 2240 if (! eof) 2241 { 2242 /* Update line offset for next iteration. */ 2243 SVN_ERR(svn_io_file_get_offset(&pos, patch_file->apr_file, 2244 iterpool)); 2245 } 2246 2247 /* Run the state machine. */ 2248 for (i = 0; i < (sizeof(transitions) / sizeof(transitions[0])); i++) 2249 { 2250 if (starts_with(line->data, transitions[i].expected_input) 2251 && state == transitions[i].required_state) 2252 { 2253 SVN_ERR(transitions[i].fn(&state, line->data, patch, 2254 result_pool, iterpool)); 2255 valid_header_line = TRUE; 2256 break; 2257 } 2258 } 2259 2260 if (state == state_unidiff_found 2261 || state == state_git_header_found 2262 || state == state_binary_patch_found) 2263 { 2264 /* We have a valid diff header, yay! */ 2265 break; 2266 } 2267 else if ((state == state_git_tree_seen || state == state_git_mode_seen) 2268 && line_after_tree_header_read 2269 && !valid_header_line) 2270 { 2271 /* We have a valid diff header for a patch with only tree changes. 2272 * Rewind to the start of the line just read, so subsequent calls 2273 * to this function don't end up skipping the line -- it may 2274 * contain a patch. */ 2275 SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &last_line, 2276 scratch_pool)); 2277 break; 2278 } 2279 else if (state == state_git_tree_seen 2280 || state == state_git_mode_seen) 2281 { 2282 line_after_tree_header_read = TRUE; 2283 } 2284 else if (! valid_header_line && state != state_start 2285 && state != state_git_diff_seen) 2286 { 2287 /* We've encountered an invalid diff header. 2288 * 2289 * Rewind to the start of the line just read - it may be a new 2290 * header that begins there. */ 2291 SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &last_line, 2292 scratch_pool)); 2293 state = state_start; 2294 } 2295 2296 } 2297 while (! eof); 2298 2299 patch->reverse = reverse; 2300 if (reverse) 2301 { 2302 const char *temp; 2303 svn_tristate_t ts_tmp; 2304 2305 temp = patch->old_filename; 2306 patch->old_filename = patch->new_filename; 2307 patch->new_filename = temp; 2308 2309 switch (patch->operation) 2310 { 2311 case svn_diff_op_added: 2312 patch->operation = svn_diff_op_deleted; 2313 break; 2314 case svn_diff_op_deleted: 2315 patch->operation = svn_diff_op_added; 2316 break; 2317 2318 case svn_diff_op_modified: 2319 break; /* Stays modified. */ 2320 2321 case svn_diff_op_copied: 2322 case svn_diff_op_moved: 2323 break; /* Stays copied or moved, just in the other direction. */ 2324 case svn_diff_op_unchanged: 2325 break; /* Stays unchanged, of course. */ 2326 } 2327 2328 ts_tmp = patch->old_executable_bit; 2329 patch->old_executable_bit = patch->new_executable_bit; 2330 patch->new_executable_bit = ts_tmp; 2331 2332 ts_tmp = patch->old_symlink_bit; 2333 patch->old_symlink_bit = patch->new_symlink_bit; 2334 patch->new_symlink_bit = ts_tmp; 2335 } 2336 2337 if (patch->old_filename == NULL || patch->new_filename == NULL) 2338 { 2339 /* Something went wrong, just discard the result. */ 2340 patch = NULL; 2341 } 2342 else 2343 { 2344 if (state == state_binary_patch_found) 2345 { 2346 SVN_ERR(parse_binary_patch(patch, patch_file->apr_file, reverse, 2347 result_pool, iterpool)); 2348 /* And fall through in property parsing */ 2349 } 2350 2351 SVN_ERR(parse_hunks(patch, patch_file->apr_file, ignore_whitespace, 2352 result_pool, iterpool)); 2353 } 2354 2355 svn_pool_destroy(iterpool); 2356 2357 SVN_ERR(svn_io_file_get_offset(&patch_file->next_patch_offset, 2358 patch_file->apr_file, scratch_pool)); 2359 2360 if (patch && patch->hunks) 2361 { 2362 /* Usually, hunks appear in the patch sorted by their original line 2363 * offset. But just in case they weren't parsed in this order for 2364 * some reason, we sort them so that our caller can assume that hunks 2365 * are sorted as if parsed from a usual patch. */ 2366 svn_sort__array(patch->hunks, compare_hunks); 2367 } 2368 2369 *patch_p = patch; 2370 return SVN_NO_ERROR; 2371} 2372 2373svn_error_t * 2374svn_diff_close_patch_file(svn_patch_file_t *patch_file, 2375 apr_pool_t *scratch_pool) 2376{ 2377 return svn_error_trace(svn_io_file_close(patch_file->apr_file, 2378 scratch_pool)); 2379} 2380