diff_file.c revision 299742
1/* 2 * diff_file.c : routines for doing diffs on files 3 * 4 * ==================================================================== 5 * Licensed to the Apache Software Foundation (ASF) under one 6 * or more contributor license agreements. See the NOTICE file 7 * distributed with this work for additional information 8 * regarding copyright ownership. The ASF licenses this file 9 * to you under the Apache License, Version 2.0 (the 10 * "License"); you may not use this file except in compliance 11 * with the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, 16 * software distributed under the License is distributed on an 17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 18 * KIND, either express or implied. See the License for the 19 * specific language governing permissions and limitations 20 * under the License. 21 * ==================================================================== 22 */ 23 24 25#include <apr.h> 26#include <apr_pools.h> 27#include <apr_general.h> 28#include <apr_file_io.h> 29#include <apr_file_info.h> 30#include <apr_time.h> 31#include <apr_mmap.h> 32#include <apr_getopt.h> 33 34#include <assert.h> 35 36#include "svn_error.h" 37#include "svn_diff.h" 38#include "svn_types.h" 39#include "svn_string.h" 40#include "svn_subst.h" 41#include "svn_io.h" 42#include "svn_utf.h" 43#include "svn_pools.h" 44#include "diff.h" 45#include "svn_private_config.h" 46#include "svn_path.h" 47#include "svn_ctype.h" 48 49#include "private/svn_utf_private.h" 50#include "private/svn_eol_private.h" 51#include "private/svn_dep_compat.h" 52#include "private/svn_adler32.h" 53#include "private/svn_diff_private.h" 54 55/* A token, i.e. a line read from a file. */ 56typedef struct svn_diff__file_token_t 57{ 58 /* Next token in free list. */ 59 struct svn_diff__file_token_t *next; 60 svn_diff_datasource_e datasource; 61 /* Offset in the datasource. */ 62 apr_off_t offset; 63 /* Offset of the normalized token (may skip leading whitespace) */ 64 apr_off_t norm_offset; 65 /* Total length - before normalization. */ 66 apr_off_t raw_length; 67 /* Total length - after normalization. */ 68 apr_off_t length; 69} svn_diff__file_token_t; 70 71 72typedef struct svn_diff__file_baton_t 73{ 74 const svn_diff_file_options_t *options; 75 76 struct file_info { 77 const char *path; /* path to this file, absolute or relative to CWD */ 78 79 /* All the following fields are active while this datasource is open */ 80 apr_file_t *file; /* handle of this file */ 81 apr_off_t size; /* total raw size in bytes of this file */ 82 83 /* The current chunk: CHUNK_SIZE bytes except for the last chunk. */ 84 int chunk; /* the current chunk number, zero-based */ 85 char *buffer; /* a buffer containing the current chunk */ 86 char *curp; /* current position in the current chunk */ 87 char *endp; /* next memory address after the current chunk */ 88 89 svn_diff__normalize_state_t normalize_state; 90 91 /* Where the identical suffix starts in this datasource */ 92 int suffix_start_chunk; 93 apr_off_t suffix_offset_in_chunk; 94 } files[4]; 95 96 /* List of free tokens that may be reused. */ 97 svn_diff__file_token_t *tokens; 98 99 apr_pool_t *pool; 100} svn_diff__file_baton_t; 101 102static int 103datasource_to_index(svn_diff_datasource_e datasource) 104{ 105 switch (datasource) 106 { 107 case svn_diff_datasource_original: 108 return 0; 109 110 case svn_diff_datasource_modified: 111 return 1; 112 113 case svn_diff_datasource_latest: 114 return 2; 115 116 case svn_diff_datasource_ancestor: 117 return 3; 118 } 119 120 return -1; 121} 122 123/* Files are read in chunks of 128k. There is no support for this number 124 * whatsoever. If there is a number someone comes up with that has some 125 * argumentation, let's use that. 126 */ 127/* If you change this number, update test_norm_offset(), 128 * test_identical_suffix() and and test_token_compare() in diff-diff3-test.c. 129 */ 130#define CHUNK_SHIFT 17 131#define CHUNK_SIZE (1 << CHUNK_SHIFT) 132 133#define chunk_to_offset(chunk) ((chunk) << CHUNK_SHIFT) 134#define offset_to_chunk(offset) ((offset) >> CHUNK_SHIFT) 135#define offset_in_chunk(offset) ((offset) & (CHUNK_SIZE - 1)) 136 137 138/* Read a chunk from a FILE into BUFFER, starting from OFFSET, going for 139 * *LENGTH. The actual bytes read are stored in *LENGTH on return. 140 */ 141static APR_INLINE svn_error_t * 142read_chunk(apr_file_t *file, 143 char *buffer, apr_off_t length, 144 apr_off_t offset, apr_pool_t *scratch_pool) 145{ 146 /* XXX: The final offset may not be the one we asked for. 147 * XXX: Check. 148 */ 149 SVN_ERR(svn_io_file_seek(file, APR_SET, &offset, scratch_pool)); 150 return svn_io_file_read_full2(file, buffer, (apr_size_t) length, 151 NULL, NULL, scratch_pool); 152} 153 154 155/* Map or read a file at PATH. *BUFFER will point to the file 156 * contents; if the file was mapped, *FILE and *MM will contain the 157 * mmap context; otherwise they will be NULL. SIZE will contain the 158 * file size. Allocate from POOL. 159 */ 160#if APR_HAS_MMAP 161#define MMAP_T_PARAM(NAME) apr_mmap_t **NAME, 162#define MMAP_T_ARG(NAME) &(NAME), 163#else 164#define MMAP_T_PARAM(NAME) 165#define MMAP_T_ARG(NAME) 166#endif 167 168static svn_error_t * 169map_or_read_file(apr_file_t **file, 170 MMAP_T_PARAM(mm) 171 char **buffer, apr_size_t *size_p, 172 const char *path, apr_pool_t *pool) 173{ 174 apr_finfo_t finfo; 175 apr_status_t rv; 176 apr_size_t size; 177 178 *buffer = NULL; 179 180 SVN_ERR(svn_io_file_open(file, path, APR_READ, APR_OS_DEFAULT, pool)); 181 SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, *file, pool)); 182 183 if (finfo.size > APR_SIZE_MAX) 184 { 185 return svn_error_createf(APR_ENOMEM, NULL, 186 _("File '%s' is too large to be read in " 187 "to memory"), path); 188 } 189 190 size = (apr_size_t) finfo.size; 191#if APR_HAS_MMAP 192 if (size > APR_MMAP_THRESHOLD) 193 { 194 rv = apr_mmap_create(mm, *file, 0, size, APR_MMAP_READ, pool); 195 if (rv == APR_SUCCESS) 196 { 197 *buffer = (*mm)->mm; 198 } 199 else 200 { 201 /* Clear *MM because output parameters are undefined on error. */ 202 *mm = NULL; 203 } 204 205 /* On failure we just fall through and try reading the file into 206 * memory instead. 207 */ 208 } 209#endif /* APR_HAS_MMAP */ 210 211 if (*buffer == NULL && size > 0) 212 { 213 *buffer = apr_palloc(pool, size); 214 215 SVN_ERR(svn_io_file_read_full2(*file, *buffer, size, NULL, NULL, pool)); 216 217 /* Since we have the entire contents of the file we can 218 * close it now. 219 */ 220 SVN_ERR(svn_io_file_close(*file, pool)); 221 222 *file = NULL; 223 } 224 225 *size_p = size; 226 227 return SVN_NO_ERROR; 228} 229 230 231/* For all files in the FILE array, increment the curp pointer. If a file 232 * points before the beginning of file, let it point at the first byte again. 233 * If the end of the current chunk is reached, read the next chunk in the 234 * buffer and point curp to the start of the chunk. If EOF is reached, set 235 * curp equal to endp to indicate EOF. */ 236#define INCREMENT_POINTERS(all_files, files_len, pool) \ 237 do { \ 238 apr_size_t svn_macro__i; \ 239 \ 240 for (svn_macro__i = 0; svn_macro__i < (files_len); svn_macro__i++) \ 241 { \ 242 if ((all_files)[svn_macro__i].curp < (all_files)[svn_macro__i].endp - 1)\ 243 (all_files)[svn_macro__i].curp++; \ 244 else \ 245 SVN_ERR(increment_chunk(&(all_files)[svn_macro__i], (pool))); \ 246 } \ 247 } while (0) 248 249 250/* For all files in the FILE array, decrement the curp pointer. If the 251 * start of a chunk is reached, read the previous chunk in the buffer and 252 * point curp to the last byte of the chunk. If the beginning of a FILE is 253 * reached, set chunk to -1 to indicate BOF. */ 254#define DECREMENT_POINTERS(all_files, files_len, pool) \ 255 do { \ 256 apr_size_t svn_macro__i; \ 257 \ 258 for (svn_macro__i = 0; svn_macro__i < (files_len); svn_macro__i++) \ 259 { \ 260 if ((all_files)[svn_macro__i].curp > (all_files)[svn_macro__i].buffer) \ 261 (all_files)[svn_macro__i].curp--; \ 262 else \ 263 SVN_ERR(decrement_chunk(&(all_files)[svn_macro__i], (pool))); \ 264 } \ 265 } while (0) 266 267 268static svn_error_t * 269increment_chunk(struct file_info *file, apr_pool_t *pool) 270{ 271 apr_off_t length; 272 apr_off_t last_chunk = offset_to_chunk(file->size); 273 274 if (file->chunk == -1) 275 { 276 /* We are at BOF (Beginning Of File). Point to first chunk/byte again. */ 277 file->chunk = 0; 278 file->curp = file->buffer; 279 } 280 else if (file->chunk == last_chunk) 281 { 282 /* We are at the last chunk. Indicate EOF by setting curp == endp. */ 283 file->curp = file->endp; 284 } 285 else 286 { 287 /* There are still chunks left. Read next chunk and reset pointers. */ 288 file->chunk++; 289 length = file->chunk == last_chunk ? 290 offset_in_chunk(file->size) : CHUNK_SIZE; 291 SVN_ERR(read_chunk(file->file, file->buffer, 292 length, chunk_to_offset(file->chunk), 293 pool)); 294 file->endp = file->buffer + length; 295 file->curp = file->buffer; 296 } 297 298 return SVN_NO_ERROR; 299} 300 301 302static svn_error_t * 303decrement_chunk(struct file_info *file, apr_pool_t *pool) 304{ 305 if (file->chunk == 0) 306 { 307 /* We are already at the first chunk. Indicate BOF (Beginning Of File) 308 by setting chunk = -1 and curp = endp - 1. Both conditions are 309 important. They help the increment step to catch the BOF situation 310 in an efficient way. */ 311 file->chunk--; 312 file->curp = file->endp - 1; 313 } 314 else 315 { 316 /* Read previous chunk and reset pointers. */ 317 file->chunk--; 318 SVN_ERR(read_chunk(file->file, file->buffer, 319 CHUNK_SIZE, chunk_to_offset(file->chunk), 320 pool)); 321 file->endp = file->buffer + CHUNK_SIZE; 322 file->curp = file->endp - 1; 323 } 324 325 return SVN_NO_ERROR; 326} 327 328 329/* Check whether one of the FILEs has its pointers 'before' the beginning of 330 * the file (this can happen while scanning backwards). This is the case if 331 * one of them has chunk == -1. */ 332static svn_boolean_t 333is_one_at_bof(struct file_info file[], apr_size_t file_len) 334{ 335 apr_size_t i; 336 337 for (i = 0; i < file_len; i++) 338 if (file[i].chunk == -1) 339 return TRUE; 340 341 return FALSE; 342} 343 344/* Check whether one of the FILEs has its pointers at EOF (this is the case if 345 * one of them has curp == endp (this can only happen at the last chunk)) */ 346static svn_boolean_t 347is_one_at_eof(struct file_info file[], apr_size_t file_len) 348{ 349 apr_size_t i; 350 351 for (i = 0; i < file_len; i++) 352 if (file[i].curp == file[i].endp) 353 return TRUE; 354 355 return FALSE; 356} 357 358/* Quickly determine whether there is a eol char in CHUNK. 359 * (mainly copy-n-paste from eol.c#svn_eol__find_eol_start). 360 */ 361 362#if SVN_UNALIGNED_ACCESS_IS_OK 363static svn_boolean_t contains_eol(apr_uintptr_t chunk) 364{ 365 apr_uintptr_t r_test = chunk ^ SVN__R_MASK; 366 apr_uintptr_t n_test = chunk ^ SVN__N_MASK; 367 368 r_test |= (r_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET; 369 n_test |= (n_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET; 370 371 return (r_test & n_test & SVN__BIT_7_SET) != SVN__BIT_7_SET; 372} 373#endif 374 375/* Find the prefix which is identical between all elements of the FILE array. 376 * Return the number of prefix lines in PREFIX_LINES. REACHED_ONE_EOF will be 377 * set to TRUE if one of the FILEs reached its end while scanning prefix, 378 * i.e. at least one file consisted entirely of prefix. Otherwise, 379 * REACHED_ONE_EOF is set to FALSE. 380 * 381 * After this function is finished, the buffers, chunks, curp's and endp's 382 * of the FILEs are set to point at the first byte after the prefix. */ 383static svn_error_t * 384find_identical_prefix(svn_boolean_t *reached_one_eof, apr_off_t *prefix_lines, 385 struct file_info file[], apr_size_t file_len, 386 apr_pool_t *pool) 387{ 388 svn_boolean_t had_cr = FALSE; 389 svn_boolean_t is_match; 390 apr_off_t lines = 0; 391 apr_size_t i; 392 393 *reached_one_eof = FALSE; 394 395 for (i = 1, is_match = TRUE; i < file_len; i++) 396 is_match = is_match && *file[0].curp == *file[i].curp; 397 while (is_match) 398 { 399#if SVN_UNALIGNED_ACCESS_IS_OK 400 apr_ssize_t max_delta, delta; 401#endif /* SVN_UNALIGNED_ACCESS_IS_OK */ 402 403 /* ### TODO: see if we can take advantage of 404 diff options like ignore_eol_style or ignore_space. */ 405 /* check for eol, and count */ 406 if (*file[0].curp == '\r') 407 { 408 lines++; 409 had_cr = TRUE; 410 } 411 else if (*file[0].curp == '\n' && !had_cr) 412 { 413 lines++; 414 } 415 else 416 { 417 had_cr = FALSE; 418 } 419 420 INCREMENT_POINTERS(file, file_len, pool); 421 422#if SVN_UNALIGNED_ACCESS_IS_OK 423 424 /* Try to advance as far as possible with machine-word granularity. 425 * Determine how far we may advance with chunky ops without reaching 426 * endp for any of the files. 427 * Signedness is important here if curp gets close to endp. 428 */ 429 max_delta = file[0].endp - file[0].curp - sizeof(apr_uintptr_t); 430 for (i = 1; i < file_len; i++) 431 { 432 delta = file[i].endp - file[i].curp - sizeof(apr_uintptr_t); 433 if (delta < max_delta) 434 max_delta = delta; 435 } 436 437 is_match = TRUE; 438 for (delta = 0; delta < max_delta; delta += sizeof(apr_uintptr_t)) 439 { 440 apr_uintptr_t chunk = *(const apr_uintptr_t *)(file[0].curp + delta); 441 if (contains_eol(chunk)) 442 break; 443 444 for (i = 1; i < file_len; i++) 445 if (chunk != *(const apr_uintptr_t *)(file[i].curp + delta)) 446 { 447 is_match = FALSE; 448 break; 449 } 450 451 if (! is_match) 452 break; 453 } 454 455 if (delta /* > 0*/) 456 { 457 /* We either found a mismatch or an EOL at or shortly behind curp+delta 458 * or we cannot proceed with chunky ops without exceeding endp. 459 * In any way, everything up to curp + delta is equal and not an EOL. 460 */ 461 for (i = 0; i < file_len; i++) 462 file[i].curp += delta; 463 464 /* Skipped data without EOL markers, so last char was not a CR. */ 465 had_cr = FALSE; 466 } 467#endif 468 469 *reached_one_eof = is_one_at_eof(file, file_len); 470 if (*reached_one_eof) 471 break; 472 else 473 for (i = 1, is_match = TRUE; i < file_len; i++) 474 is_match = is_match && *file[0].curp == *file[i].curp; 475 } 476 477 if (had_cr) 478 { 479 /* Check if we ended in the middle of a \r\n for one file, but \r for 480 another. If so, back up one byte, so the next loop will back up 481 the entire line. Also decrement lines, since we counted one 482 too many for the \r. */ 483 svn_boolean_t ended_at_nonmatching_newline = FALSE; 484 for (i = 0; i < file_len; i++) 485 if (file[i].curp < file[i].endp) 486 ended_at_nonmatching_newline = ended_at_nonmatching_newline 487 || *file[i].curp == '\n'; 488 if (ended_at_nonmatching_newline) 489 { 490 lines--; 491 DECREMENT_POINTERS(file, file_len, pool); 492 } 493 } 494 495 /* Back up one byte, so we point at the last identical byte */ 496 DECREMENT_POINTERS(file, file_len, pool); 497 498 /* Back up to the last eol sequence (\n, \r\n or \r) */ 499 while (!is_one_at_bof(file, file_len) && 500 *file[0].curp != '\n' && *file[0].curp != '\r') 501 DECREMENT_POINTERS(file, file_len, pool); 502 503 /* Slide one byte forward, to point past the eol sequence */ 504 INCREMENT_POINTERS(file, file_len, pool); 505 506 *prefix_lines = lines; 507 508 return SVN_NO_ERROR; 509} 510 511 512/* The number of identical suffix lines to keep with the middle section. These 513 * lines are not eliminated as suffix, and can be picked up by the token 514 * parsing and lcs steps. This is mainly for backward compatibility with 515 * the previous diff (and blame) output (if there are multiple diff solutions, 516 * our lcs algorithm prefers taking common lines from the start, rather than 517 * from the end. By giving it back some suffix lines, we give it some wiggle 518 * room to find the exact same diff as before). 519 * 520 * The number 50 is more or less arbitrary, based on some real-world tests 521 * with big files (and then doubling the required number to be on the safe 522 * side). This has a negligible effect on the power of the optimization. */ 523/* If you change this number, update test_identical_suffix() in diff-diff3-test.c */ 524#ifndef SUFFIX_LINES_TO_KEEP 525#define SUFFIX_LINES_TO_KEEP 50 526#endif 527 528/* Find the suffix which is identical between all elements of the FILE array. 529 * Return the number of suffix lines in SUFFIX_LINES. 530 * 531 * Before this function is called the FILEs' pointers and chunks should be 532 * positioned right after the identical prefix (which is the case after 533 * find_identical_prefix), so we can determine where suffix scanning should 534 * ultimately stop. */ 535static svn_error_t * 536find_identical_suffix(apr_off_t *suffix_lines, struct file_info file[], 537 apr_size_t file_len, apr_pool_t *pool) 538{ 539 struct file_info file_for_suffix[4] = { { 0 } }; 540 apr_off_t length[4]; 541 apr_off_t suffix_min_chunk0; 542 apr_off_t suffix_min_offset0; 543 apr_off_t min_file_size; 544 int suffix_lines_to_keep = SUFFIX_LINES_TO_KEEP; 545 svn_boolean_t is_match; 546 apr_off_t lines = 0; 547 svn_boolean_t had_nl; 548 apr_size_t i; 549 550 /* Initialize file_for_suffix[]. 551 Read last chunk, position curp at last byte. */ 552 for (i = 0; i < file_len; i++) 553 { 554 file_for_suffix[i].path = file[i].path; 555 file_for_suffix[i].file = file[i].file; 556 file_for_suffix[i].size = file[i].size; 557 file_for_suffix[i].chunk = 558 (int) offset_to_chunk(file_for_suffix[i].size); /* last chunk */ 559 length[i] = offset_in_chunk(file_for_suffix[i].size); 560 if (length[i] == 0) 561 { 562 /* last chunk is an empty chunk -> start at next-to-last chunk */ 563 file_for_suffix[i].chunk = file_for_suffix[i].chunk - 1; 564 length[i] = CHUNK_SIZE; 565 } 566 567 if (file_for_suffix[i].chunk == file[i].chunk) 568 { 569 /* Prefix ended in last chunk, so we can reuse the prefix buffer */ 570 file_for_suffix[i].buffer = file[i].buffer; 571 } 572 else 573 { 574 /* There is at least more than 1 chunk, 575 so allocate full chunk size buffer */ 576 file_for_suffix[i].buffer = apr_palloc(pool, CHUNK_SIZE); 577 SVN_ERR(read_chunk(file_for_suffix[i].file, 578 file_for_suffix[i].buffer, length[i], 579 chunk_to_offset(file_for_suffix[i].chunk), 580 pool)); 581 } 582 file_for_suffix[i].endp = file_for_suffix[i].buffer + length[i]; 583 file_for_suffix[i].curp = file_for_suffix[i].endp - 1; 584 } 585 586 /* Get the chunk and pointer offset (for file[0]) at which we should stop 587 scanning backward for the identical suffix, i.e. when we reach prefix. */ 588 suffix_min_chunk0 = file[0].chunk; 589 suffix_min_offset0 = file[0].curp - file[0].buffer; 590 591 /* Compensate if other files are smaller than file[0] */ 592 for (i = 1, min_file_size = file[0].size; i < file_len; i++) 593 if (file[i].size < min_file_size) 594 min_file_size = file[i].size; 595 if (file[0].size > min_file_size) 596 { 597 suffix_min_chunk0 += (file[0].size - min_file_size) / CHUNK_SIZE; 598 suffix_min_offset0 += (file[0].size - min_file_size) % CHUNK_SIZE; 599 } 600 601 /* Scan backwards until mismatch or until we reach the prefix. */ 602 for (i = 1, is_match = TRUE; i < file_len; i++) 603 is_match = is_match 604 && *file_for_suffix[0].curp == *file_for_suffix[i].curp; 605 if (is_match && *file_for_suffix[0].curp != '\r' 606 && *file_for_suffix[0].curp != '\n') 607 /* Count an extra line for the last line not ending in an eol. */ 608 lines++; 609 610 had_nl = FALSE; 611 while (is_match) 612 { 613 svn_boolean_t reached_prefix; 614#if SVN_UNALIGNED_ACCESS_IS_OK 615 /* Initialize the minimum pointer positions. */ 616 const char *min_curp[4]; 617 svn_boolean_t can_read_word; 618#endif /* SVN_UNALIGNED_ACCESS_IS_OK */ 619 620 /* ### TODO: see if we can take advantage of 621 diff options like ignore_eol_style or ignore_space. */ 622 /* check for eol, and count */ 623 if (*file_for_suffix[0].curp == '\n') 624 { 625 lines++; 626 had_nl = TRUE; 627 } 628 else if (*file_for_suffix[0].curp == '\r' && !had_nl) 629 { 630 lines++; 631 } 632 else 633 { 634 had_nl = FALSE; 635 } 636 637 DECREMENT_POINTERS(file_for_suffix, file_len, pool); 638 639#if SVN_UNALIGNED_ACCESS_IS_OK 640 for (i = 0; i < file_len; i++) 641 min_curp[i] = file_for_suffix[i].buffer; 642 643 /* If we are in the same chunk that contains the last part of the common 644 prefix, use the min_curp[0] pointer to make sure we don't get a 645 suffix that overlaps the already determined common prefix. */ 646 if (file_for_suffix[0].chunk == suffix_min_chunk0) 647 min_curp[0] += suffix_min_offset0; 648 649 /* Scan quickly by reading with machine-word granularity. */ 650 for (i = 0, can_read_word = TRUE; can_read_word && i < file_len; i++) 651 can_read_word = ((file_for_suffix[i].curp + 1 - sizeof(apr_uintptr_t)) 652 > min_curp[i]); 653 654 while (can_read_word) 655 { 656 apr_uintptr_t chunk; 657 658 /* For each file curp is positioned at the current byte, but we 659 want to examine the current byte and the ones before the current 660 location as one machine word. */ 661 662 chunk = *(const apr_uintptr_t *)(file_for_suffix[0].curp + 1 663 - sizeof(apr_uintptr_t)); 664 if (contains_eol(chunk)) 665 break; 666 667 for (i = 1, is_match = TRUE; is_match && i < file_len; i++) 668 is_match = (chunk 669 == *(const apr_uintptr_t *) 670 (file_for_suffix[i].curp + 1 671 - sizeof(apr_uintptr_t))); 672 673 if (! is_match) 674 break; 675 676 for (i = 0; i < file_len; i++) 677 { 678 file_for_suffix[i].curp -= sizeof(apr_uintptr_t); 679 can_read_word = can_read_word 680 && ( (file_for_suffix[i].curp + 1 681 - sizeof(apr_uintptr_t)) 682 > min_curp[i]); 683 } 684 685 /* We skipped some bytes, so there are no closing EOLs */ 686 had_nl = FALSE; 687 } 688 689 /* The > min_curp[i] check leaves at least one final byte for checking 690 in the non block optimized case below. */ 691#endif 692 693 reached_prefix = file_for_suffix[0].chunk == suffix_min_chunk0 694 && (file_for_suffix[0].curp - file_for_suffix[0].buffer) 695 == suffix_min_offset0; 696 if (reached_prefix || is_one_at_bof(file_for_suffix, file_len)) 697 break; 698 699 is_match = TRUE; 700 for (i = 1; i < file_len; i++) 701 is_match = is_match 702 && *file_for_suffix[0].curp == *file_for_suffix[i].curp; 703 } 704 705 /* Slide one byte forward, to point at the first byte of identical suffix */ 706 INCREMENT_POINTERS(file_for_suffix, file_len, pool); 707 708 /* Slide forward until we find an eol sequence to add the rest of the line 709 we're in. Then add SUFFIX_LINES_TO_KEEP more lines. Stop if at least 710 one file reaches its end. */ 711 do 712 { 713 svn_boolean_t had_cr = FALSE; 714 while (!is_one_at_eof(file_for_suffix, file_len) 715 && *file_for_suffix[0].curp != '\n' 716 && *file_for_suffix[0].curp != '\r') 717 INCREMENT_POINTERS(file_for_suffix, file_len, pool); 718 719 /* Slide one or two more bytes, to point past the eol. */ 720 if (!is_one_at_eof(file_for_suffix, file_len) 721 && *file_for_suffix[0].curp == '\r') 722 { 723 lines--; 724 had_cr = TRUE; 725 INCREMENT_POINTERS(file_for_suffix, file_len, pool); 726 } 727 if (!is_one_at_eof(file_for_suffix, file_len) 728 && *file_for_suffix[0].curp == '\n') 729 { 730 if (!had_cr) 731 lines--; 732 INCREMENT_POINTERS(file_for_suffix, file_len, pool); 733 } 734 } 735 while (!is_one_at_eof(file_for_suffix, file_len) 736 && suffix_lines_to_keep--); 737 738 if (is_one_at_eof(file_for_suffix, file_len)) 739 lines = 0; 740 741 /* Save the final suffix information in the original file_info */ 742 for (i = 0; i < file_len; i++) 743 { 744 file[i].suffix_start_chunk = file_for_suffix[i].chunk; 745 file[i].suffix_offset_in_chunk = 746 file_for_suffix[i].curp - file_for_suffix[i].buffer; 747 } 748 749 *suffix_lines = lines; 750 751 return SVN_NO_ERROR; 752} 753 754 755/* Let FILE stand for the array of file_info struct elements of BATON->files 756 * that are indexed by the elements of the DATASOURCE array. 757 * BATON's type is (svn_diff__file_baton_t *). 758 * 759 * For each file in the FILE array, open the file at FILE.path; initialize 760 * FILE.file, FILE.size, FILE.buffer, FILE.curp and FILE.endp; allocate a 761 * buffer and read the first chunk. Then find the prefix and suffix lines 762 * which are identical between all the files. Return the number of identical 763 * prefix lines in PREFIX_LINES, and the number of identical suffix lines in 764 * SUFFIX_LINES. 765 * 766 * Finding the identical prefix and suffix allows us to exclude those from the 767 * rest of the diff algorithm, which increases performance by reducing the 768 * problem space. 769 * 770 * Implements svn_diff_fns2_t::datasources_open. */ 771static svn_error_t * 772datasources_open(void *baton, 773 apr_off_t *prefix_lines, 774 apr_off_t *suffix_lines, 775 const svn_diff_datasource_e *datasources, 776 apr_size_t datasources_len) 777{ 778 svn_diff__file_baton_t *file_baton = baton; 779 struct file_info files[4]; 780 apr_finfo_t finfo[4]; 781 apr_off_t length[4]; 782#ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING 783 svn_boolean_t reached_one_eof; 784#endif 785 apr_size_t i; 786 787 /* Make sure prefix_lines and suffix_lines are set correctly, even if we 788 * exit early because one of the files is empty. */ 789 *prefix_lines = 0; 790 *suffix_lines = 0; 791 792 /* Open datasources and read first chunk */ 793 for (i = 0; i < datasources_len; i++) 794 { 795 struct file_info *file 796 = &file_baton->files[datasource_to_index(datasources[i])]; 797 SVN_ERR(svn_io_file_open(&file->file, file->path, 798 APR_READ, APR_OS_DEFAULT, file_baton->pool)); 799 SVN_ERR(svn_io_file_info_get(&finfo[i], APR_FINFO_SIZE, 800 file->file, file_baton->pool)); 801 file->size = finfo[i].size; 802 length[i] = finfo[i].size > CHUNK_SIZE ? CHUNK_SIZE : finfo[i].size; 803 file->buffer = apr_palloc(file_baton->pool, (apr_size_t) length[i]); 804 SVN_ERR(read_chunk(file->file, file->buffer, 805 length[i], 0, file_baton->pool)); 806 file->endp = file->buffer + length[i]; 807 file->curp = file->buffer; 808 /* Set suffix_start_chunk to a guard value, so if suffix scanning is 809 * skipped because one of the files is empty, or because of 810 * reached_one_eof, we can still easily check for the suffix during 811 * token reading (datasource_get_next_token). */ 812 file->suffix_start_chunk = -1; 813 814 files[i] = *file; 815 } 816 817 for (i = 0; i < datasources_len; i++) 818 if (length[i] == 0) 819 /* There will not be any identical prefix/suffix, so we're done. */ 820 return SVN_NO_ERROR; 821 822#ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING 823 824 SVN_ERR(find_identical_prefix(&reached_one_eof, prefix_lines, 825 files, datasources_len, file_baton->pool)); 826 827 if (!reached_one_eof) 828 /* No file consisted totally of identical prefix, 829 * so there may be some identical suffix. */ 830 SVN_ERR(find_identical_suffix(suffix_lines, files, datasources_len, 831 file_baton->pool)); 832 833#endif 834 835 /* Copy local results back to baton. */ 836 for (i = 0; i < datasources_len; i++) 837 file_baton->files[datasource_to_index(datasources[i])] = files[i]; 838 839 return SVN_NO_ERROR; 840} 841 842 843/* Implements svn_diff_fns2_t::datasource_close */ 844static svn_error_t * 845datasource_close(void *baton, svn_diff_datasource_e datasource) 846{ 847 /* Do nothing. The compare_token function needs previous datasources 848 * to stay available until all datasources are processed. 849 */ 850 851 return SVN_NO_ERROR; 852} 853 854/* Implements svn_diff_fns2_t::datasource_get_next_token */ 855static svn_error_t * 856datasource_get_next_token(apr_uint32_t *hash, void **token, void *baton, 857 svn_diff_datasource_e datasource) 858{ 859 svn_diff__file_baton_t *file_baton = baton; 860 svn_diff__file_token_t *file_token; 861 struct file_info *file = &file_baton->files[datasource_to_index(datasource)]; 862 char *endp; 863 char *curp; 864 char *eol; 865 apr_off_t last_chunk; 866 apr_off_t length; 867 apr_uint32_t h = 0; 868 /* Did the last chunk end in a CR character? */ 869 svn_boolean_t had_cr = FALSE; 870 871 *token = NULL; 872 873 curp = file->curp; 874 endp = file->endp; 875 876 last_chunk = offset_to_chunk(file->size); 877 878 /* Are we already at the end of a chunk? */ 879 if (curp == endp) 880 { 881 /* Are we at EOF */ 882 if (last_chunk == file->chunk) 883 return SVN_NO_ERROR; /* EOF */ 884 885 /* Or right before an identical suffix in the next chunk? */ 886 if (file->chunk + 1 == file->suffix_start_chunk 887 && file->suffix_offset_in_chunk == 0) 888 return SVN_NO_ERROR; 889 } 890 891 /* Stop when we encounter the identical suffix. If suffix scanning was not 892 * performed, suffix_start_chunk will be -1, so this condition will never 893 * be true. */ 894 if (file->chunk == file->suffix_start_chunk 895 && (curp - file->buffer) == file->suffix_offset_in_chunk) 896 return SVN_NO_ERROR; 897 898 /* Allocate a new token, or fetch one from the "reusable tokens" list. */ 899 file_token = file_baton->tokens; 900 if (file_token) 901 { 902 file_baton->tokens = file_token->next; 903 } 904 else 905 { 906 file_token = apr_palloc(file_baton->pool, sizeof(*file_token)); 907 } 908 909 file_token->datasource = datasource; 910 file_token->offset = chunk_to_offset(file->chunk) 911 + (curp - file->buffer); 912 file_token->norm_offset = file_token->offset; 913 file_token->raw_length = 0; 914 file_token->length = 0; 915 916 while (1) 917 { 918 eol = svn_eol__find_eol_start(curp, endp - curp); 919 if (eol) 920 { 921 had_cr = (*eol == '\r'); 922 eol++; 923 /* If we have the whole eol sequence in the chunk... */ 924 if (!(had_cr && eol == endp)) 925 { 926 /* Also skip past the '\n' in an '\r\n' sequence. */ 927 if (had_cr && *eol == '\n') 928 eol++; 929 break; 930 } 931 } 932 933 if (file->chunk == last_chunk) 934 { 935 eol = endp; 936 break; 937 } 938 939 length = endp - curp; 940 file_token->raw_length += length; 941 { 942 char *c = curp; 943 944 svn_diff__normalize_buffer(&c, &length, 945 &file->normalize_state, 946 curp, file_baton->options); 947 if (file_token->length == 0) 948 { 949 /* When we are reading the first part of the token, move the 950 normalized offset past leading ignored characters, if any. */ 951 file_token->norm_offset += (c - curp); 952 } 953 file_token->length += length; 954 h = svn__adler32(h, c, length); 955 } 956 957 curp = endp = file->buffer; 958 file->chunk++; 959 length = file->chunk == last_chunk ? 960 offset_in_chunk(file->size) : CHUNK_SIZE; 961 endp += length; 962 file->endp = endp; 963 964 /* Issue #4283: Normally we should have checked for reaching the skipped 965 suffix here, but because we assume that a suffix always starts on a 966 line and token boundary we rely on catching the suffix earlier in this 967 function. 968 969 When changing things here, make sure the whitespace settings are 970 applied, or we might not reach the exact suffix boundary as token 971 boundary. */ 972 SVN_ERR(read_chunk(file->file, 973 curp, length, 974 chunk_to_offset(file->chunk), 975 file_baton->pool)); 976 977 /* If the last chunk ended in a CR, we're done. */ 978 if (had_cr) 979 { 980 eol = curp; 981 if (*curp == '\n') 982 ++eol; 983 break; 984 } 985 } 986 987 length = eol - curp; 988 file_token->raw_length += length; 989 file->curp = eol; 990 991 /* If the file length is exactly a multiple of CHUNK_SIZE, we will end up 992 * with a spurious empty token. Avoid returning it. 993 * Note that we use the unnormalized length; we don't want a line containing 994 * only spaces (and no trailing newline) to appear like a non-existent 995 * line. */ 996 if (file_token->raw_length > 0) 997 { 998 char *c = curp; 999 svn_diff__normalize_buffer(&c, &length, 1000 &file->normalize_state, 1001 curp, file_baton->options); 1002 if (file_token->length == 0) 1003 { 1004 /* When we are reading the first part of the token, move the 1005 normalized offset past leading ignored characters, if any. */ 1006 file_token->norm_offset += (c - curp); 1007 } 1008 1009 file_token->length += length; 1010 1011 *hash = svn__adler32(h, c, length); 1012 *token = file_token; 1013 } 1014 1015 return SVN_NO_ERROR; 1016} 1017 1018#define COMPARE_CHUNK_SIZE 4096 1019 1020/* Implements svn_diff_fns2_t::token_compare */ 1021static svn_error_t * 1022token_compare(void *baton, void *token1, void *token2, int *compare) 1023{ 1024 svn_diff__file_baton_t *file_baton = baton; 1025 svn_diff__file_token_t *file_token[2]; 1026 char buffer[2][COMPARE_CHUNK_SIZE]; 1027 char *bufp[2]; 1028 apr_off_t offset[2]; 1029 struct file_info *file[2]; 1030 apr_off_t length[2]; 1031 apr_off_t total_length; 1032 /* How much is left to read of each token from the file. */ 1033 apr_off_t raw_length[2]; 1034 int i; 1035 svn_diff__normalize_state_t state[2]; 1036 1037 file_token[0] = token1; 1038 file_token[1] = token2; 1039 if (file_token[0]->length < file_token[1]->length) 1040 { 1041 *compare = -1; 1042 return SVN_NO_ERROR; 1043 } 1044 1045 if (file_token[0]->length > file_token[1]->length) 1046 { 1047 *compare = 1; 1048 return SVN_NO_ERROR; 1049 } 1050 1051 total_length = file_token[0]->length; 1052 if (total_length == 0) 1053 { 1054 *compare = 0; 1055 return SVN_NO_ERROR; 1056 } 1057 1058 for (i = 0; i < 2; ++i) 1059 { 1060 int idx = datasource_to_index(file_token[i]->datasource); 1061 1062 file[i] = &file_baton->files[idx]; 1063 offset[i] = file_token[i]->norm_offset; 1064 state[i] = svn_diff__normalize_state_normal; 1065 1066 if (offset_to_chunk(offset[i]) == file[i]->chunk) 1067 { 1068 /* If the start of the token is in memory, the entire token is 1069 * in memory. 1070 */ 1071 bufp[i] = file[i]->buffer; 1072 bufp[i] += offset_in_chunk(offset[i]); 1073 1074 length[i] = total_length; 1075 raw_length[i] = 0; 1076 } 1077 else 1078 { 1079 apr_off_t skipped; 1080 1081 length[i] = 0; 1082 1083 /* When we skipped the first part of the token via the whitespace 1084 normalization we must reduce the raw length of the token */ 1085 skipped = (file_token[i]->norm_offset - file_token[i]->offset); 1086 1087 raw_length[i] = file_token[i]->raw_length - skipped; 1088 } 1089 } 1090 1091 do 1092 { 1093 apr_off_t len; 1094 for (i = 0; i < 2; i++) 1095 { 1096 if (length[i] == 0) 1097 { 1098 /* Error if raw_length is 0, that's an unexpected change 1099 * of the file that can happen when ingoring whitespace 1100 * and that can lead to an infinite loop. */ 1101 if (raw_length[i] == 0) 1102 return svn_error_createf(SVN_ERR_DIFF_DATASOURCE_MODIFIED, 1103 NULL, 1104 _("The file '%s' changed unexpectedly" 1105 " during diff"), 1106 file[i]->path); 1107 1108 /* Read a chunk from disk into a buffer */ 1109 bufp[i] = buffer[i]; 1110 length[i] = raw_length[i] > COMPARE_CHUNK_SIZE ? 1111 COMPARE_CHUNK_SIZE : raw_length[i]; 1112 1113 SVN_ERR(read_chunk(file[i]->file, 1114 bufp[i], length[i], offset[i], 1115 file_baton->pool)); 1116 offset[i] += length[i]; 1117 raw_length[i] -= length[i]; 1118 /* bufp[i] gets reset to buffer[i] before reading each chunk, 1119 so, overwriting it isn't a problem */ 1120 svn_diff__normalize_buffer(&bufp[i], &length[i], &state[i], 1121 bufp[i], file_baton->options); 1122 1123 /* assert(length[i] == file_token[i]->length); */ 1124 } 1125 } 1126 1127 len = length[0] > length[1] ? length[1] : length[0]; 1128 1129 /* Compare two chunks (that could be entire tokens if they both reside 1130 * in memory). 1131 */ 1132 *compare = memcmp(bufp[0], bufp[1], (size_t) len); 1133 if (*compare != 0) 1134 return SVN_NO_ERROR; 1135 1136 total_length -= len; 1137 length[0] -= len; 1138 length[1] -= len; 1139 bufp[0] += len; 1140 bufp[1] += len; 1141 } 1142 while(total_length > 0); 1143 1144 *compare = 0; 1145 return SVN_NO_ERROR; 1146} 1147 1148 1149/* Implements svn_diff_fns2_t::token_discard */ 1150static void 1151token_discard(void *baton, void *token) 1152{ 1153 svn_diff__file_baton_t *file_baton = baton; 1154 svn_diff__file_token_t *file_token = token; 1155 1156 /* Prepend FILE_TOKEN to FILE_BATON->TOKENS, for reuse. */ 1157 file_token->next = file_baton->tokens; 1158 file_baton->tokens = file_token; 1159} 1160 1161 1162/* Implements svn_diff_fns2_t::token_discard_all */ 1163static void 1164token_discard_all(void *baton) 1165{ 1166 svn_diff__file_baton_t *file_baton = baton; 1167 1168 /* Discard all memory in use by the tokens, and close all open files. */ 1169 svn_pool_clear(file_baton->pool); 1170} 1171 1172 1173static const svn_diff_fns2_t svn_diff__file_vtable = 1174{ 1175 datasources_open, 1176 datasource_close, 1177 datasource_get_next_token, 1178 token_compare, 1179 token_discard, 1180 token_discard_all 1181}; 1182 1183/* Id for the --ignore-eol-style option, which doesn't have a short name. */ 1184#define SVN_DIFF__OPT_IGNORE_EOL_STYLE 256 1185 1186/* Options supported by svn_diff_file_options_parse(). */ 1187static const apr_getopt_option_t diff_options[] = 1188{ 1189 { "ignore-space-change", 'b', 0, NULL }, 1190 { "ignore-all-space", 'w', 0, NULL }, 1191 { "ignore-eol-style", SVN_DIFF__OPT_IGNORE_EOL_STYLE, 0, NULL }, 1192 { "show-c-function", 'p', 0, NULL }, 1193 /* ### For compatibility; we don't support the argument to -u, because 1194 * ### we don't have optional argument support. */ 1195 { "unified", 'u', 0, NULL }, 1196 { "context", 'U', 1, NULL }, 1197 { NULL, 0, 0, NULL } 1198}; 1199 1200svn_diff_file_options_t * 1201svn_diff_file_options_create(apr_pool_t *pool) 1202{ 1203 svn_diff_file_options_t * opts = apr_pcalloc(pool, sizeof(*opts)); 1204 1205 opts->context_size = SVN_DIFF__UNIFIED_CONTEXT_SIZE; 1206 1207 return opts; 1208} 1209 1210/* A baton for use with opt_parsing_error_func(). */ 1211struct opt_parsing_error_baton_t 1212{ 1213 svn_error_t *err; 1214 apr_pool_t *pool; 1215}; 1216 1217/* Store an error message from apr_getopt_long(). Set BATON->err to a new 1218 * error with a message generated from FMT and the remaining arguments. 1219 * Implements apr_getopt_err_fn_t. */ 1220static void 1221opt_parsing_error_func(void *baton, 1222 const char *fmt, ...) 1223{ 1224 struct opt_parsing_error_baton_t *b = baton; 1225 const char *message; 1226 va_list ap; 1227 1228 va_start(ap, fmt); 1229 message = apr_pvsprintf(b->pool, fmt, ap); 1230 va_end(ap); 1231 1232 /* Skip leading ": " (if present, which it always is in known cases). */ 1233 if (strncmp(message, ": ", 2) == 0) 1234 message += 2; 1235 1236 b->err = svn_error_create(SVN_ERR_INVALID_DIFF_OPTION, NULL, message); 1237} 1238 1239svn_error_t * 1240svn_diff_file_options_parse(svn_diff_file_options_t *options, 1241 const apr_array_header_t *args, 1242 apr_pool_t *pool) 1243{ 1244 apr_getopt_t *os; 1245 struct opt_parsing_error_baton_t opt_parsing_error_baton; 1246 /* Make room for each option (starting at index 1) plus trailing NULL. */ 1247 const char **argv = apr_palloc(pool, sizeof(char*) * (args->nelts + 2)); 1248 1249 opt_parsing_error_baton.err = NULL; 1250 opt_parsing_error_baton.pool = pool; 1251 1252 argv[0] = ""; 1253 memcpy(argv + 1, args->elts, sizeof(char*) * args->nelts); 1254 argv[args->nelts + 1] = NULL; 1255 1256 apr_getopt_init(&os, pool, args->nelts + 1, argv); 1257 1258 /* Capture any error message from apr_getopt_long(). This will typically 1259 * say which option is wrong, which we would not otherwise know. */ 1260 os->errfn = opt_parsing_error_func; 1261 os->errarg = &opt_parsing_error_baton; 1262 1263 while (1) 1264 { 1265 const char *opt_arg; 1266 int opt_id; 1267 apr_status_t err = apr_getopt_long(os, diff_options, &opt_id, &opt_arg); 1268 1269 if (APR_STATUS_IS_EOF(err)) 1270 break; 1271 if (err) 1272 /* Wrap apr_getopt_long()'s error message. Its doc string implies 1273 * it always will produce one, but never mind if it doesn't. Avoid 1274 * using the message associated with the return code ERR, because 1275 * it refers to the "command line" which may be misleading here. */ 1276 return svn_error_create(SVN_ERR_INVALID_DIFF_OPTION, 1277 opt_parsing_error_baton.err, 1278 _("Error in options to internal diff")); 1279 1280 switch (opt_id) 1281 { 1282 case 'b': 1283 /* -w takes precedence over -b. */ 1284 if (! options->ignore_space) 1285 options->ignore_space = svn_diff_file_ignore_space_change; 1286 break; 1287 case 'w': 1288 options->ignore_space = svn_diff_file_ignore_space_all; 1289 break; 1290 case SVN_DIFF__OPT_IGNORE_EOL_STYLE: 1291 options->ignore_eol_style = TRUE; 1292 break; 1293 case 'p': 1294 options->show_c_function = TRUE; 1295 break; 1296 case 'U': 1297 SVN_ERR(svn_cstring_atoi(&options->context_size, opt_arg)); 1298 break; 1299 default: 1300 break; 1301 } 1302 } 1303 1304 /* Check for spurious arguments. */ 1305 if (os->ind < os->argc) 1306 return svn_error_createf(SVN_ERR_INVALID_DIFF_OPTION, NULL, 1307 _("Invalid argument '%s' in diff options"), 1308 os->argv[os->ind]); 1309 1310 return SVN_NO_ERROR; 1311} 1312 1313svn_error_t * 1314svn_diff_file_diff_2(svn_diff_t **diff, 1315 const char *original, 1316 const char *modified, 1317 const svn_diff_file_options_t *options, 1318 apr_pool_t *pool) 1319{ 1320 svn_diff__file_baton_t baton = { 0 }; 1321 1322 baton.options = options; 1323 baton.files[0].path = original; 1324 baton.files[1].path = modified; 1325 baton.pool = svn_pool_create(pool); 1326 1327 SVN_ERR(svn_diff_diff_2(diff, &baton, &svn_diff__file_vtable, pool)); 1328 1329 svn_pool_destroy(baton.pool); 1330 return SVN_NO_ERROR; 1331} 1332 1333svn_error_t * 1334svn_diff_file_diff3_2(svn_diff_t **diff, 1335 const char *original, 1336 const char *modified, 1337 const char *latest, 1338 const svn_diff_file_options_t *options, 1339 apr_pool_t *pool) 1340{ 1341 svn_diff__file_baton_t baton = { 0 }; 1342 1343 baton.options = options; 1344 baton.files[0].path = original; 1345 baton.files[1].path = modified; 1346 baton.files[2].path = latest; 1347 baton.pool = svn_pool_create(pool); 1348 1349 SVN_ERR(svn_diff_diff3_2(diff, &baton, &svn_diff__file_vtable, pool)); 1350 1351 svn_pool_destroy(baton.pool); 1352 return SVN_NO_ERROR; 1353} 1354 1355svn_error_t * 1356svn_diff_file_diff4_2(svn_diff_t **diff, 1357 const char *original, 1358 const char *modified, 1359 const char *latest, 1360 const char *ancestor, 1361 const svn_diff_file_options_t *options, 1362 apr_pool_t *pool) 1363{ 1364 svn_diff__file_baton_t baton = { 0 }; 1365 1366 baton.options = options; 1367 baton.files[0].path = original; 1368 baton.files[1].path = modified; 1369 baton.files[2].path = latest; 1370 baton.files[3].path = ancestor; 1371 baton.pool = svn_pool_create(pool); 1372 1373 SVN_ERR(svn_diff_diff4_2(diff, &baton, &svn_diff__file_vtable, pool)); 1374 1375 svn_pool_destroy(baton.pool); 1376 return SVN_NO_ERROR; 1377} 1378 1379 1380/** Display unified context diffs **/ 1381 1382/* Maximum length of the extra context to show when show_c_function is set. 1383 * GNU diff uses 40, let's be brave and use 50 instead. */ 1384#define SVN_DIFF__EXTRA_CONTEXT_LENGTH 50 1385typedef struct svn_diff__file_output_baton_t 1386{ 1387 svn_stream_t *output_stream; 1388 const char *header_encoding; 1389 1390 /* Cached markers, in header_encoding. */ 1391 const char *context_str; 1392 const char *delete_str; 1393 const char *insert_str; 1394 1395 const char *path[2]; 1396 apr_file_t *file[2]; 1397 1398 apr_off_t current_line[2]; 1399 1400 char buffer[2][4096]; 1401 apr_size_t length[2]; 1402 char *curp[2]; 1403 1404 apr_off_t hunk_start[2]; 1405 apr_off_t hunk_length[2]; 1406 svn_stringbuf_t *hunk; 1407 1408 /* Should we emit C functions in the unified diff header */ 1409 svn_boolean_t show_c_function; 1410 /* Extra strings to skip over if we match. */ 1411 apr_array_header_t *extra_skip_match; 1412 /* "Context" to append to the @@ line when the show_c_function option 1413 * is set. */ 1414 svn_stringbuf_t *extra_context; 1415 /* Extra context for the current hunk. */ 1416 char hunk_extra_context[SVN_DIFF__EXTRA_CONTEXT_LENGTH + 1]; 1417 1418 int context_size; 1419 1420 apr_pool_t *pool; 1421} svn_diff__file_output_baton_t; 1422 1423typedef enum svn_diff__file_output_unified_type_e 1424{ 1425 svn_diff__file_output_unified_skip, 1426 svn_diff__file_output_unified_context, 1427 svn_diff__file_output_unified_delete, 1428 svn_diff__file_output_unified_insert 1429} svn_diff__file_output_unified_type_e; 1430 1431 1432static svn_error_t * 1433output_unified_line(svn_diff__file_output_baton_t *baton, 1434 svn_diff__file_output_unified_type_e type, int idx) 1435{ 1436 char *curp; 1437 char *eol; 1438 apr_size_t length; 1439 svn_error_t *err; 1440 svn_boolean_t bytes_processed = FALSE; 1441 svn_boolean_t had_cr = FALSE; 1442 /* Are we collecting extra context? */ 1443 svn_boolean_t collect_extra = FALSE; 1444 1445 length = baton->length[idx]; 1446 curp = baton->curp[idx]; 1447 1448 /* Lazily update the current line even if we're at EOF. 1449 * This way we fake output of context at EOF 1450 */ 1451 baton->current_line[idx]++; 1452 1453 if (length == 0 && apr_file_eof(baton->file[idx])) 1454 { 1455 return SVN_NO_ERROR; 1456 } 1457 1458 do 1459 { 1460 if (length > 0) 1461 { 1462 if (!bytes_processed) 1463 { 1464 switch (type) 1465 { 1466 case svn_diff__file_output_unified_context: 1467 svn_stringbuf_appendcstr(baton->hunk, baton->context_str); 1468 baton->hunk_length[0]++; 1469 baton->hunk_length[1]++; 1470 break; 1471 case svn_diff__file_output_unified_delete: 1472 svn_stringbuf_appendcstr(baton->hunk, baton->delete_str); 1473 baton->hunk_length[0]++; 1474 break; 1475 case svn_diff__file_output_unified_insert: 1476 svn_stringbuf_appendcstr(baton->hunk, baton->insert_str); 1477 baton->hunk_length[1]++; 1478 break; 1479 default: 1480 break; 1481 } 1482 1483 if (baton->show_c_function 1484 && (type == svn_diff__file_output_unified_skip 1485 || type == svn_diff__file_output_unified_context) 1486 && (svn_ctype_isalpha(*curp) || *curp == '$' || *curp == '_') 1487 && !svn_cstring_match_glob_list(curp, 1488 baton->extra_skip_match)) 1489 { 1490 svn_stringbuf_setempty(baton->extra_context); 1491 collect_extra = TRUE; 1492 } 1493 } 1494 1495 eol = svn_eol__find_eol_start(curp, length); 1496 1497 if (eol != NULL) 1498 { 1499 apr_size_t len; 1500 1501 had_cr = (*eol == '\r'); 1502 eol++; 1503 len = (apr_size_t)(eol - curp); 1504 1505 if (! had_cr || len < length) 1506 { 1507 if (had_cr && *eol == '\n') 1508 { 1509 ++eol; 1510 ++len; 1511 } 1512 1513 length -= len; 1514 1515 if (type != svn_diff__file_output_unified_skip) 1516 { 1517 svn_stringbuf_appendbytes(baton->hunk, curp, len); 1518 } 1519 if (collect_extra) 1520 { 1521 svn_stringbuf_appendbytes(baton->extra_context, 1522 curp, len); 1523 } 1524 1525 baton->curp[idx] = eol; 1526 baton->length[idx] = length; 1527 1528 err = SVN_NO_ERROR; 1529 1530 break; 1531 } 1532 } 1533 1534 if (type != svn_diff__file_output_unified_skip) 1535 { 1536 svn_stringbuf_appendbytes(baton->hunk, curp, length); 1537 } 1538 1539 if (collect_extra) 1540 { 1541 svn_stringbuf_appendbytes(baton->extra_context, curp, length); 1542 } 1543 1544 bytes_processed = TRUE; 1545 } 1546 1547 curp = baton->buffer[idx]; 1548 length = sizeof(baton->buffer[idx]); 1549 1550 err = svn_io_file_read(baton->file[idx], curp, &length, baton->pool); 1551 1552 /* If the last chunk ended with a CR, we look for an LF at the start 1553 of this chunk. */ 1554 if (had_cr) 1555 { 1556 if (! err && length > 0 && *curp == '\n') 1557 { 1558 if (type != svn_diff__file_output_unified_skip) 1559 { 1560 svn_stringbuf_appendbyte(baton->hunk, *curp); 1561 } 1562 /* We don't append the LF to extra_context, since it would 1563 * just be stripped anyway. */ 1564 ++curp; 1565 --length; 1566 } 1567 1568 baton->curp[idx] = curp; 1569 baton->length[idx] = length; 1570 1571 break; 1572 } 1573 } 1574 while (! err); 1575 1576 if (err && ! APR_STATUS_IS_EOF(err->apr_err)) 1577 return err; 1578 1579 if (err && APR_STATUS_IS_EOF(err->apr_err)) 1580 { 1581 svn_error_clear(err); 1582 /* Special case if we reach the end of file AND the last line is in the 1583 changed range AND the file doesn't end with a newline */ 1584 if (bytes_processed && (type != svn_diff__file_output_unified_skip) 1585 && ! had_cr) 1586 { 1587 SVN_ERR(svn_diff__unified_append_no_newline_msg( 1588 baton->hunk, baton->header_encoding, baton->pool)); 1589 } 1590 1591 baton->length[idx] = 0; 1592 } 1593 1594 return SVN_NO_ERROR; 1595} 1596 1597static APR_INLINE svn_error_t * 1598output_unified_diff_range(svn_diff__file_output_baton_t *output_baton, 1599 int source, 1600 svn_diff__file_output_unified_type_e type, 1601 apr_off_t until) 1602{ 1603 while (output_baton->current_line[source] < until) 1604 { 1605 SVN_ERR(output_unified_line(output_baton, type, source)); 1606 } 1607 return SVN_NO_ERROR; 1608} 1609 1610static svn_error_t * 1611output_unified_flush_hunk(svn_diff__file_output_baton_t *baton) 1612{ 1613 apr_off_t target_line; 1614 apr_size_t hunk_len; 1615 apr_off_t old_start; 1616 apr_off_t new_start; 1617 1618 if (svn_stringbuf_isempty(baton->hunk)) 1619 { 1620 /* Nothing to flush */ 1621 return SVN_NO_ERROR; 1622 } 1623 1624 target_line = baton->hunk_start[0] + baton->hunk_length[0] 1625 + baton->context_size; 1626 1627 /* Add trailing context to the hunk */ 1628 SVN_ERR(output_unified_diff_range(baton, 0 /* original */, 1629 svn_diff__file_output_unified_context, 1630 target_line)); 1631 1632 old_start = baton->hunk_start[0]; 1633 new_start = baton->hunk_start[1]; 1634 1635 /* If the file is non-empty, convert the line indexes from 1636 zero based to one based */ 1637 if (baton->hunk_length[0]) 1638 old_start++; 1639 if (baton->hunk_length[1]) 1640 new_start++; 1641 1642 /* Write the hunk header */ 1643 SVN_ERR(svn_diff__unified_write_hunk_header( 1644 baton->output_stream, baton->header_encoding, "@@", 1645 old_start, baton->hunk_length[0], 1646 new_start, baton->hunk_length[1], 1647 baton->hunk_extra_context, 1648 baton->pool)); 1649 1650 /* Output the hunk content */ 1651 hunk_len = baton->hunk->len; 1652 SVN_ERR(svn_stream_write(baton->output_stream, baton->hunk->data, 1653 &hunk_len)); 1654 1655 /* Prepare for the next hunk */ 1656 baton->hunk_length[0] = 0; 1657 baton->hunk_length[1] = 0; 1658 baton->hunk_start[0] = 0; 1659 baton->hunk_start[1] = 0; 1660 svn_stringbuf_setempty(baton->hunk); 1661 1662 return SVN_NO_ERROR; 1663} 1664 1665static svn_error_t * 1666output_unified_diff_modified(void *baton, 1667 apr_off_t original_start, apr_off_t original_length, 1668 apr_off_t modified_start, apr_off_t modified_length, 1669 apr_off_t latest_start, apr_off_t latest_length) 1670{ 1671 svn_diff__file_output_baton_t *output_baton = baton; 1672 apr_off_t context_prefix_length; 1673 apr_off_t prev_context_end; 1674 svn_boolean_t init_hunk = FALSE; 1675 1676 if (original_start > output_baton->context_size) 1677 context_prefix_length = output_baton->context_size; 1678 else 1679 context_prefix_length = original_start; 1680 1681 /* Calculate where the previous hunk will end if we would write it now 1682 (including the necessary context at the end) */ 1683 if (output_baton->hunk_length[0] > 0 || output_baton->hunk_length[1] > 0) 1684 { 1685 prev_context_end = output_baton->hunk_start[0] 1686 + output_baton->hunk_length[0] 1687 + output_baton->context_size; 1688 } 1689 else 1690 { 1691 prev_context_end = -1; 1692 1693 if (output_baton->hunk_start[0] == 0 1694 && (original_length > 0 || modified_length > 0)) 1695 init_hunk = TRUE; 1696 } 1697 1698 /* If the changed range is far enough from the previous range, flush the current 1699 hunk. */ 1700 { 1701 apr_off_t new_hunk_start = (original_start - context_prefix_length); 1702 1703 if (output_baton->current_line[0] < new_hunk_start 1704 && prev_context_end <= new_hunk_start) 1705 { 1706 SVN_ERR(output_unified_flush_hunk(output_baton)); 1707 init_hunk = TRUE; 1708 } 1709 else if (output_baton->hunk_length[0] > 0 1710 || output_baton->hunk_length[1] > 0) 1711 { 1712 /* We extend the current hunk */ 1713 1714 1715 /* Original: Output the context preceding the changed range */ 1716 SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */, 1717 svn_diff__file_output_unified_context, 1718 original_start)); 1719 } 1720 } 1721 1722 /* Original: Skip lines until we are at the beginning of the context we want 1723 to display */ 1724 SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */, 1725 svn_diff__file_output_unified_skip, 1726 original_start - context_prefix_length)); 1727 1728 /* Note that the above skip stores data for the show_c_function support below */ 1729 1730 if (init_hunk) 1731 { 1732 SVN_ERR_ASSERT(output_baton->hunk_length[0] == 0 1733 && output_baton->hunk_length[1] == 0); 1734 1735 output_baton->hunk_start[0] = original_start - context_prefix_length; 1736 output_baton->hunk_start[1] = modified_start - context_prefix_length; 1737 } 1738 1739 if (init_hunk && output_baton->show_c_function) 1740 { 1741 apr_size_t p; 1742 const char *invalid_character; 1743 1744 /* Save the extra context for later use. 1745 * Note that the last byte of the hunk_extra_context array is never 1746 * touched after it is zero-initialized, so the array is always 1747 * 0-terminated. */ 1748 strncpy(output_baton->hunk_extra_context, 1749 output_baton->extra_context->data, 1750 SVN_DIFF__EXTRA_CONTEXT_LENGTH); 1751 /* Trim whitespace at the end, most notably to get rid of any 1752 * newline characters. */ 1753 p = strlen(output_baton->hunk_extra_context); 1754 while (p > 0 1755 && svn_ctype_isspace(output_baton->hunk_extra_context[p - 1])) 1756 { 1757 output_baton->hunk_extra_context[--p] = '\0'; 1758 } 1759 invalid_character = 1760 svn_utf__last_valid(output_baton->hunk_extra_context, 1761 SVN_DIFF__EXTRA_CONTEXT_LENGTH); 1762 for (p = invalid_character - output_baton->hunk_extra_context; 1763 p < SVN_DIFF__EXTRA_CONTEXT_LENGTH; p++) 1764 { 1765 output_baton->hunk_extra_context[p] = '\0'; 1766 } 1767 } 1768 1769 /* Modified: Skip lines until we are at the start of the changed range */ 1770 SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */, 1771 svn_diff__file_output_unified_skip, 1772 modified_start)); 1773 1774 /* Original: Output the context preceding the changed range */ 1775 SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */, 1776 svn_diff__file_output_unified_context, 1777 original_start)); 1778 1779 /* Both: Output the changed range */ 1780 SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */, 1781 svn_diff__file_output_unified_delete, 1782 original_start + original_length)); 1783 SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */, 1784 svn_diff__file_output_unified_insert, 1785 modified_start + modified_length)); 1786 1787 return SVN_NO_ERROR; 1788} 1789 1790/* Set *HEADER to a new string consisting of PATH, a tab, and PATH's mtime. */ 1791static svn_error_t * 1792output_unified_default_hdr(const char **header, const char *path, 1793 apr_pool_t *pool) 1794{ 1795 apr_finfo_t file_info; 1796 apr_time_exp_t exploded_time; 1797 char time_buffer[64]; 1798 apr_size_t time_len; 1799 const char *utf8_timestr; 1800 1801 SVN_ERR(svn_io_stat(&file_info, path, APR_FINFO_MTIME, pool)); 1802 apr_time_exp_lt(&exploded_time, file_info.mtime); 1803 1804 apr_strftime(time_buffer, &time_len, sizeof(time_buffer) - 1, 1805 /* Order of date components can be different in different languages */ 1806 _("%a %b %e %H:%M:%S %Y"), &exploded_time); 1807 1808 SVN_ERR(svn_utf_cstring_to_utf8(&utf8_timestr, time_buffer, pool)); 1809 1810 *header = apr_psprintf(pool, "%s\t%s", path, utf8_timestr); 1811 1812 return SVN_NO_ERROR; 1813} 1814 1815static const svn_diff_output_fns_t svn_diff__file_output_unified_vtable = 1816{ 1817 NULL, /* output_common */ 1818 output_unified_diff_modified, 1819 NULL, /* output_diff_latest */ 1820 NULL, /* output_diff_common */ 1821 NULL /* output_conflict */ 1822}; 1823 1824svn_error_t * 1825svn_diff_file_output_unified4(svn_stream_t *output_stream, 1826 svn_diff_t *diff, 1827 const char *original_path, 1828 const char *modified_path, 1829 const char *original_header, 1830 const char *modified_header, 1831 const char *header_encoding, 1832 const char *relative_to_dir, 1833 svn_boolean_t show_c_function, 1834 int context_size, 1835 svn_cancel_func_t cancel_func, 1836 void *cancel_baton, 1837 apr_pool_t *pool) 1838{ 1839 if (svn_diff_contains_diffs(diff)) 1840 { 1841 svn_diff__file_output_baton_t baton; 1842 int i; 1843 1844 memset(&baton, 0, sizeof(baton)); 1845 baton.output_stream = output_stream; 1846 baton.pool = pool; 1847 baton.header_encoding = header_encoding; 1848 baton.path[0] = original_path; 1849 baton.path[1] = modified_path; 1850 baton.hunk = svn_stringbuf_create_empty(pool); 1851 baton.show_c_function = show_c_function; 1852 baton.extra_context = svn_stringbuf_create_empty(pool); 1853 baton.context_size = (context_size >= 0) ? context_size 1854 : SVN_DIFF__UNIFIED_CONTEXT_SIZE; 1855 1856 if (show_c_function) 1857 { 1858 baton.extra_skip_match = apr_array_make(pool, 3, sizeof(char **)); 1859 1860 APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "public:*"; 1861 APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "private:*"; 1862 APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "protected:*"; 1863 } 1864 1865 SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.context_str, " ", 1866 header_encoding, pool)); 1867 SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.delete_str, "-", 1868 header_encoding, pool)); 1869 SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.insert_str, "+", 1870 header_encoding, pool)); 1871 1872 if (relative_to_dir) 1873 { 1874 /* Possibly adjust the "original" and "modified" paths shown in 1875 the output (see issue #2723). */ 1876 const char *child_path; 1877 1878 if (! original_header) 1879 { 1880 child_path = svn_dirent_is_child(relative_to_dir, 1881 original_path, pool); 1882 if (child_path) 1883 original_path = child_path; 1884 else 1885 return svn_error_createf( 1886 SVN_ERR_BAD_RELATIVE_PATH, NULL, 1887 _("Path '%s' must be inside " 1888 "the directory '%s'"), 1889 svn_dirent_local_style(original_path, pool), 1890 svn_dirent_local_style(relative_to_dir, 1891 pool)); 1892 } 1893 1894 if (! modified_header) 1895 { 1896 child_path = svn_dirent_is_child(relative_to_dir, 1897 modified_path, pool); 1898 if (child_path) 1899 modified_path = child_path; 1900 else 1901 return svn_error_createf( 1902 SVN_ERR_BAD_RELATIVE_PATH, NULL, 1903 _("Path '%s' must be inside " 1904 "the directory '%s'"), 1905 svn_dirent_local_style(modified_path, pool), 1906 svn_dirent_local_style(relative_to_dir, 1907 pool)); 1908 } 1909 } 1910 1911 for (i = 0; i < 2; i++) 1912 { 1913 SVN_ERR(svn_io_file_open(&baton.file[i], baton.path[i], 1914 APR_READ, APR_OS_DEFAULT, pool)); 1915 } 1916 1917 if (original_header == NULL) 1918 { 1919 SVN_ERR(output_unified_default_hdr(&original_header, original_path, 1920 pool)); 1921 } 1922 1923 if (modified_header == NULL) 1924 { 1925 SVN_ERR(output_unified_default_hdr(&modified_header, modified_path, 1926 pool)); 1927 } 1928 1929 SVN_ERR(svn_diff__unidiff_write_header(output_stream, header_encoding, 1930 original_header, modified_header, 1931 pool)); 1932 1933 SVN_ERR(svn_diff_output2(diff, &baton, 1934 &svn_diff__file_output_unified_vtable, 1935 cancel_func, cancel_baton)); 1936 SVN_ERR(output_unified_flush_hunk(&baton)); 1937 1938 for (i = 0; i < 2; i++) 1939 { 1940 SVN_ERR(svn_io_file_close(baton.file[i], pool)); 1941 } 1942 } 1943 1944 return SVN_NO_ERROR; 1945} 1946 1947 1948/** Display diff3 **/ 1949 1950/* A stream to remember *leading* context. Note that this stream does 1951 *not* copy the data that it is remembering; it just saves 1952 *pointers! */ 1953typedef struct context_saver_t { 1954 svn_stream_t *stream; 1955 int context_size; 1956 const char **data; /* const char *data[context_size] */ 1957 apr_size_t *len; /* apr_size_t len[context_size] */ 1958 apr_size_t next_slot; 1959 apr_size_t total_written; 1960} context_saver_t; 1961 1962 1963static svn_error_t * 1964context_saver_stream_write(void *baton, 1965 const char *data, 1966 apr_size_t *len) 1967{ 1968 context_saver_t *cs = baton; 1969 1970 if (cs->context_size > 0) 1971 { 1972 cs->data[cs->next_slot] = data; 1973 cs->len[cs->next_slot] = *len; 1974 cs->next_slot = (cs->next_slot + 1) % cs->context_size; 1975 cs->total_written++; 1976 } 1977 return SVN_NO_ERROR; 1978} 1979 1980typedef struct svn_diff3__file_output_baton_t 1981{ 1982 svn_stream_t *output_stream; 1983 1984 const char *path[3]; 1985 1986 apr_off_t current_line[3]; 1987 1988 char *buffer[3]; 1989 char *endp[3]; 1990 char *curp[3]; 1991 1992 /* The following four members are in the encoding used for the output. */ 1993 const char *conflict_modified; 1994 const char *conflict_original; 1995 const char *conflict_separator; 1996 const char *conflict_latest; 1997 1998 const char *marker_eol; 1999 2000 svn_diff_conflict_display_style_t conflict_style; 2001 int context_size; 2002 2003 /* cancel support */ 2004 svn_cancel_func_t cancel_func; 2005 void *cancel_baton; 2006 2007 /* The rest of the fields are for 2008 svn_diff_conflict_display_only_conflicts only. Note that for 2009 these batons, OUTPUT_STREAM is either CONTEXT_SAVER->STREAM or 2010 (soon after a conflict) a "trailing context stream", never the 2011 actual output stream.*/ 2012 /* The actual output stream. */ 2013 svn_stream_t *real_output_stream; 2014 context_saver_t *context_saver; 2015 /* Used to allocate context_saver and trailing context streams, and 2016 for some printfs. */ 2017 apr_pool_t *pool; 2018} svn_diff3__file_output_baton_t; 2019 2020static svn_error_t * 2021flush_context_saver(context_saver_t *cs, 2022 svn_stream_t *output_stream) 2023{ 2024 int i; 2025 for (i = 0; i < cs->context_size; i++) 2026 { 2027 apr_size_t slot = (i + cs->next_slot) % cs->context_size; 2028 if (cs->data[slot]) 2029 { 2030 apr_size_t len = cs->len[slot]; 2031 SVN_ERR(svn_stream_write(output_stream, cs->data[slot], &len)); 2032 } 2033 } 2034 return SVN_NO_ERROR; 2035} 2036 2037static void 2038make_context_saver(svn_diff3__file_output_baton_t *fob) 2039{ 2040 context_saver_t *cs; 2041 2042 assert(fob->context_size > 0); /* Or nothing to save */ 2043 2044 svn_pool_clear(fob->pool); 2045 cs = apr_pcalloc(fob->pool, sizeof(*cs)); 2046 cs->stream = svn_stream_empty(fob->pool); 2047 svn_stream_set_baton(cs->stream, cs); 2048 svn_stream_set_write(cs->stream, context_saver_stream_write); 2049 fob->context_saver = cs; 2050 fob->output_stream = cs->stream; 2051 cs->context_size = fob->context_size; 2052 cs->data = apr_pcalloc(fob->pool, sizeof(*cs->data) * cs->context_size); 2053 cs->len = apr_pcalloc(fob->pool, sizeof(*cs->len) * cs->context_size); 2054} 2055 2056 2057/* A stream which prints LINES_TO_PRINT (based on context size) lines to 2058 BATON->REAL_OUTPUT_STREAM, and then changes BATON->OUTPUT_STREAM to 2059 a context_saver; used for *trailing* context. */ 2060 2061struct trailing_context_printer { 2062 apr_size_t lines_to_print; 2063 svn_diff3__file_output_baton_t *fob; 2064}; 2065 2066 2067 2068static svn_error_t * 2069trailing_context_printer_write(void *baton, 2070 const char *data, 2071 apr_size_t *len) 2072{ 2073 struct trailing_context_printer *tcp = baton; 2074 SVN_ERR_ASSERT(tcp->lines_to_print > 0); 2075 SVN_ERR(svn_stream_write(tcp->fob->real_output_stream, data, len)); 2076 tcp->lines_to_print--; 2077 if (tcp->lines_to_print == 0) 2078 make_context_saver(tcp->fob); 2079 return SVN_NO_ERROR; 2080} 2081 2082 2083static void 2084make_trailing_context_printer(svn_diff3__file_output_baton_t *btn) 2085{ 2086 struct trailing_context_printer *tcp; 2087 svn_stream_t *s; 2088 2089 svn_pool_clear(btn->pool); 2090 2091 tcp = apr_pcalloc(btn->pool, sizeof(*tcp)); 2092 tcp->lines_to_print = btn->context_size; 2093 tcp->fob = btn; 2094 s = svn_stream_empty(btn->pool); 2095 svn_stream_set_baton(s, tcp); 2096 svn_stream_set_write(s, trailing_context_printer_write); 2097 btn->output_stream = s; 2098} 2099 2100 2101 2102typedef enum svn_diff3__file_output_type_e 2103{ 2104 svn_diff3__file_output_skip, 2105 svn_diff3__file_output_normal 2106} svn_diff3__file_output_type_e; 2107 2108 2109static svn_error_t * 2110output_line(svn_diff3__file_output_baton_t *baton, 2111 svn_diff3__file_output_type_e type, int idx) 2112{ 2113 char *curp; 2114 char *endp; 2115 char *eol; 2116 apr_size_t len; 2117 2118 curp = baton->curp[idx]; 2119 endp = baton->endp[idx]; 2120 2121 /* Lazily update the current line even if we're at EOF. 2122 */ 2123 baton->current_line[idx]++; 2124 2125 if (curp == endp) 2126 return SVN_NO_ERROR; 2127 2128 eol = svn_eol__find_eol_start(curp, endp - curp); 2129 if (!eol) 2130 eol = endp; 2131 else 2132 { 2133 svn_boolean_t had_cr = (*eol == '\r'); 2134 eol++; 2135 if (had_cr && eol != endp && *eol == '\n') 2136 eol++; 2137 } 2138 2139 if (type != svn_diff3__file_output_skip) 2140 { 2141 len = eol - curp; 2142 /* Note that the trailing context printer assumes that 2143 svn_stream_write is called exactly once per line. */ 2144 SVN_ERR(svn_stream_write(baton->output_stream, curp, &len)); 2145 } 2146 2147 baton->curp[idx] = eol; 2148 2149 return SVN_NO_ERROR; 2150} 2151 2152static svn_error_t * 2153output_marker_eol(svn_diff3__file_output_baton_t *btn) 2154{ 2155 return svn_stream_puts(btn->output_stream, btn->marker_eol); 2156} 2157 2158static svn_error_t * 2159output_hunk(void *baton, int idx, apr_off_t target_line, 2160 apr_off_t target_length) 2161{ 2162 svn_diff3__file_output_baton_t *output_baton = baton; 2163 2164 /* Skip lines until we are at the start of the changed range */ 2165 while (output_baton->current_line[idx] < target_line) 2166 { 2167 SVN_ERR(output_line(output_baton, svn_diff3__file_output_skip, idx)); 2168 } 2169 2170 target_line += target_length; 2171 2172 while (output_baton->current_line[idx] < target_line) 2173 { 2174 SVN_ERR(output_line(output_baton, svn_diff3__file_output_normal, idx)); 2175 } 2176 2177 return SVN_NO_ERROR; 2178} 2179 2180static svn_error_t * 2181output_common(void *baton, apr_off_t original_start, apr_off_t original_length, 2182 apr_off_t modified_start, apr_off_t modified_length, 2183 apr_off_t latest_start, apr_off_t latest_length) 2184{ 2185 return output_hunk(baton, 1, modified_start, modified_length); 2186} 2187 2188static svn_error_t * 2189output_diff_modified(void *baton, 2190 apr_off_t original_start, apr_off_t original_length, 2191 apr_off_t modified_start, apr_off_t modified_length, 2192 apr_off_t latest_start, apr_off_t latest_length) 2193{ 2194 return output_hunk(baton, 1, modified_start, modified_length); 2195} 2196 2197static svn_error_t * 2198output_diff_latest(void *baton, 2199 apr_off_t original_start, apr_off_t original_length, 2200 apr_off_t modified_start, apr_off_t modified_length, 2201 apr_off_t latest_start, apr_off_t latest_length) 2202{ 2203 return output_hunk(baton, 2, latest_start, latest_length); 2204} 2205 2206static svn_error_t * 2207output_conflict(void *baton, 2208 apr_off_t original_start, apr_off_t original_length, 2209 apr_off_t modified_start, apr_off_t modified_length, 2210 apr_off_t latest_start, apr_off_t latest_length, 2211 svn_diff_t *diff); 2212 2213static const svn_diff_output_fns_t svn_diff3__file_output_vtable = 2214{ 2215 output_common, 2216 output_diff_modified, 2217 output_diff_latest, 2218 output_diff_modified, /* output_diff_common */ 2219 output_conflict 2220}; 2221 2222static svn_error_t * 2223output_conflict_with_context_marker(svn_diff3__file_output_baton_t *btn, 2224 const char *label, 2225 apr_off_t start, 2226 apr_off_t length) 2227{ 2228 if (length == 1) 2229 SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool, 2230 "%s (%" APR_OFF_T_FMT ")", 2231 label, start + 1)); 2232 else 2233 SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool, 2234 "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")", 2235 label, start + 1, length)); 2236 2237 SVN_ERR(output_marker_eol(btn)); 2238 2239 return SVN_NO_ERROR; 2240} 2241 2242static svn_error_t * 2243output_conflict_with_context(svn_diff3__file_output_baton_t *btn, 2244 apr_off_t original_start, 2245 apr_off_t original_length, 2246 apr_off_t modified_start, 2247 apr_off_t modified_length, 2248 apr_off_t latest_start, 2249 apr_off_t latest_length) 2250{ 2251 /* Are we currently saving starting context (as opposed to printing 2252 trailing context)? If so, flush it. */ 2253 if (btn->output_stream == btn->context_saver->stream) 2254 { 2255 if (btn->context_saver->total_written > btn->context_size) 2256 SVN_ERR(svn_stream_puts(btn->real_output_stream, "@@\n")); 2257 SVN_ERR(flush_context_saver(btn->context_saver, btn->real_output_stream)); 2258 } 2259 2260 /* Print to the real output stream. */ 2261 btn->output_stream = btn->real_output_stream; 2262 2263 /* Output the conflict itself. */ 2264 SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_modified, 2265 modified_start, modified_length)); 2266 SVN_ERR(output_hunk(btn, 1/*modified*/, modified_start, modified_length)); 2267 2268 SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_original, 2269 original_start, original_length)); 2270 SVN_ERR(output_hunk(btn, 0/*original*/, original_start, original_length)); 2271 2272 SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool, 2273 "%s%s", btn->conflict_separator, btn->marker_eol)); 2274 SVN_ERR(output_hunk(btn, 2/*latest*/, latest_start, latest_length)); 2275 SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_latest, 2276 latest_start, latest_length)); 2277 2278 /* Go into print-trailing-context mode instead. */ 2279 make_trailing_context_printer(btn); 2280 2281 return SVN_NO_ERROR; 2282} 2283 2284 2285static svn_error_t * 2286output_conflict(void *baton, 2287 apr_off_t original_start, apr_off_t original_length, 2288 apr_off_t modified_start, apr_off_t modified_length, 2289 apr_off_t latest_start, apr_off_t latest_length, 2290 svn_diff_t *diff) 2291{ 2292 svn_diff3__file_output_baton_t *file_baton = baton; 2293 2294 svn_diff_conflict_display_style_t style = file_baton->conflict_style; 2295 2296 if (style == svn_diff_conflict_display_only_conflicts) 2297 return output_conflict_with_context(file_baton, 2298 original_start, original_length, 2299 modified_start, modified_length, 2300 latest_start, latest_length); 2301 2302 if (style == svn_diff_conflict_display_resolved_modified_latest) 2303 { 2304 if (diff) 2305 return svn_diff_output2(diff, baton, 2306 &svn_diff3__file_output_vtable, 2307 file_baton->cancel_func, 2308 file_baton->cancel_baton); 2309 else 2310 style = svn_diff_conflict_display_modified_latest; 2311 } 2312 2313 if (style == svn_diff_conflict_display_modified_latest || 2314 style == svn_diff_conflict_display_modified_original_latest) 2315 { 2316 SVN_ERR(svn_stream_puts(file_baton->output_stream, 2317 file_baton->conflict_modified)); 2318 SVN_ERR(output_marker_eol(file_baton)); 2319 2320 SVN_ERR(output_hunk(baton, 1, modified_start, modified_length)); 2321 2322 if (style == svn_diff_conflict_display_modified_original_latest) 2323 { 2324 SVN_ERR(svn_stream_puts(file_baton->output_stream, 2325 file_baton->conflict_original)); 2326 SVN_ERR(output_marker_eol(file_baton)); 2327 SVN_ERR(output_hunk(baton, 0, original_start, original_length)); 2328 } 2329 2330 SVN_ERR(svn_stream_puts(file_baton->output_stream, 2331 file_baton->conflict_separator)); 2332 SVN_ERR(output_marker_eol(file_baton)); 2333 2334 SVN_ERR(output_hunk(baton, 2, latest_start, latest_length)); 2335 2336 SVN_ERR(svn_stream_puts(file_baton->output_stream, 2337 file_baton->conflict_latest)); 2338 SVN_ERR(output_marker_eol(file_baton)); 2339 } 2340 else if (style == svn_diff_conflict_display_modified) 2341 SVN_ERR(output_hunk(baton, 1, modified_start, modified_length)); 2342 else if (style == svn_diff_conflict_display_latest) 2343 SVN_ERR(output_hunk(baton, 2, latest_start, latest_length)); 2344 else /* unknown style */ 2345 SVN_ERR_MALFUNCTION(); 2346 2347 return SVN_NO_ERROR; 2348} 2349 2350svn_error_t * 2351svn_diff_file_output_merge3(svn_stream_t *output_stream, 2352 svn_diff_t *diff, 2353 const char *original_path, 2354 const char *modified_path, 2355 const char *latest_path, 2356 const char *conflict_original, 2357 const char *conflict_modified, 2358 const char *conflict_latest, 2359 const char *conflict_separator, 2360 svn_diff_conflict_display_style_t style, 2361 svn_cancel_func_t cancel_func, 2362 void *cancel_baton, 2363 apr_pool_t *scratch_pool) 2364{ 2365 svn_diff3__file_output_baton_t baton; 2366 apr_file_t *file[3]; 2367 int idx; 2368#if APR_HAS_MMAP 2369 apr_mmap_t *mm[3] = { 0 }; 2370#endif /* APR_HAS_MMAP */ 2371 const char *eol; 2372 svn_boolean_t conflicts_only = 2373 (style == svn_diff_conflict_display_only_conflicts); 2374 2375 memset(&baton, 0, sizeof(baton)); 2376 baton.context_size = SVN_DIFF__UNIFIED_CONTEXT_SIZE; 2377 if (conflicts_only) 2378 { 2379 baton.pool = svn_pool_create(scratch_pool); 2380 make_context_saver(&baton); 2381 baton.real_output_stream = output_stream; 2382 } 2383 else 2384 baton.output_stream = output_stream; 2385 baton.path[0] = original_path; 2386 baton.path[1] = modified_path; 2387 baton.path[2] = latest_path; 2388 SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_modified, 2389 conflict_modified ? conflict_modified 2390 : apr_psprintf(scratch_pool, "<<<<<<< %s", 2391 modified_path), 2392 scratch_pool)); 2393 SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_original, 2394 conflict_original ? conflict_original 2395 : apr_psprintf(scratch_pool, "||||||| %s", 2396 original_path), 2397 scratch_pool)); 2398 SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_separator, 2399 conflict_separator ? conflict_separator 2400 : "=======", scratch_pool)); 2401 SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_latest, 2402 conflict_latest ? conflict_latest 2403 : apr_psprintf(scratch_pool, ">>>>>>> %s", 2404 latest_path), 2405 scratch_pool)); 2406 2407 baton.conflict_style = style; 2408 2409 for (idx = 0; idx < 3; idx++) 2410 { 2411 apr_size_t size; 2412 2413 SVN_ERR(map_or_read_file(&file[idx], 2414 MMAP_T_ARG(mm[idx]) 2415 &baton.buffer[idx], &size, 2416 baton.path[idx], scratch_pool)); 2417 2418 baton.curp[idx] = baton.buffer[idx]; 2419 baton.endp[idx] = baton.buffer[idx]; 2420 2421 if (baton.endp[idx]) 2422 baton.endp[idx] += size; 2423 } 2424 2425 /* Check what eol marker we should use for conflict markers. 2426 We use the eol marker of the modified file and fall back on the 2427 platform's eol marker if that file doesn't contain any newlines. */ 2428 eol = svn_eol__detect_eol(baton.buffer[1], baton.endp[1] - baton.buffer[1], 2429 NULL); 2430 if (! eol) 2431 eol = APR_EOL_STR; 2432 baton.marker_eol = eol; 2433 2434 baton.cancel_func = cancel_func; 2435 baton.cancel_baton = cancel_baton; 2436 2437 SVN_ERR(svn_diff_output2(diff, &baton, 2438 &svn_diff3__file_output_vtable, 2439 cancel_func, cancel_baton)); 2440 2441 for (idx = 0; idx < 3; idx++) 2442 { 2443#if APR_HAS_MMAP 2444 if (mm[idx]) 2445 { 2446 apr_status_t rv = apr_mmap_delete(mm[idx]); 2447 if (rv != APR_SUCCESS) 2448 { 2449 return svn_error_wrap_apr(rv, _("Failed to delete mmap '%s'"), 2450 baton.path[idx]); 2451 } 2452 } 2453#endif /* APR_HAS_MMAP */ 2454 2455 if (file[idx]) 2456 { 2457 SVN_ERR(svn_io_file_close(file[idx], scratch_pool)); 2458 } 2459 } 2460 2461 if (conflicts_only) 2462 svn_pool_destroy(baton.pool); 2463 2464 return SVN_NO_ERROR; 2465} 2466 2467