diff_file.c revision 362181
1/* 2 * diff_file.c : routines for doing diffs on files 3 * 4 * ==================================================================== 5 * Licensed to the Apache Software Foundation (ASF) under one 6 * or more contributor license agreements. See the NOTICE file 7 * distributed with this work for additional information 8 * regarding copyright ownership. The ASF licenses this file 9 * to you under the Apache License, Version 2.0 (the 10 * "License"); you may not use this file except in compliance 11 * with the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, 16 * software distributed under the License is distributed on an 17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 18 * KIND, either express or implied. See the License for the 19 * specific language governing permissions and limitations 20 * under the License. 21 * ==================================================================== 22 */ 23 24 25#include <apr.h> 26#include <apr_pools.h> 27#include <apr_general.h> 28#include <apr_file_io.h> 29#include <apr_file_info.h> 30#include <apr_time.h> 31#include <apr_mmap.h> 32#include <apr_getopt.h> 33 34#include <assert.h> 35 36#include "svn_error.h" 37#include "svn_diff.h" 38#include "svn_types.h" 39#include "svn_string.h" 40#include "svn_subst.h" 41#include "svn_io.h" 42#include "svn_utf.h" 43#include "svn_pools.h" 44#include "diff.h" 45#include "svn_private_config.h" 46#include "svn_path.h" 47#include "svn_ctype.h" 48 49#include "private/svn_utf_private.h" 50#include "private/svn_eol_private.h" 51#include "private/svn_dep_compat.h" 52#include "private/svn_adler32.h" 53#include "private/svn_diff_private.h" 54 55/* A token, i.e. a line read from a file. */ 56typedef struct svn_diff__file_token_t 57{ 58 /* Next token in free list. */ 59 struct svn_diff__file_token_t *next; 60 svn_diff_datasource_e datasource; 61 /* Offset in the datasource. 
*/ 62 apr_off_t offset; 63 /* Offset of the normalized token (may skip leading whitespace) */ 64 apr_off_t norm_offset; 65 /* Total length - before normalization. */ 66 apr_off_t raw_length; 67 /* Total length - after normalization. */ 68 apr_off_t length; 69} svn_diff__file_token_t; 70 71 72typedef struct svn_diff__file_baton_t 73{ 74 const svn_diff_file_options_t *options; 75 76 struct file_info { 77 const char *path; /* path to this file, absolute or relative to CWD */ 78 79 /* All the following fields are active while this datasource is open */ 80 apr_file_t *file; /* handle of this file */ 81 apr_off_t size; /* total raw size in bytes of this file */ 82 83 /* The current chunk: CHUNK_SIZE bytes except for the last chunk. */ 84 int chunk; /* the current chunk number, zero-based */ 85 char *buffer; /* a buffer containing the current chunk */ 86 char *curp; /* current position in the current chunk */ 87 char *endp; /* next memory address after the current chunk */ 88 89 svn_diff__normalize_state_t normalize_state; 90 91 /* Where the identical suffix starts in this datasource */ 92 int suffix_start_chunk; 93 apr_off_t suffix_offset_in_chunk; 94 } files[4]; 95 96 /* List of free tokens that may be reused. */ 97 svn_diff__file_token_t *tokens; 98 99 apr_pool_t *pool; 100} svn_diff__file_baton_t; 101 102static int 103datasource_to_index(svn_diff_datasource_e datasource) 104{ 105 switch (datasource) 106 { 107 case svn_diff_datasource_original: 108 return 0; 109 110 case svn_diff_datasource_modified: 111 return 1; 112 113 case svn_diff_datasource_latest: 114 return 2; 115 116 case svn_diff_datasource_ancestor: 117 return 3; 118 } 119 120 return -1; 121} 122 123/* Files are read in chunks of 128k. There is no support for this number 124 * whatsoever. If there is a number someone comes up with that has some 125 * argumentation, let's use that. 
126 */ 127/* If you change this number, update test_norm_offset(), 128 * test_identical_suffix() and and test_token_compare() in diff-diff3-test.c. 129 */ 130#define CHUNK_SHIFT 17 131#define CHUNK_SIZE (1 << CHUNK_SHIFT) 132 133#define chunk_to_offset(chunk) ((chunk) << CHUNK_SHIFT) 134#define offset_to_chunk(offset) ((offset) >> CHUNK_SHIFT) 135#define offset_in_chunk(offset) ((offset) & (CHUNK_SIZE - 1)) 136 137 138/* Read a chunk from a FILE into BUFFER, starting from OFFSET, going for 139 * *LENGTH. The actual bytes read are stored in *LENGTH on return. 140 */ 141static APR_INLINE svn_error_t * 142read_chunk(apr_file_t *file, 143 char *buffer, apr_off_t length, 144 apr_off_t offset, apr_pool_t *scratch_pool) 145{ 146 /* XXX: The final offset may not be the one we asked for. 147 * XXX: Check. 148 */ 149 SVN_ERR(svn_io_file_seek(file, APR_SET, &offset, scratch_pool)); 150 return svn_io_file_read_full2(file, buffer, (apr_size_t) length, 151 NULL, NULL, scratch_pool); 152} 153 154 155/* Map or read a file at PATH. *BUFFER will point to the file 156 * contents; if the file was mapped, *FILE and *MM will contain the 157 * mmap context; otherwise they will be NULL. SIZE will contain the 158 * file size. Allocate from POOL. 
159 */ 160#if APR_HAS_MMAP 161#define MMAP_T_PARAM(NAME) apr_mmap_t **NAME, 162#define MMAP_T_ARG(NAME) &(NAME), 163#else 164#define MMAP_T_PARAM(NAME) 165#define MMAP_T_ARG(NAME) 166#endif 167 168static svn_error_t * 169map_or_read_file(apr_file_t **file, 170 MMAP_T_PARAM(mm) 171 char **buffer, apr_size_t *size_p, 172 const char *path, apr_pool_t *pool) 173{ 174 apr_finfo_t finfo; 175 apr_status_t rv; 176 apr_size_t size; 177 178 *buffer = NULL; 179 180 SVN_ERR(svn_io_file_open(file, path, APR_READ, APR_OS_DEFAULT, pool)); 181 SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, *file, pool)); 182 183 if (finfo.size > APR_SIZE_MAX) 184 { 185 return svn_error_createf(APR_ENOMEM, NULL, 186 _("File '%s' is too large to be read in " 187 "to memory"), path); 188 } 189 190 size = (apr_size_t) finfo.size; 191#if APR_HAS_MMAP 192 if (size > APR_MMAP_THRESHOLD) 193 { 194 rv = apr_mmap_create(mm, *file, 0, size, APR_MMAP_READ, pool); 195 if (rv == APR_SUCCESS) 196 { 197 *buffer = (*mm)->mm; 198 } 199 else 200 { 201 /* Clear *MM because output parameters are undefined on error. */ 202 *mm = NULL; 203 } 204 205 /* On failure we just fall through and try reading the file into 206 * memory instead. 207 */ 208 } 209#endif /* APR_HAS_MMAP */ 210 211 if (*buffer == NULL && size > 0) 212 { 213 *buffer = apr_palloc(pool, size); 214 215 SVN_ERR(svn_io_file_read_full2(*file, *buffer, size, NULL, NULL, pool)); 216 217 /* Since we have the entire contents of the file we can 218 * close it now. 219 */ 220 SVN_ERR(svn_io_file_close(*file, pool)); 221 222 *file = NULL; 223 } 224 225 *size_p = size; 226 227 return SVN_NO_ERROR; 228} 229 230 231/* For all files in the FILE array, increment the curp pointer. If a file 232 * points before the beginning of file, let it point at the first byte again. 233 * If the end of the current chunk is reached, read the next chunk in the 234 * buffer and point curp to the start of the chunk. If EOF is reached, set 235 * curp equal to endp to indicate EOF. 
*/ 236#define INCREMENT_POINTERS(all_files, files_len, pool) \ 237 do { \ 238 apr_size_t svn_macro__i; \ 239 \ 240 for (svn_macro__i = 0; svn_macro__i < (files_len); svn_macro__i++) \ 241 { \ 242 if ((all_files)[svn_macro__i].curp < (all_files)[svn_macro__i].endp - 1)\ 243 (all_files)[svn_macro__i].curp++; \ 244 else \ 245 SVN_ERR(increment_chunk(&(all_files)[svn_macro__i], (pool))); \ 246 } \ 247 } while (0) 248 249 250/* For all files in the FILE array, decrement the curp pointer. If the 251 * start of a chunk is reached, read the previous chunk in the buffer and 252 * point curp to the last byte of the chunk. If the beginning of a FILE is 253 * reached, set chunk to -1 to indicate BOF. */ 254#define DECREMENT_POINTERS(all_files, files_len, pool) \ 255 do { \ 256 apr_size_t svn_macro__i; \ 257 \ 258 for (svn_macro__i = 0; svn_macro__i < (files_len); svn_macro__i++) \ 259 { \ 260 if ((all_files)[svn_macro__i].curp > (all_files)[svn_macro__i].buffer) \ 261 (all_files)[svn_macro__i].curp--; \ 262 else \ 263 SVN_ERR(decrement_chunk(&(all_files)[svn_macro__i], (pool))); \ 264 } \ 265 } while (0) 266 267 268static svn_error_t * 269increment_chunk(struct file_info *file, apr_pool_t *pool) 270{ 271 apr_off_t length; 272 apr_off_t last_chunk = offset_to_chunk(file->size); 273 274 if (file->chunk == -1) 275 { 276 /* We are at BOF (Beginning Of File). Point to first chunk/byte again. */ 277 file->chunk = 0; 278 file->curp = file->buffer; 279 } 280 else if (file->chunk == last_chunk) 281 { 282 /* We are at the last chunk. Indicate EOF by setting curp == endp. */ 283 file->curp = file->endp; 284 } 285 else 286 { 287 /* There are still chunks left. Read next chunk and reset pointers. */ 288 file->chunk++; 289 length = file->chunk == last_chunk ? 
290 offset_in_chunk(file->size) : CHUNK_SIZE; 291 SVN_ERR(read_chunk(file->file, file->buffer, 292 length, chunk_to_offset(file->chunk), 293 pool)); 294 file->endp = file->buffer + length; 295 file->curp = file->buffer; 296 } 297 298 return SVN_NO_ERROR; 299} 300 301 302static svn_error_t * 303decrement_chunk(struct file_info *file, apr_pool_t *pool) 304{ 305 if (file->chunk == 0) 306 { 307 /* We are already at the first chunk. Indicate BOF (Beginning Of File) 308 by setting chunk = -1 and curp = endp - 1. Both conditions are 309 important. They help the increment step to catch the BOF situation 310 in an efficient way. */ 311 file->chunk--; 312 file->curp = file->endp - 1; 313 } 314 else 315 { 316 /* Read previous chunk and reset pointers. */ 317 file->chunk--; 318 SVN_ERR(read_chunk(file->file, file->buffer, 319 CHUNK_SIZE, chunk_to_offset(file->chunk), 320 pool)); 321 file->endp = file->buffer + CHUNK_SIZE; 322 file->curp = file->endp - 1; 323 } 324 325 return SVN_NO_ERROR; 326} 327 328 329/* Check whether one of the FILEs has its pointers 'before' the beginning of 330 * the file (this can happen while scanning backwards). This is the case if 331 * one of them has chunk == -1. */ 332static svn_boolean_t 333is_one_at_bof(struct file_info file[], apr_size_t file_len) 334{ 335 apr_size_t i; 336 337 for (i = 0; i < file_len; i++) 338 if (file[i].chunk == -1) 339 return TRUE; 340 341 return FALSE; 342} 343 344/* Check whether one of the FILEs has its pointers at EOF (this is the case if 345 * one of them has curp == endp (this can only happen at the last chunk)) */ 346static svn_boolean_t 347is_one_at_eof(struct file_info file[], apr_size_t file_len) 348{ 349 apr_size_t i; 350 351 for (i = 0; i < file_len; i++) 352 if (file[i].curp == file[i].endp) 353 return TRUE; 354 355 return FALSE; 356} 357 358/* Quickly determine whether there is a eol char in CHUNK. 359 * (mainly copy-n-paste from eol.c#svn_eol__find_eol_start). 
*/

#if SVN_UNALIGNED_ACCESS_IS_OK
/* Return TRUE if at least one byte of the machine word CHUNK is '\r' or
 * '\n', FALSE otherwise.
 *
 * NOTE(review): the masks are defined outside this file; the comments below
 * assume SVN__R_MASK / SVN__N_MASK hold '\r' / '\n' replicated into every
 * byte, SVN__LOWER_7BITS_SET is 0x7f per byte and SVN__BIT_7_SET is 0x80
 * per byte -- confirm in private/svn_eol_private.h.
 */
static svn_boolean_t contains_eol(apr_uintptr_t chunk)
{
  /* A byte of CHUNK that equals the masked character becomes 0x00. */
  apr_uintptr_t r_test = chunk ^ SVN__R_MASK;
  apr_uintptr_t n_test = chunk ^ SVN__N_MASK;

  /* Raise bit 7 of every byte that is non-zero; only bytes that were
     exactly 0x00 keep bit 7 clear. */
  r_test |= (r_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;
  n_test |= (n_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;

  /* If bit 7 is missing from any byte of either test word, that byte
     matched '\r' or '\n'. */
  return (r_test & n_test & SVN__BIT_7_SET) != SVN__BIT_7_SET;
}
#endif

/* Find the prefix which is identical between all elements of the FILE array.
 * Return the number of prefix lines in PREFIX_LINES.  REACHED_ONE_EOF will be
 * set to TRUE if one of the FILEs reached its end while scanning prefix,
 * i.e. at least one file consisted entirely of prefix.  Otherwise,
 * REACHED_ONE_EOF is set to FALSE.
 *
 * FILE_LEN is the number of entries in FILE.  Every entry must have at
 * least one readable byte at curp on entry, because the first byte is
 * compared unconditionally.  POOL is handed to the chunk-reading helpers.
 *
 * After this function is finished, the buffers, chunks, curp's and endp's
 * of the FILEs are set to point at the first byte after the prefix. */
static svn_error_t *
find_identical_prefix(svn_boolean_t *reached_one_eof, apr_off_t *prefix_lines,
                      struct file_info file[], apr_size_t file_len,
                      apr_pool_t *pool)
{
  /* TRUE while the most recently scanned byte was a '\r'; used so that a
     "\r\n" pair is counted as one line. */
  svn_boolean_t had_cr = FALSE;
  svn_boolean_t is_match;
  apr_off_t lines = 0;
  apr_size_t i;

  *reached_one_eof = FALSE;

  /* Compare the current byte of file[0] with that of every other file. */
  for (i = 1, is_match = TRUE; i < file_len; i++)
    is_match = is_match && *file[0].curp == *file[i].curp;
  while (is_match)
    {
#if SVN_UNALIGNED_ACCESS_IS_OK
      apr_ssize_t max_delta, delta;
#endif /* SVN_UNALIGNED_ACCESS_IS_OK */

      /* ### TODO: see if we can take advantage of
         diff options like ignore_eol_style or ignore_space. */
      /* check for eol, and count */
      if (*file[0].curp == '\r')
        {
          lines++;
          had_cr = TRUE;
        }
      else if (*file[0].curp == '\n' && !had_cr)
        {
          lines++;
        }
      else
        {
          /* Either an ordinary byte, or the '\n' of a "\r\n" pair whose
             line was already counted at the '\r'. */
          had_cr = FALSE;
        }

      INCREMENT_POINTERS(file, file_len, pool);

#if SVN_UNALIGNED_ACCESS_IS_OK

      /* Try to advance as far as possible with machine-word granularity.
       * Determine how far we may advance with chunky ops without reaching
       * endp for any of the files.
       * Signedness is important here if curp gets close to endp.
       */
      max_delta = file[0].endp - file[0].curp - sizeof(apr_uintptr_t);
      for (i = 1; i < file_len; i++)
        {
          delta = file[i].endp - file[i].curp - sizeof(apr_uintptr_t);
          if (delta < max_delta)
            max_delta = delta;
        }

      /* Compare one machine word per iteration.  Stop at the first word
         that holds an EOL byte (so the byte-wise code above keeps the line
         count right) or that differs between the files. */
      is_match = TRUE;
      for (delta = 0; delta < max_delta; delta += sizeof(apr_uintptr_t))
        {
          apr_uintptr_t chunk = *(const apr_uintptr_t *)(file[0].curp + delta);
          if (contains_eol(chunk))
            break;

          for (i = 1; i < file_len; i++)
            if (chunk != *(const apr_uintptr_t *)(file[i].curp + delta))
              {
                is_match = FALSE;
                break;
              }

          if (! is_match)
            break;
        }

      if (delta /* > 0*/)
        {
          /* We either found a mismatch or an EOL at or shortly behind curp+delta
           * or we cannot proceed with chunky ops without exceeding endp.
           * In any way, everything up to curp + delta is equal and not an EOL.
           */
          for (i = 0; i < file_len; i++)
            file[i].curp += delta;

          /* Skipped data without EOL markers, so last char was not a CR. */
          had_cr = FALSE;
        }
#endif

      /* is_match is recomputed byte-wise below unless a file hit EOF. */
      *reached_one_eof = is_one_at_eof(file, file_len);
      if (*reached_one_eof)
        break;
      else
        for (i = 1, is_match = TRUE; i < file_len; i++)
          is_match = is_match && *file[0].curp == *file[i].curp;
    }

  if (had_cr)
    {
      /* Check if we ended in the middle of a \r\n for one file, but \r for
         another. If so, back up one byte, so the next loop will back up
         the entire line. Also decrement lines, since we counted one
         too many for the \r. */
      svn_boolean_t ended_at_nonmatching_newline = FALSE;
      for (i = 0; i < file_len; i++)
        if (file[i].curp < file[i].endp)
          ended_at_nonmatching_newline = ended_at_nonmatching_newline
                                         || *file[i].curp == '\n';
      if (ended_at_nonmatching_newline)
        {
          lines--;
          DECREMENT_POINTERS(file, file_len, pool);
        }
    }

  /* Back up one byte, so we point at the last identical byte */
  DECREMENT_POINTERS(file, file_len, pool);

  /* Back up to the last eol sequence (\n, \r\n or \r) */
  while (!is_one_at_bof(file, file_len) &&
         *file[0].curp != '\n' && *file[0].curp != '\r')
    DECREMENT_POINTERS(file, file_len, pool);

  /* Slide one byte forward, to point past the eol sequence */
  INCREMENT_POINTERS(file, file_len, pool);

  *prefix_lines = lines;

  return SVN_NO_ERROR;
}


/* The number of identical suffix lines to keep with the middle section. These
 * lines are not eliminated as suffix, and can be picked up by the token
 * parsing and lcs steps. This is mainly for backward compatibility with
 * the previous diff (and blame) output (if there are multiple diff solutions,
 * our lcs algorithm prefers taking common lines from the start, rather than
 * from the end. By giving it back some suffix lines, we give it some wiggle
 * room to find the exact same diff as before).
 *
 * The number 50 is more or less arbitrary, based on some real-world tests
 * with big files (and then doubling the required number to be on the safe
 * side). This has a negligible effect on the power of the optimization.
*/ 523/* If you change this number, update test_identical_suffix() in diff-diff3-test.c */ 524#ifndef SUFFIX_LINES_TO_KEEP 525#define SUFFIX_LINES_TO_KEEP 50 526#endif 527 528/* Find the suffix which is identical between all elements of the FILE array. 529 * Return the number of suffix lines in SUFFIX_LINES. 530 * 531 * Before this function is called the FILEs' pointers and chunks should be 532 * positioned right after the identical prefix (which is the case after 533 * find_identical_prefix), so we can determine where suffix scanning should 534 * ultimately stop. */ 535static svn_error_t * 536find_identical_suffix(apr_off_t *suffix_lines, struct file_info file[], 537 apr_size_t file_len, apr_pool_t *pool) 538{ 539 struct file_info file_for_suffix[4] = { { 0 } }; 540 apr_off_t length[4]; 541 apr_off_t suffix_min_chunk0; 542 apr_off_t suffix_min_offset0; 543 apr_off_t min_file_size; 544 int suffix_lines_to_keep = SUFFIX_LINES_TO_KEEP; 545 svn_boolean_t is_match; 546 apr_off_t lines = 0; 547 svn_boolean_t had_nl; 548 apr_size_t i; 549 550 /* Initialize file_for_suffix[]. 551 Read last chunk, position curp at last byte. 
*/ 552 for (i = 0; i < file_len; i++) 553 { 554 file_for_suffix[i].path = file[i].path; 555 file_for_suffix[i].file = file[i].file; 556 file_for_suffix[i].size = file[i].size; 557 file_for_suffix[i].chunk = 558 (int) offset_to_chunk(file_for_suffix[i].size); /* last chunk */ 559 length[i] = offset_in_chunk(file_for_suffix[i].size); 560 if (length[i] == 0) 561 { 562 /* last chunk is an empty chunk -> start at next-to-last chunk */ 563 file_for_suffix[i].chunk = file_for_suffix[i].chunk - 1; 564 length[i] = CHUNK_SIZE; 565 } 566 567 if (file_for_suffix[i].chunk == file[i].chunk) 568 { 569 /* Prefix ended in last chunk, so we can reuse the prefix buffer */ 570 file_for_suffix[i].buffer = file[i].buffer; 571 } 572 else 573 { 574 /* There is at least more than 1 chunk, 575 so allocate full chunk size buffer */ 576 file_for_suffix[i].buffer = apr_palloc(pool, CHUNK_SIZE); 577 SVN_ERR(read_chunk(file_for_suffix[i].file, 578 file_for_suffix[i].buffer, length[i], 579 chunk_to_offset(file_for_suffix[i].chunk), 580 pool)); 581 } 582 file_for_suffix[i].endp = file_for_suffix[i].buffer + length[i]; 583 file_for_suffix[i].curp = file_for_suffix[i].endp - 1; 584 } 585 586 /* Get the chunk and pointer offset (for file[0]) at which we should stop 587 scanning backward for the identical suffix, i.e. when we reach prefix. */ 588 suffix_min_chunk0 = file[0].chunk; 589 suffix_min_offset0 = file[0].curp - file[0].buffer; 590 591 /* Compensate if other files are smaller than file[0] */ 592 for (i = 1, min_file_size = file[0].size; i < file_len; i++) 593 if (file[i].size < min_file_size) 594 min_file_size = file[i].size; 595 if (file[0].size > min_file_size) 596 { 597 suffix_min_chunk0 += (file[0].size - min_file_size) / CHUNK_SIZE; 598 suffix_min_offset0 += (file[0].size - min_file_size) % CHUNK_SIZE; 599 } 600 601 /* Scan backwards until mismatch or until we reach the prefix. 
*/ 602 for (i = 1, is_match = TRUE; i < file_len; i++) 603 is_match = is_match 604 && *file_for_suffix[0].curp == *file_for_suffix[i].curp; 605 if (is_match && *file_for_suffix[0].curp != '\r' 606 && *file_for_suffix[0].curp != '\n') 607 /* Count an extra line for the last line not ending in an eol. */ 608 lines++; 609 610 had_nl = FALSE; 611 while (is_match) 612 { 613 svn_boolean_t reached_prefix; 614#if SVN_UNALIGNED_ACCESS_IS_OK 615 /* Initialize the minimum pointer positions. */ 616 const char *min_curp[4]; 617 svn_boolean_t can_read_word; 618#endif /* SVN_UNALIGNED_ACCESS_IS_OK */ 619 620 /* ### TODO: see if we can take advantage of 621 diff options like ignore_eol_style or ignore_space. */ 622 /* check for eol, and count */ 623 if (*file_for_suffix[0].curp == '\n') 624 { 625 lines++; 626 had_nl = TRUE; 627 } 628 else if (*file_for_suffix[0].curp == '\r' && !had_nl) 629 { 630 lines++; 631 } 632 else 633 { 634 had_nl = FALSE; 635 } 636 637 DECREMENT_POINTERS(file_for_suffix, file_len, pool); 638 639#if SVN_UNALIGNED_ACCESS_IS_OK 640 for (i = 0; i < file_len; i++) 641 min_curp[i] = file_for_suffix[i].buffer; 642 643 /* If we are in the same chunk that contains the last part of the common 644 prefix, use the min_curp[0] pointer to make sure we don't get a 645 suffix that overlaps the already determined common prefix. */ 646 if (file_for_suffix[0].chunk == suffix_min_chunk0) 647 min_curp[0] += suffix_min_offset0; 648 649 /* Scan quickly by reading with machine-word granularity. */ 650 for (i = 0, can_read_word = TRUE; can_read_word && i < file_len; i++) 651 can_read_word = ((file_for_suffix[i].curp + 1 - sizeof(apr_uintptr_t)) 652 > min_curp[i]); 653 654 while (can_read_word) 655 { 656 apr_uintptr_t chunk; 657 658 /* For each file curp is positioned at the current byte, but we 659 want to examine the current byte and the ones before the current 660 location as one machine word. 
*/ 661 662 chunk = *(const apr_uintptr_t *)(file_for_suffix[0].curp + 1 663 - sizeof(apr_uintptr_t)); 664 if (contains_eol(chunk)) 665 break; 666 667 for (i = 1, is_match = TRUE; is_match && i < file_len; i++) 668 is_match = (chunk 669 == *(const apr_uintptr_t *) 670 (file_for_suffix[i].curp + 1 671 - sizeof(apr_uintptr_t))); 672 673 if (! is_match) 674 break; 675 676 for (i = 0; i < file_len; i++) 677 { 678 file_for_suffix[i].curp -= sizeof(apr_uintptr_t); 679 can_read_word = can_read_word 680 && ( (file_for_suffix[i].curp + 1 681 - sizeof(apr_uintptr_t)) 682 > min_curp[i]); 683 } 684 685 /* We skipped some bytes, so there are no closing EOLs */ 686 had_nl = FALSE; 687 } 688 689 /* The > min_curp[i] check leaves at least one final byte for checking 690 in the non block optimized case below. */ 691#endif 692 693 reached_prefix = file_for_suffix[0].chunk == suffix_min_chunk0 694 && (file_for_suffix[0].curp - file_for_suffix[0].buffer) 695 == suffix_min_offset0; 696 if (reached_prefix || is_one_at_bof(file_for_suffix, file_len)) 697 break; 698 699 is_match = TRUE; 700 for (i = 1; i < file_len; i++) 701 is_match = is_match 702 && *file_for_suffix[0].curp == *file_for_suffix[i].curp; 703 } 704 705 /* Slide one byte forward, to point at the first byte of identical suffix */ 706 INCREMENT_POINTERS(file_for_suffix, file_len, pool); 707 708 /* Slide forward until we find an eol sequence to add the rest of the line 709 we're in. Then add SUFFIX_LINES_TO_KEEP more lines. Stop if at least 710 one file reaches its end. */ 711 do 712 { 713 svn_boolean_t had_cr = FALSE; 714 while (!is_one_at_eof(file_for_suffix, file_len) 715 && *file_for_suffix[0].curp != '\n' 716 && *file_for_suffix[0].curp != '\r') 717 INCREMENT_POINTERS(file_for_suffix, file_len, pool); 718 719 /* Slide one or two more bytes, to point past the eol. 
*/ 720 if (!is_one_at_eof(file_for_suffix, file_len) 721 && *file_for_suffix[0].curp == '\r') 722 { 723 lines--; 724 had_cr = TRUE; 725 INCREMENT_POINTERS(file_for_suffix, file_len, pool); 726 } 727 if (!is_one_at_eof(file_for_suffix, file_len) 728 && *file_for_suffix[0].curp == '\n') 729 { 730 if (!had_cr) 731 lines--; 732 INCREMENT_POINTERS(file_for_suffix, file_len, pool); 733 } 734 } 735 while (!is_one_at_eof(file_for_suffix, file_len) 736 && suffix_lines_to_keep--); 737 738 if (is_one_at_eof(file_for_suffix, file_len)) 739 lines = 0; 740 741 /* Save the final suffix information in the original file_info */ 742 for (i = 0; i < file_len; i++) 743 { 744 file[i].suffix_start_chunk = file_for_suffix[i].chunk; 745 file[i].suffix_offset_in_chunk = 746 file_for_suffix[i].curp - file_for_suffix[i].buffer; 747 } 748 749 *suffix_lines = lines; 750 751 return SVN_NO_ERROR; 752} 753 754 755/* Let FILE stand for the array of file_info struct elements of BATON->files 756 * that are indexed by the elements of the DATASOURCE array. 757 * BATON's type is (svn_diff__file_baton_t *). 758 * 759 * For each file in the FILE array, open the file at FILE.path; initialize 760 * FILE.file, FILE.size, FILE.buffer, FILE.curp and FILE.endp; allocate a 761 * buffer and read the first chunk. Then find the prefix and suffix lines 762 * which are identical between all the files. Return the number of identical 763 * prefix lines in PREFIX_LINES, and the number of identical suffix lines in 764 * SUFFIX_LINES. 765 * 766 * Finding the identical prefix and suffix allows us to exclude those from the 767 * rest of the diff algorithm, which increases performance by reducing the 768 * problem space. 769 * 770 * Implements svn_diff_fns2_t::datasources_open. 
*/
static svn_error_t *
datasources_open(void *baton,
                 apr_off_t *prefix_lines,
                 apr_off_t *suffix_lines,
                 const svn_diff_datasource_e *datasources,
                 apr_size_t datasources_len)
{
  svn_diff__file_baton_t *file_baton = baton;
  /* Local working copies of the baton's file_info entries, in the order
     given by DATASOURCES; copied back to the baton at the end. */
  struct file_info files[4];
  /* Number of bytes in the first chunk of each datasource. */
  apr_off_t length[4];
#ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING
  svn_boolean_t reached_one_eof;
#endif
  apr_size_t i;

  /* Make sure prefix_lines and suffix_lines are set correctly, even if we
   * exit early because one of the files is empty. */
  *prefix_lines = 0;
  *suffix_lines = 0;

  /* Open datasources and read first chunk */
  for (i = 0; i < datasources_len; i++)
    {
      svn_filesize_t filesize;
      struct file_info *file
          = &file_baton->files[datasource_to_index(datasources[i])];
      SVN_ERR(svn_io_file_open(&file->file, file->path,
                               APR_READ, APR_OS_DEFAULT, file_baton->pool));
      SVN_ERR(svn_io_file_size_get(&filesize, file->file, file_baton->pool));
      file->size = filesize;
      /* The buffer is sized for the first chunk only: at most CHUNK_SIZE
         bytes, less for a file smaller than one chunk. */
      length[i] = filesize > CHUNK_SIZE ? CHUNK_SIZE : filesize;
      file->buffer = apr_palloc(file_baton->pool, (apr_size_t) length[i]);
      SVN_ERR(read_chunk(file->file, file->buffer,
                         length[i], 0, file_baton->pool));
      file->endp = file->buffer + length[i];
      file->curp = file->buffer;
      /* Set suffix_start_chunk to a guard value, so if suffix scanning is
       * skipped because one of the files is empty, or because of
       * reached_one_eof, we can still easily check for the suffix during
       * token reading (datasource_get_next_token). */
      file->suffix_start_chunk = -1;

      files[i] = *file;
    }

  for (i = 0; i < datasources_len; i++)
    if (length[i] == 0)
      /* There will not be any identical prefix/suffix, so we're done. */
      return SVN_NO_ERROR;

#ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING

  SVN_ERR(find_identical_prefix(&reached_one_eof, prefix_lines,
                                files, datasources_len, file_baton->pool));

  if (!reached_one_eof)
    /* No file consisted totally of identical prefix,
     * so there may be some identical suffix.  */
    SVN_ERR(find_identical_suffix(suffix_lines, files, datasources_len,
                                  file_baton->pool));

#endif

  /* Copy local results back to baton. */
  for (i = 0; i < datasources_len; i++)
    file_baton->files[datasource_to_index(datasources[i])] = files[i];

  return SVN_NO_ERROR;
}


/* Implements svn_diff_fns2_t::datasource_close */
static svn_error_t *
datasource_close(void *baton, svn_diff_datasource_e datasource)
{
  /* Do nothing.  The compare_token function needs previous datasources
   * to stay available until all datasources are processed.
   */

  return SVN_NO_ERROR;
}

/* Implements svn_diff_fns2_t::datasource_get_next_token
 *
 * Read the next line of DATASOURCE, starting at the file's current read
 * position.  *TOKEN is set to NULL when there is nothing more to return:
 * at EOF, when the identical suffix has been reached, or for the empty
 * remainder of a file whose size is an exact multiple of CHUNK_SIZE.
 * Otherwise *TOKEN is set to a svn_diff__file_token_t describing the line
 * and *HASH to the adler32 checksum of its normalized contents.  *HASH is
 * left untouched when no token is returned.
 *
 * The file's buffer is normalized in place as the line is scanned, and a
 * line that crosses a chunk boundary causes the next chunk to be read into
 * the buffer. */
static svn_error_t *
datasource_get_next_token(apr_uint32_t *hash, void **token, void *baton,
                          svn_diff_datasource_e datasource)
{
  svn_diff__file_baton_t *file_baton = baton;
  svn_diff__file_token_t *file_token;
  struct file_info *file = &file_baton->files[datasource_to_index(datasource)];
  char *endp;
  char *curp;
  char *eol;
  apr_off_t last_chunk;
  apr_off_t length;
  apr_uint32_t h = 0;
  /* Did the last chunk end in a CR character? */
  svn_boolean_t had_cr = FALSE;

  *token = NULL;

  curp = file->curp;
  endp = file->endp;

  last_chunk = offset_to_chunk(file->size);

  /* Are we already at the end of a chunk? */
  if (curp == endp)
    {
      /* Are we at EOF */
      if (last_chunk == file->chunk)
        return SVN_NO_ERROR; /* EOF */

      /* Or right before an identical suffix in the next chunk? */
      if (file->chunk + 1 == file->suffix_start_chunk
          && file->suffix_offset_in_chunk == 0)
        return SVN_NO_ERROR;
    }

  /* Stop when we encounter the identical suffix. If suffix scanning was not
   * performed, suffix_start_chunk will be -1, so this condition will never
   * be true. */
  if (file->chunk == file->suffix_start_chunk
      && (curp - file->buffer) == file->suffix_offset_in_chunk)
    return SVN_NO_ERROR;

  /* Allocate a new token, or fetch one from the "reusable tokens" list. */
  file_token = file_baton->tokens;
  if (file_token)
    {
      file_baton->tokens = file_token->next;
    }
  else
    {
      file_token = apr_palloc(file_baton->pool, sizeof(*file_token));
    }

  file_token->datasource = datasource;
  file_token->offset = chunk_to_offset(file->chunk)
                       + (curp - file->buffer);
  file_token->norm_offset = file_token->offset;
  file_token->raw_length = 0;
  file_token->length = 0;

  /* Each iteration handles the part of the line that lies in the current
     chunk.  The loop ends with EOL pointing just past the line. */
  while (1)
    {
      eol = svn_eol__find_eol_start(curp, endp - curp);
      if (eol)
        {
          had_cr = (*eol == '\r');
          eol++;
          /* If we have the whole eol sequence in the chunk... */
          if (!(had_cr && eol == endp))
            {
              /* Also skip past the '\n' in an '\r\n' sequence. */
              if (had_cr && *eol == '\n')
                eol++;
              break;
            }
        }

      /* No complete eol in this chunk.  In the last chunk the line simply
         ends at the end of the data. */
      if (file->chunk == last_chunk)
        {
          eol = endp;
          break;
        }

      /* Account for the part of the line seen so far, before the buffer
         is overwritten with the next chunk. */
      length = endp - curp;
      file_token->raw_length += length;
      {
        char *c = curp;

        svn_diff__normalize_buffer(&c, &length,
                                   &file->normalize_state,
                                   curp, file_baton->options);
        if (file_token->length == 0)
          {
            /* When we are reading the first part of the token, move the
               normalized offset past leading ignored characters, if any. */
            file_token->norm_offset += (c - curp);
          }
        file_token->length += length;
        h = svn__adler32(h, c, length);
      }

      curp = endp = file->buffer;
      file->chunk++;
      length = file->chunk == last_chunk ?
        offset_in_chunk(file->size) : CHUNK_SIZE;
      endp += length;
      file->endp = endp;

      /* Issue #4283: Normally we should have checked for reaching the skipped
         suffix here, but because we assume that a suffix always starts on a
         line and token boundary we rely on catching the suffix earlier in this
         function.

         When changing things here, make sure the whitespace settings are
         applied, or we might not reach the exact suffix boundary as token
         boundary. */
      SVN_ERR(read_chunk(file->file,
                         curp, length,
                         chunk_to_offset(file->chunk),
                         file_baton->pool));

      /* If the last chunk ended in a CR, we're done. */
      if (had_cr)
        {
          eol = curp;
          if (*curp == '\n')
            ++eol;
          break;
        }
    }

  length = eol - curp;
  file_token->raw_length += length;
  file->curp = eol;

  /* If the file length is exactly a multiple of CHUNK_SIZE, we will end up
   * with a spurious empty token.  Avoid returning it.
   * Note that we use the unnormalized length; we don't want a line containing
   * only spaces (and no trailing newline) to appear like a non-existent
   * line. */
  if (file_token->raw_length > 0)
    {
      char *c = curp;
      svn_diff__normalize_buffer(&c, &length,
                                 &file->normalize_state,
                                 curp, file_baton->options);
      if (file_token->length == 0)
        {
          /* When we are reading the first part of the token, move the
             normalized offset past leading ignored characters, if any. */
          file_token->norm_offset += (c - curp);
        }

      file_token->length += length;

      *hash = svn__adler32(h, c, length);
      *token = file_token;
    }

  return SVN_NO_ERROR;
}

/* Size of the stack buffers token_compare() uses to read token data back
   from disk. */
#define COMPARE_CHUNK_SIZE 4096

/* Implements svn_diff_fns2_t::token_compare
 *
 * Compare the two svn_diff__file_token_t's TOKEN1 and TOKEN2.  *COMPARE is
 * set to -1 or 1 when the normalized length of TOKEN1 is smaller or larger
 * than that of TOKEN2, to 0 when both normalized lengths are zero, and
 * otherwise to the result of comparing the normalized contents with
 * memcmp() (0 when they are equal).
 *
 * A token that starts in the chunk currently held in its file's buffer is
 * compared straight from that buffer.  Any other token is read back from
 * disk and normalized in pieces of at most COMPARE_CHUNK_SIZE raw bytes.
 * Returns an SVN_ERR_DIFF_DATASOURCE_MODIFIED error when a token's raw
 * data is used up before the comparison is complete. */
static svn_error_t *
token_compare(void *baton, void *token1, void *token2, int *compare)
{
  svn_diff__file_baton_t *file_baton = baton;
  svn_diff__file_token_t *file_token[2];
  char buffer[2][COMPARE_CHUNK_SIZE];
  /* Current read position in each token's data. */
  char *bufp[2];
  /* File offset of the next piece to read from disk. */
  apr_off_t offset[2];
  struct file_info *file[2];
  /* Normalized bytes available at bufp[i] that were not compared yet. */
  apr_off_t length[2];
  /* Normalized bytes of the tokens that still have to be compared. */
  apr_off_t total_length;
  /* How much is left to read of each token from the file. */
  apr_off_t raw_length[2];
  int i;
  svn_diff__normalize_state_t state[2];

  file_token[0] = token1;
  file_token[1] = token2;
  if (file_token[0]->length < file_token[1]->length)
    {
      *compare = -1;
      return SVN_NO_ERROR;
    }

  if (file_token[0]->length > file_token[1]->length)
    {
      *compare = 1;
      return SVN_NO_ERROR;
    }

  total_length = file_token[0]->length;
  if (total_length == 0)
    {
      *compare = 0;
      return SVN_NO_ERROR;
    }

  for (i = 0; i < 2; ++i)
    {
      int idx = datasource_to_index(file_token[i]->datasource);

      file[i] = &file_baton->files[idx];
      offset[i] = file_token[i]->norm_offset;
      state[i] = svn_diff__normalize_state_normal;

      if (offset_to_chunk(offset[i]) == file[i]->chunk)
        {
          /* If the start of the token is in memory, the entire token is
           * in memory.
           */
          bufp[i] = file[i]->buffer;
          bufp[i] += offset_in_chunk(offset[i]);

          length[i] = total_length;
          raw_length[i] = 0;
        }
      else
        {
          apr_off_t skipped;

          /* Zero makes the loop below fetch the first piece from disk. */
          length[i] = 0;

          /* When we skipped the first part of the token via the whitespace
             normalization we must reduce the raw length of the token */
          skipped = (file_token[i]->norm_offset - file_token[i]->offset);

          raw_length[i] = file_token[i]->raw_length - skipped;
        }
    }

  do
    {
      apr_off_t len;
      for (i = 0; i < 2; i++)
        {
          if (length[i] == 0)
            {
              /* Error if raw_length is 0, that's an unexpected change
               * of the file that can happen when ignoring whitespace
               * and that can lead to an infinite loop. */
              if (raw_length[i] == 0)
                return svn_error_createf(SVN_ERR_DIFF_DATASOURCE_MODIFIED,
                                         NULL,
                                         _("The file '%s' changed unexpectedly"
                                           " during diff"),
                                         file[i]->path);

              /* Read a chunk from disk into a buffer */
              bufp[i] = buffer[i];
              length[i] = raw_length[i] > COMPARE_CHUNK_SIZE ?
                COMPARE_CHUNK_SIZE : raw_length[i];

              SVN_ERR(read_chunk(file[i]->file,
                                 bufp[i], length[i], offset[i],
                                 file_baton->pool));
              offset[i] += length[i];
              raw_length[i] -= length[i];
              /* bufp[i] gets reset to buffer[i] before reading each chunk,
                 so, overwriting it isn't a problem */
              svn_diff__normalize_buffer(&bufp[i], &length[i], &state[i],
                                         bufp[i], file_baton->options);

              /* assert(length[i] == file_token[i]->length); */
            }
        }

      /* Only as many bytes as both sides currently have available can be
         compared in this round. */
      len = length[0] > length[1] ? length[1] : length[0];

      /* Compare two chunks (that could be entire tokens if they both reside
       * in memory).
       */
      *compare = memcmp(bufp[0], bufp[1], (size_t) len);
      if (*compare != 0)
        return SVN_NO_ERROR;

      total_length -= len;
      length[0] -= len;
      length[1] -= len;
      bufp[0] += len;
      bufp[1] += len;
    }
  while(total_length > 0);

  *compare = 0;
  return SVN_NO_ERROR;
}


/* Implements svn_diff_fns2_t::token_discard */
static void
token_discard(void *baton, void *token)
{
  svn_diff__file_baton_t *file_baton = baton;
  svn_diff__file_token_t *file_token = token;

  /* Prepend FILE_TOKEN to FILE_BATON->TOKENS, for reuse. */
  file_token->next = file_baton->tokens;
  file_baton->tokens = file_token;
}


/* Implements svn_diff_fns2_t::token_discard_all */
static void
token_discard_all(void *baton)
{
  svn_diff__file_baton_t *file_baton = baton;

  /* Discard all memory in use by the tokens, and close all open files. */
  svn_pool_clear(file_baton->pool);
}


/* The callbacks above, in the member order of svn_diff_fns2_t. */
static const svn_diff_fns2_t svn_diff__file_vtable =
{
  datasources_open,
  datasource_close,
  datasource_get_next_token,
  token_compare,
  token_discard,
  token_discard_all
};

/* Id for the --ignore-eol-style option, which doesn't have a short name. */
#define SVN_DIFF__OPT_IGNORE_EOL_STYLE 256

/* Options supported by svn_diff_file_options_parse(). */
static const apr_getopt_option_t diff_options[] =
{
  { "ignore-space-change", 'b', 0, NULL },
  { "ignore-all-space", 'w', 0, NULL },
  { "ignore-eol-style", SVN_DIFF__OPT_IGNORE_EOL_STYLE, 0, NULL },
  { "show-c-function", 'p', 0, NULL },
  /* ### For compatibility; we don't support the argument to -u, because
   * ### we don't have optional argument support.
*/ 1194 { "unified", 'u', 0, NULL }, 1195 { "context", 'U', 1, NULL }, 1196 { NULL, 0, 0, NULL } 1197}; 1198 1199svn_diff_file_options_t * 1200svn_diff_file_options_create(apr_pool_t *pool) 1201{ 1202 svn_diff_file_options_t * opts = apr_pcalloc(pool, sizeof(*opts)); 1203 1204 opts->context_size = SVN_DIFF__UNIFIED_CONTEXT_SIZE; 1205 1206 return opts; 1207} 1208 1209/* A baton for use with opt_parsing_error_func(). */ 1210struct opt_parsing_error_baton_t 1211{ 1212 svn_error_t *err; 1213 apr_pool_t *pool; 1214}; 1215 1216/* Store an error message from apr_getopt_long(). Set BATON->err to a new 1217 * error with a message generated from FMT and the remaining arguments. 1218 * Implements apr_getopt_err_fn_t. */ 1219static void 1220opt_parsing_error_func(void *baton, 1221 const char *fmt, ...) 1222{ 1223 struct opt_parsing_error_baton_t *b = baton; 1224 const char *message; 1225 va_list ap; 1226 1227 va_start(ap, fmt); 1228 message = apr_pvsprintf(b->pool, fmt, ap); 1229 va_end(ap); 1230 1231 /* Skip leading ": " (if present, which it always is in known cases). */ 1232 if (strncmp(message, ": ", 2) == 0) 1233 message += 2; 1234 1235 b->err = svn_error_create(SVN_ERR_INVALID_DIFF_OPTION, NULL, message); 1236} 1237 1238svn_error_t * 1239svn_diff_file_options_parse(svn_diff_file_options_t *options, 1240 const apr_array_header_t *args, 1241 apr_pool_t *pool) 1242{ 1243 apr_getopt_t *os; 1244 struct opt_parsing_error_baton_t opt_parsing_error_baton; 1245 apr_array_header_t *argv; 1246 1247 opt_parsing_error_baton.err = NULL; 1248 opt_parsing_error_baton.pool = pool; 1249 1250 /* Make room for each option (starting at index 1) plus trailing NULL. 
*/ 1251 argv = apr_array_make(pool, args->nelts + 2, sizeof(char*)); 1252 APR_ARRAY_PUSH(argv, const char *) = ""; 1253 apr_array_cat(argv, args); 1254 APR_ARRAY_PUSH(argv, const char *) = NULL; 1255 1256 apr_getopt_init(&os, pool, 1257 argv->nelts - 1 /* Exclude trailing NULL */, 1258 (const char *const *) argv->elts); 1259 1260 /* Capture any error message from apr_getopt_long(). This will typically 1261 * say which option is wrong, which we would not otherwise know. */ 1262 os->errfn = opt_parsing_error_func; 1263 os->errarg = &opt_parsing_error_baton; 1264 1265 while (1) 1266 { 1267 const char *opt_arg; 1268 int opt_id; 1269 apr_status_t err = apr_getopt_long(os, diff_options, &opt_id, &opt_arg); 1270 1271 if (APR_STATUS_IS_EOF(err)) 1272 break; 1273 if (err) 1274 /* Wrap apr_getopt_long()'s error message. Its doc string implies 1275 * it always will produce one, but never mind if it doesn't. Avoid 1276 * using the message associated with the return code ERR, because 1277 * it refers to the "command line" which may be misleading here. */ 1278 return svn_error_create(SVN_ERR_INVALID_DIFF_OPTION, 1279 opt_parsing_error_baton.err, 1280 _("Error in options to internal diff")); 1281 1282 switch (opt_id) 1283 { 1284 case 'b': 1285 /* -w takes precedence over -b. */ 1286 if (! options->ignore_space) 1287 options->ignore_space = svn_diff_file_ignore_space_change; 1288 break; 1289 case 'w': 1290 options->ignore_space = svn_diff_file_ignore_space_all; 1291 break; 1292 case SVN_DIFF__OPT_IGNORE_EOL_STYLE: 1293 options->ignore_eol_style = TRUE; 1294 break; 1295 case 'p': 1296 options->show_c_function = TRUE; 1297 break; 1298 case 'U': 1299 SVN_ERR(svn_cstring_atoi(&options->context_size, opt_arg)); 1300 break; 1301 default: 1302 break; 1303 } 1304 } 1305 1306 /* Check for spurious arguments. 
*/ 1307 if (os->ind < os->argc) 1308 return svn_error_createf(SVN_ERR_INVALID_DIFF_OPTION, NULL, 1309 _("Invalid argument '%s' in diff options"), 1310 os->argv[os->ind]); 1311 1312 return SVN_NO_ERROR; 1313} 1314 1315svn_error_t * 1316svn_diff_file_diff_2(svn_diff_t **diff, 1317 const char *original, 1318 const char *modified, 1319 const svn_diff_file_options_t *options, 1320 apr_pool_t *pool) 1321{ 1322 svn_diff__file_baton_t baton = { 0 }; 1323 1324 baton.options = options; 1325 baton.files[0].path = original; 1326 baton.files[1].path = modified; 1327 baton.pool = svn_pool_create(pool); 1328 1329 SVN_ERR(svn_diff_diff_2(diff, &baton, &svn_diff__file_vtable, pool)); 1330 1331 svn_pool_destroy(baton.pool); 1332 return SVN_NO_ERROR; 1333} 1334 1335svn_error_t * 1336svn_diff_file_diff3_2(svn_diff_t **diff, 1337 const char *original, 1338 const char *modified, 1339 const char *latest, 1340 const svn_diff_file_options_t *options, 1341 apr_pool_t *pool) 1342{ 1343 svn_diff__file_baton_t baton = { 0 }; 1344 1345 baton.options = options; 1346 baton.files[0].path = original; 1347 baton.files[1].path = modified; 1348 baton.files[2].path = latest; 1349 baton.pool = svn_pool_create(pool); 1350 1351 SVN_ERR(svn_diff_diff3_2(diff, &baton, &svn_diff__file_vtable, pool)); 1352 1353 svn_pool_destroy(baton.pool); 1354 return SVN_NO_ERROR; 1355} 1356 1357svn_error_t * 1358svn_diff_file_diff4_2(svn_diff_t **diff, 1359 const char *original, 1360 const char *modified, 1361 const char *latest, 1362 const char *ancestor, 1363 const svn_diff_file_options_t *options, 1364 apr_pool_t *pool) 1365{ 1366 svn_diff__file_baton_t baton = { 0 }; 1367 1368 baton.options = options; 1369 baton.files[0].path = original; 1370 baton.files[1].path = modified; 1371 baton.files[2].path = latest; 1372 baton.files[3].path = ancestor; 1373 baton.pool = svn_pool_create(pool); 1374 1375 SVN_ERR(svn_diff_diff4_2(diff, &baton, &svn_diff__file_vtable, pool)); 1376 1377 svn_pool_destroy(baton.pool); 1378 return 
SVN_NO_ERROR; 1379} 1380 1381 1382/** Display unified context diffs **/ 1383 1384/* Maximum length of the extra context to show when show_c_function is set. 1385 * GNU diff uses 40, let's be brave and use 50 instead. */ 1386#define SVN_DIFF__EXTRA_CONTEXT_LENGTH 50 1387typedef struct svn_diff__file_output_baton_t 1388{ 1389 svn_stream_t *output_stream; 1390 const char *header_encoding; 1391 1392 /* Cached markers, in header_encoding. */ 1393 const char *context_str; 1394 const char *delete_str; 1395 const char *insert_str; 1396 1397 const char *path[2]; 1398 apr_file_t *file[2]; 1399 1400 apr_off_t current_line[2]; 1401 1402 char buffer[2][4096]; 1403 apr_size_t length[2]; 1404 char *curp[2]; 1405 1406 apr_off_t hunk_start[2]; 1407 apr_off_t hunk_length[2]; 1408 svn_stringbuf_t *hunk; 1409 1410 /* Should we emit C functions in the unified diff header */ 1411 svn_boolean_t show_c_function; 1412 /* Extra strings to skip over if we match. */ 1413 apr_array_header_t *extra_skip_match; 1414 /* "Context" to append to the @@ line when the show_c_function option 1415 * is set. */ 1416 svn_stringbuf_t *extra_context; 1417 /* Extra context for the current hunk. 
*/ 1418 char hunk_extra_context[SVN_DIFF__EXTRA_CONTEXT_LENGTH + 1]; 1419 1420 int context_size; 1421 1422 /* Cancel handler */ 1423 svn_cancel_func_t cancel_func; 1424 void *cancel_baton; 1425 1426 apr_pool_t *pool; 1427} svn_diff__file_output_baton_t; 1428 1429typedef enum svn_diff__file_output_unified_type_e 1430{ 1431 svn_diff__file_output_unified_skip, 1432 svn_diff__file_output_unified_context, 1433 svn_diff__file_output_unified_delete, 1434 svn_diff__file_output_unified_insert 1435} svn_diff__file_output_unified_type_e; 1436 1437 1438static svn_error_t * 1439output_unified_line(svn_diff__file_output_baton_t *baton, 1440 svn_diff__file_output_unified_type_e type, int idx) 1441{ 1442 char *curp; 1443 char *eol; 1444 apr_size_t length; 1445 svn_error_t *err; 1446 svn_boolean_t bytes_processed = FALSE; 1447 svn_boolean_t had_cr = FALSE; 1448 /* Are we collecting extra context? */ 1449 svn_boolean_t collect_extra = FALSE; 1450 1451 length = baton->length[idx]; 1452 curp = baton->curp[idx]; 1453 1454 /* Lazily update the current line even if we're at EOF. 
1455 * This way we fake output of context at EOF 1456 */ 1457 baton->current_line[idx]++; 1458 1459 if (length == 0 && apr_file_eof(baton->file[idx])) 1460 { 1461 return SVN_NO_ERROR; 1462 } 1463 1464 do 1465 { 1466 if (length > 0) 1467 { 1468 if (!bytes_processed) 1469 { 1470 switch (type) 1471 { 1472 case svn_diff__file_output_unified_context: 1473 svn_stringbuf_appendcstr(baton->hunk, baton->context_str); 1474 baton->hunk_length[0]++; 1475 baton->hunk_length[1]++; 1476 break; 1477 case svn_diff__file_output_unified_delete: 1478 svn_stringbuf_appendcstr(baton->hunk, baton->delete_str); 1479 baton->hunk_length[0]++; 1480 break; 1481 case svn_diff__file_output_unified_insert: 1482 svn_stringbuf_appendcstr(baton->hunk, baton->insert_str); 1483 baton->hunk_length[1]++; 1484 break; 1485 default: 1486 break; 1487 } 1488 1489 if (baton->show_c_function 1490 && (type == svn_diff__file_output_unified_skip 1491 || type == svn_diff__file_output_unified_context) 1492 && (svn_ctype_isalpha(*curp) || *curp == '$' || *curp == '_') 1493 && !svn_cstring_match_glob_list(curp, 1494 baton->extra_skip_match)) 1495 { 1496 svn_stringbuf_setempty(baton->extra_context); 1497 collect_extra = TRUE; 1498 } 1499 } 1500 1501 eol = svn_eol__find_eol_start(curp, length); 1502 1503 if (eol != NULL) 1504 { 1505 apr_size_t len; 1506 1507 had_cr = (*eol == '\r'); 1508 eol++; 1509 len = (apr_size_t)(eol - curp); 1510 1511 if (! 
had_cr || len < length) 1512 { 1513 if (had_cr && *eol == '\n') 1514 { 1515 ++eol; 1516 ++len; 1517 } 1518 1519 length -= len; 1520 1521 if (type != svn_diff__file_output_unified_skip) 1522 { 1523 svn_stringbuf_appendbytes(baton->hunk, curp, len); 1524 } 1525 if (collect_extra) 1526 { 1527 svn_stringbuf_appendbytes(baton->extra_context, 1528 curp, len); 1529 } 1530 1531 baton->curp[idx] = eol; 1532 baton->length[idx] = length; 1533 1534 err = SVN_NO_ERROR; 1535 1536 break; 1537 } 1538 } 1539 1540 if (type != svn_diff__file_output_unified_skip) 1541 { 1542 svn_stringbuf_appendbytes(baton->hunk, curp, length); 1543 } 1544 1545 if (collect_extra) 1546 { 1547 svn_stringbuf_appendbytes(baton->extra_context, curp, length); 1548 } 1549 1550 bytes_processed = TRUE; 1551 } 1552 1553 curp = baton->buffer[idx]; 1554 length = sizeof(baton->buffer[idx]); 1555 1556 err = svn_io_file_read(baton->file[idx], curp, &length, baton->pool); 1557 1558 /* If the last chunk ended with a CR, we look for an LF at the start 1559 of this chunk. */ 1560 if (had_cr) 1561 { 1562 if (! err && length > 0 && *curp == '\n') 1563 { 1564 if (type != svn_diff__file_output_unified_skip) 1565 { 1566 svn_stringbuf_appendbyte(baton->hunk, *curp); 1567 } 1568 /* We don't append the LF to extra_context, since it would 1569 * just be stripped anyway. */ 1570 ++curp; 1571 --length; 1572 } 1573 1574 baton->curp[idx] = curp; 1575 baton->length[idx] = length; 1576 1577 break; 1578 } 1579 } 1580 while (! err); 1581 1582 if (err && ! APR_STATUS_IS_EOF(err->apr_err)) 1583 return err; 1584 1585 if (err && APR_STATUS_IS_EOF(err->apr_err)) 1586 { 1587 svn_error_clear(err); 1588 /* Special case if we reach the end of file AND the last line is in the 1589 changed range AND the file doesn't end with a newline */ 1590 if (bytes_processed && (type != svn_diff__file_output_unified_skip) 1591 && ! 
had_cr) 1592 { 1593 SVN_ERR(svn_diff__unified_append_no_newline_msg( 1594 baton->hunk, baton->header_encoding, baton->pool)); 1595 } 1596 1597 baton->length[idx] = 0; 1598 } 1599 1600 return SVN_NO_ERROR; 1601} 1602 1603static APR_INLINE svn_error_t * 1604output_unified_diff_range(svn_diff__file_output_baton_t *output_baton, 1605 int source, 1606 svn_diff__file_output_unified_type_e type, 1607 apr_off_t until, 1608 svn_cancel_func_t cancel_func, 1609 void *cancel_baton) 1610{ 1611 while (output_baton->current_line[source] < until) 1612 { 1613 if (cancel_func) 1614 SVN_ERR(cancel_func(cancel_baton)); 1615 1616 SVN_ERR(output_unified_line(output_baton, type, source)); 1617 } 1618 return SVN_NO_ERROR; 1619} 1620 1621static svn_error_t * 1622output_unified_flush_hunk(svn_diff__file_output_baton_t *baton) 1623{ 1624 apr_off_t target_line; 1625 apr_size_t hunk_len; 1626 apr_off_t old_start; 1627 apr_off_t new_start; 1628 1629 if (svn_stringbuf_isempty(baton->hunk)) 1630 { 1631 /* Nothing to flush */ 1632 return SVN_NO_ERROR; 1633 } 1634 1635 target_line = baton->hunk_start[0] + baton->hunk_length[0] 1636 + baton->context_size; 1637 1638 /* Add trailing context to the hunk */ 1639 SVN_ERR(output_unified_diff_range(baton, 0 /* original */, 1640 svn_diff__file_output_unified_context, 1641 target_line, 1642 baton->cancel_func, baton->cancel_baton)); 1643 1644 old_start = baton->hunk_start[0]; 1645 new_start = baton->hunk_start[1]; 1646 1647 /* If the file is non-empty, convert the line indexes from 1648 zero based to one based */ 1649 if (baton->hunk_length[0]) 1650 old_start++; 1651 if (baton->hunk_length[1]) 1652 new_start++; 1653 1654 /* Write the hunk header */ 1655 SVN_ERR(svn_diff__unified_write_hunk_header( 1656 baton->output_stream, baton->header_encoding, "@@", 1657 old_start, baton->hunk_length[0], 1658 new_start, baton->hunk_length[1], 1659 baton->hunk_extra_context, 1660 baton->pool)); 1661 1662 /* Output the hunk content */ 1663 hunk_len = baton->hunk->len; 1664 
SVN_ERR(svn_stream_write(baton->output_stream, baton->hunk->data, 1665 &hunk_len)); 1666 1667 /* Prepare for the next hunk */ 1668 baton->hunk_length[0] = 0; 1669 baton->hunk_length[1] = 0; 1670 baton->hunk_start[0] = 0; 1671 baton->hunk_start[1] = 0; 1672 svn_stringbuf_setempty(baton->hunk); 1673 1674 return SVN_NO_ERROR; 1675} 1676 1677static svn_error_t * 1678output_unified_diff_modified(void *baton, 1679 apr_off_t original_start, apr_off_t original_length, 1680 apr_off_t modified_start, apr_off_t modified_length, 1681 apr_off_t latest_start, apr_off_t latest_length) 1682{ 1683 svn_diff__file_output_baton_t *output_baton = baton; 1684 apr_off_t context_prefix_length; 1685 apr_off_t prev_context_end; 1686 svn_boolean_t init_hunk = FALSE; 1687 1688 if (original_start > output_baton->context_size) 1689 context_prefix_length = output_baton->context_size; 1690 else 1691 context_prefix_length = original_start; 1692 1693 /* Calculate where the previous hunk will end if we would write it now 1694 (including the necessary context at the end) */ 1695 if (output_baton->hunk_length[0] > 0 || output_baton->hunk_length[1] > 0) 1696 { 1697 prev_context_end = output_baton->hunk_start[0] 1698 + output_baton->hunk_length[0] 1699 + output_baton->context_size; 1700 } 1701 else 1702 { 1703 prev_context_end = -1; 1704 1705 if (output_baton->hunk_start[0] == 0 1706 && (original_length > 0 || modified_length > 0)) 1707 init_hunk = TRUE; 1708 } 1709 1710 /* If the changed range is far enough from the previous range, flush the current 1711 hunk. 
*/ 1712 { 1713 apr_off_t new_hunk_start = (original_start - context_prefix_length); 1714 1715 if (output_baton->current_line[0] < new_hunk_start 1716 && prev_context_end <= new_hunk_start) 1717 { 1718 SVN_ERR(output_unified_flush_hunk(output_baton)); 1719 init_hunk = TRUE; 1720 } 1721 else if (output_baton->hunk_length[0] > 0 1722 || output_baton->hunk_length[1] > 0) 1723 { 1724 /* We extend the current hunk */ 1725 1726 1727 /* Original: Output the context preceding the changed range */ 1728 SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */, 1729 svn_diff__file_output_unified_context, 1730 original_start, 1731 output_baton->cancel_func, 1732 output_baton->cancel_baton)); 1733 } 1734 } 1735 1736 /* Original: Skip lines until we are at the beginning of the context we want 1737 to display */ 1738 SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */, 1739 svn_diff__file_output_unified_skip, 1740 original_start - context_prefix_length, 1741 output_baton->cancel_func, 1742 output_baton->cancel_baton)); 1743 1744 /* Note that the above skip stores data for the show_c_function support below */ 1745 1746 if (init_hunk) 1747 { 1748 SVN_ERR_ASSERT(output_baton->hunk_length[0] == 0 1749 && output_baton->hunk_length[1] == 0); 1750 1751 output_baton->hunk_start[0] = original_start - context_prefix_length; 1752 output_baton->hunk_start[1] = modified_start - context_prefix_length; 1753 } 1754 1755 if (init_hunk && output_baton->show_c_function) 1756 { 1757 apr_size_t p; 1758 const char *invalid_character; 1759 1760 /* Save the extra context for later use. 1761 * Note that the last byte of the hunk_extra_context array is never 1762 * touched after it is zero-initialized, so the array is always 1763 * 0-terminated. */ 1764 strncpy(output_baton->hunk_extra_context, 1765 output_baton->extra_context->data, 1766 SVN_DIFF__EXTRA_CONTEXT_LENGTH); 1767 /* Trim whitespace at the end, most notably to get rid of any 1768 * newline characters. 
*/ 1769 p = strlen(output_baton->hunk_extra_context); 1770 while (p > 0 1771 && svn_ctype_isspace(output_baton->hunk_extra_context[p - 1])) 1772 { 1773 output_baton->hunk_extra_context[--p] = '\0'; 1774 } 1775 invalid_character = 1776 svn_utf__last_valid(output_baton->hunk_extra_context, 1777 SVN_DIFF__EXTRA_CONTEXT_LENGTH); 1778 for (p = invalid_character - output_baton->hunk_extra_context; 1779 p < SVN_DIFF__EXTRA_CONTEXT_LENGTH; p++) 1780 { 1781 output_baton->hunk_extra_context[p] = '\0'; 1782 } 1783 } 1784 1785 /* Modified: Skip lines until we are at the start of the changed range */ 1786 SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */, 1787 svn_diff__file_output_unified_skip, 1788 modified_start, 1789 output_baton->cancel_func, 1790 output_baton->cancel_baton)); 1791 1792 /* Original: Output the context preceding the changed range */ 1793 SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */, 1794 svn_diff__file_output_unified_context, 1795 original_start, 1796 output_baton->cancel_func, 1797 output_baton->cancel_baton)); 1798 1799 /* Both: Output the changed range */ 1800 SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */, 1801 svn_diff__file_output_unified_delete, 1802 original_start + original_length, 1803 output_baton->cancel_func, 1804 output_baton->cancel_baton)); 1805 SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */, 1806 svn_diff__file_output_unified_insert, 1807 modified_start + modified_length, 1808 output_baton->cancel_func, 1809 output_baton->cancel_baton)); 1810 1811 return SVN_NO_ERROR; 1812} 1813 1814/* Set *HEADER to a new string consisting of PATH, a tab, and PATH's mtime. 
*/ 1815static svn_error_t * 1816output_unified_default_hdr(const char **header, const char *path, 1817 apr_pool_t *pool) 1818{ 1819 apr_finfo_t file_info; 1820 apr_time_exp_t exploded_time; 1821 char time_buffer[64]; 1822 apr_size_t time_len; 1823 const char *utf8_timestr; 1824 1825 SVN_ERR(svn_io_stat(&file_info, path, APR_FINFO_MTIME, pool)); 1826 apr_time_exp_lt(&exploded_time, file_info.mtime); 1827 1828 apr_strftime(time_buffer, &time_len, sizeof(time_buffer) - 1, 1829 /* Order of date components can be different in different languages */ 1830 _("%a %b %e %H:%M:%S %Y"), &exploded_time); 1831 1832 SVN_ERR(svn_utf_cstring_to_utf8(&utf8_timestr, time_buffer, pool)); 1833 1834 *header = apr_psprintf(pool, "%s\t%s", path, utf8_timestr); 1835 1836 return SVN_NO_ERROR; 1837} 1838 1839static const svn_diff_output_fns_t svn_diff__file_output_unified_vtable = 1840{ 1841 NULL, /* output_common */ 1842 output_unified_diff_modified, 1843 NULL, /* output_diff_latest */ 1844 NULL, /* output_diff_common */ 1845 NULL /* output_conflict */ 1846}; 1847 1848svn_error_t * 1849svn_diff_file_output_unified4(svn_stream_t *output_stream, 1850 svn_diff_t *diff, 1851 const char *original_path, 1852 const char *modified_path, 1853 const char *original_header, 1854 const char *modified_header, 1855 const char *header_encoding, 1856 const char *relative_to_dir, 1857 svn_boolean_t show_c_function, 1858 int context_size, 1859 svn_cancel_func_t cancel_func, 1860 void *cancel_baton, 1861 apr_pool_t *pool) 1862{ 1863 if (svn_diff_contains_diffs(diff)) 1864 { 1865 svn_diff__file_output_baton_t baton; 1866 int i; 1867 1868 memset(&baton, 0, sizeof(baton)); 1869 baton.output_stream = output_stream; 1870 baton.cancel_func = cancel_func; 1871 baton.cancel_baton = cancel_baton; 1872 baton.pool = pool; 1873 baton.header_encoding = header_encoding; 1874 baton.path[0] = original_path; 1875 baton.path[1] = modified_path; 1876 baton.hunk = svn_stringbuf_create_empty(pool); 1877 baton.show_c_function = 
show_c_function; 1878 baton.extra_context = svn_stringbuf_create_empty(pool); 1879 baton.context_size = (context_size >= 0) ? context_size 1880 : SVN_DIFF__UNIFIED_CONTEXT_SIZE; 1881 1882 if (show_c_function) 1883 { 1884 baton.extra_skip_match = apr_array_make(pool, 3, sizeof(char **)); 1885 1886 APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "public:*"; 1887 APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "private:*"; 1888 APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "protected:*"; 1889 } 1890 1891 SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.context_str, " ", 1892 header_encoding, pool)); 1893 SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.delete_str, "-", 1894 header_encoding, pool)); 1895 SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.insert_str, "+", 1896 header_encoding, pool)); 1897 1898 if (relative_to_dir) 1899 { 1900 /* Possibly adjust the "original" and "modified" paths shown in 1901 the output (see issue #2723). */ 1902 const char *child_path; 1903 1904 if (! original_header) 1905 { 1906 child_path = svn_dirent_is_child(relative_to_dir, 1907 original_path, pool); 1908 if (child_path) 1909 original_path = child_path; 1910 else 1911 return svn_error_createf( 1912 SVN_ERR_BAD_RELATIVE_PATH, NULL, 1913 _("Path '%s' must be inside " 1914 "the directory '%s'"), 1915 svn_dirent_local_style(original_path, pool), 1916 svn_dirent_local_style(relative_to_dir, 1917 pool)); 1918 } 1919 1920 if (! 
modified_header) 1921 { 1922 child_path = svn_dirent_is_child(relative_to_dir, 1923 modified_path, pool); 1924 if (child_path) 1925 modified_path = child_path; 1926 else 1927 return svn_error_createf( 1928 SVN_ERR_BAD_RELATIVE_PATH, NULL, 1929 _("Path '%s' must be inside " 1930 "the directory '%s'"), 1931 svn_dirent_local_style(modified_path, pool), 1932 svn_dirent_local_style(relative_to_dir, 1933 pool)); 1934 } 1935 } 1936 1937 for (i = 0; i < 2; i++) 1938 { 1939 SVN_ERR(svn_io_file_open(&baton.file[i], baton.path[i], 1940 APR_READ, APR_OS_DEFAULT, pool)); 1941 } 1942 1943 if (original_header == NULL) 1944 { 1945 SVN_ERR(output_unified_default_hdr(&original_header, original_path, 1946 pool)); 1947 } 1948 1949 if (modified_header == NULL) 1950 { 1951 SVN_ERR(output_unified_default_hdr(&modified_header, modified_path, 1952 pool)); 1953 } 1954 1955 SVN_ERR(svn_diff__unidiff_write_header(output_stream, header_encoding, 1956 original_header, modified_header, 1957 pool)); 1958 1959 SVN_ERR(svn_diff_output2(diff, &baton, 1960 &svn_diff__file_output_unified_vtable, 1961 cancel_func, cancel_baton)); 1962 SVN_ERR(output_unified_flush_hunk(&baton)); 1963 1964 for (i = 0; i < 2; i++) 1965 { 1966 SVN_ERR(svn_io_file_close(baton.file[i], pool)); 1967 } 1968 } 1969 1970 return SVN_NO_ERROR; 1971} 1972 1973 1974/** Display diff3 **/ 1975 1976/* A stream to remember *leading* context. Note that this stream does 1977 *not* copy the data that it is remembering; it just saves 1978 *pointers! 
*/ 1979typedef struct context_saver_t { 1980 svn_stream_t *stream; 1981 int context_size; 1982 const char **data; /* const char *data[context_size] */ 1983 apr_size_t *len; /* apr_size_t len[context_size] */ 1984 apr_size_t next_slot; 1985 apr_ssize_t total_writes; 1986} context_saver_t; 1987 1988 1989static svn_error_t * 1990context_saver_stream_write(void *baton, 1991 const char *data, 1992 apr_size_t *len) 1993{ 1994 context_saver_t *cs = baton; 1995 1996 if (cs->context_size > 0) 1997 { 1998 cs->data[cs->next_slot] = data; 1999 cs->len[cs->next_slot] = *len; 2000 cs->next_slot = (cs->next_slot + 1) % cs->context_size; 2001 cs->total_writes++; 2002 } 2003 return SVN_NO_ERROR; 2004} 2005 2006typedef struct svn_diff3__file_output_baton_t 2007{ 2008 svn_stream_t *output_stream; 2009 2010 const char *path[3]; 2011 2012 apr_off_t current_line[3]; 2013 2014 char *buffer[3]; 2015 char *endp[3]; 2016 char *curp[3]; 2017 2018 /* The following four members are in the encoding used for the output. */ 2019 const char *conflict_modified; 2020 const char *conflict_original; 2021 const char *conflict_separator; 2022 const char *conflict_latest; 2023 2024 const char *marker_eol; 2025 2026 svn_diff_conflict_display_style_t conflict_style; 2027 int context_size; 2028 2029 /* cancel support */ 2030 svn_cancel_func_t cancel_func; 2031 void *cancel_baton; 2032 2033 /* The rest of the fields are for 2034 svn_diff_conflict_display_only_conflicts only. Note that for 2035 these batons, OUTPUT_STREAM is either CONTEXT_SAVER->STREAM or 2036 (soon after a conflict) a "trailing context stream", never the 2037 actual output stream.*/ 2038 /* The actual output stream. */ 2039 svn_stream_t *real_output_stream; 2040 context_saver_t *context_saver; 2041 /* Used to allocate context_saver and trailing context streams, and 2042 for some printfs. 
*/ 2043 apr_pool_t *pool; 2044} svn_diff3__file_output_baton_t; 2045 2046static svn_error_t * 2047flush_context_saver(context_saver_t *cs, 2048 svn_stream_t *output_stream) 2049{ 2050 int i; 2051 for (i = 0; i < cs->context_size; i++) 2052 { 2053 apr_size_t slot = (i + cs->next_slot) % cs->context_size; 2054 if (cs->data[slot]) 2055 { 2056 apr_size_t len = cs->len[slot]; 2057 SVN_ERR(svn_stream_write(output_stream, cs->data[slot], &len)); 2058 } 2059 } 2060 return SVN_NO_ERROR; 2061} 2062 2063static void 2064make_context_saver(svn_diff3__file_output_baton_t *fob) 2065{ 2066 context_saver_t *cs; 2067 2068 assert(fob->context_size > 0); /* Or nothing to save */ 2069 2070 svn_pool_clear(fob->pool); 2071 cs = apr_pcalloc(fob->pool, sizeof(*cs)); 2072 cs->stream = svn_stream_empty(fob->pool); 2073 svn_stream_set_baton(cs->stream, cs); 2074 svn_stream_set_write(cs->stream, context_saver_stream_write); 2075 fob->context_saver = cs; 2076 fob->output_stream = cs->stream; 2077 cs->context_size = fob->context_size; 2078 cs->data = apr_pcalloc(fob->pool, sizeof(*cs->data) * cs->context_size); 2079 cs->len = apr_pcalloc(fob->pool, sizeof(*cs->len) * cs->context_size); 2080} 2081 2082 2083/* A stream which prints LINES_TO_PRINT (based on context size) lines to 2084 BATON->REAL_OUTPUT_STREAM, and then changes BATON->OUTPUT_STREAM to 2085 a context_saver; used for *trailing* context. 
*/ 2086 2087struct trailing_context_printer { 2088 apr_size_t lines_to_print; 2089 svn_diff3__file_output_baton_t *fob; 2090}; 2091 2092 2093 2094static svn_error_t * 2095trailing_context_printer_write(void *baton, 2096 const char *data, 2097 apr_size_t *len) 2098{ 2099 struct trailing_context_printer *tcp = baton; 2100 SVN_ERR_ASSERT(tcp->lines_to_print > 0); 2101 SVN_ERR(svn_stream_write(tcp->fob->real_output_stream, data, len)); 2102 tcp->lines_to_print--; 2103 if (tcp->lines_to_print == 0) 2104 make_context_saver(tcp->fob); 2105 return SVN_NO_ERROR; 2106} 2107 2108 2109static void 2110make_trailing_context_printer(svn_diff3__file_output_baton_t *btn) 2111{ 2112 struct trailing_context_printer *tcp; 2113 svn_stream_t *s; 2114 2115 svn_pool_clear(btn->pool); 2116 2117 tcp = apr_pcalloc(btn->pool, sizeof(*tcp)); 2118 tcp->lines_to_print = btn->context_size; 2119 tcp->fob = btn; 2120 s = svn_stream_empty(btn->pool); 2121 svn_stream_set_baton(s, tcp); 2122 svn_stream_set_write(s, trailing_context_printer_write); 2123 btn->output_stream = s; 2124} 2125 2126 2127 2128typedef enum svn_diff3__file_output_type_e 2129{ 2130 svn_diff3__file_output_skip, 2131 svn_diff3__file_output_normal 2132} svn_diff3__file_output_type_e; 2133 2134 2135static svn_error_t * 2136output_line(svn_diff3__file_output_baton_t *baton, 2137 svn_diff3__file_output_type_e type, int idx) 2138{ 2139 char *curp; 2140 char *endp; 2141 char *eol; 2142 apr_size_t len; 2143 2144 curp = baton->curp[idx]; 2145 endp = baton->endp[idx]; 2146 2147 /* Lazily update the current line even if we're at EOF. 
2148 */ 2149 baton->current_line[idx]++; 2150 2151 if (curp == endp) 2152 return SVN_NO_ERROR; 2153 2154 eol = svn_eol__find_eol_start(curp, endp - curp); 2155 if (!eol) 2156 eol = endp; 2157 else 2158 { 2159 svn_boolean_t had_cr = (*eol == '\r'); 2160 eol++; 2161 if (had_cr && eol != endp && *eol == '\n') 2162 eol++; 2163 } 2164 2165 if (type != svn_diff3__file_output_skip) 2166 { 2167 len = eol - curp; 2168 /* Note that the trailing context printer assumes that 2169 svn_stream_write is called exactly once per line. */ 2170 SVN_ERR(svn_stream_write(baton->output_stream, curp, &len)); 2171 } 2172 2173 baton->curp[idx] = eol; 2174 2175 return SVN_NO_ERROR; 2176} 2177 2178static svn_error_t * 2179output_marker_eol(svn_diff3__file_output_baton_t *btn) 2180{ 2181 return svn_stream_puts(btn->output_stream, btn->marker_eol); 2182} 2183 2184static svn_error_t * 2185output_hunk(void *baton, int idx, apr_off_t target_line, 2186 apr_off_t target_length) 2187{ 2188 svn_diff3__file_output_baton_t *output_baton = baton; 2189 2190 /* Skip lines until we are at the start of the changed range */ 2191 while (output_baton->current_line[idx] < target_line) 2192 { 2193 SVN_ERR(output_line(output_baton, svn_diff3__file_output_skip, idx)); 2194 } 2195 2196 target_line += target_length; 2197 2198 while (output_baton->current_line[idx] < target_line) 2199 { 2200 SVN_ERR(output_line(output_baton, svn_diff3__file_output_normal, idx)); 2201 } 2202 2203 return SVN_NO_ERROR; 2204} 2205 2206static svn_error_t * 2207output_common(void *baton, apr_off_t original_start, apr_off_t original_length, 2208 apr_off_t modified_start, apr_off_t modified_length, 2209 apr_off_t latest_start, apr_off_t latest_length) 2210{ 2211 return output_hunk(baton, 1, modified_start, modified_length); 2212} 2213 2214static svn_error_t * 2215output_diff_modified(void *baton, 2216 apr_off_t original_start, apr_off_t original_length, 2217 apr_off_t modified_start, apr_off_t modified_length, 2218 apr_off_t latest_start, 
apr_off_t latest_length) 2219{ 2220 return output_hunk(baton, 1, modified_start, modified_length); 2221} 2222 2223static svn_error_t * 2224output_diff_latest(void *baton, 2225 apr_off_t original_start, apr_off_t original_length, 2226 apr_off_t modified_start, apr_off_t modified_length, 2227 apr_off_t latest_start, apr_off_t latest_length) 2228{ 2229 return output_hunk(baton, 2, latest_start, latest_length); 2230} 2231 2232static svn_error_t * 2233output_conflict(void *baton, 2234 apr_off_t original_start, apr_off_t original_length, 2235 apr_off_t modified_start, apr_off_t modified_length, 2236 apr_off_t latest_start, apr_off_t latest_length, 2237 svn_diff_t *diff); 2238 2239static const svn_diff_output_fns_t svn_diff3__file_output_vtable = 2240{ 2241 output_common, 2242 output_diff_modified, 2243 output_diff_latest, 2244 output_diff_modified, /* output_diff_common */ 2245 output_conflict 2246}; 2247 2248static svn_error_t * 2249output_conflict_with_context_marker(svn_diff3__file_output_baton_t *btn, 2250 const char *label, 2251 apr_off_t start, 2252 apr_off_t length) 2253{ 2254 if (length == 1) 2255 SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool, 2256 "%s (%" APR_OFF_T_FMT ")", 2257 label, start + 1)); 2258 else 2259 SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool, 2260 "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")", 2261 label, start + 1, length)); 2262 2263 SVN_ERR(output_marker_eol(btn)); 2264 2265 return SVN_NO_ERROR; 2266} 2267 2268static svn_error_t * 2269output_conflict_with_context(svn_diff3__file_output_baton_t *btn, 2270 apr_off_t original_start, 2271 apr_off_t original_length, 2272 apr_off_t modified_start, 2273 apr_off_t modified_length, 2274 apr_off_t latest_start, 2275 apr_off_t latest_length) 2276{ 2277 /* Are we currently saving starting context (as opposed to printing 2278 trailing context)? If so, flush it. 
*/ 2279 if (btn->output_stream == btn->context_saver->stream) 2280 { 2281 if (btn->context_saver->total_writes > btn->context_size) 2282 SVN_ERR(svn_stream_puts(btn->real_output_stream, "@@\n")); 2283 SVN_ERR(flush_context_saver(btn->context_saver, btn->real_output_stream)); 2284 } 2285 2286 /* Print to the real output stream. */ 2287 btn->output_stream = btn->real_output_stream; 2288 2289 /* Output the conflict itself. */ 2290 SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_modified, 2291 modified_start, modified_length)); 2292 SVN_ERR(output_hunk(btn, 1/*modified*/, modified_start, modified_length)); 2293 2294 SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_original, 2295 original_start, original_length)); 2296 SVN_ERR(output_hunk(btn, 0/*original*/, original_start, original_length)); 2297 2298 SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool, 2299 "%s%s", btn->conflict_separator, btn->marker_eol)); 2300 SVN_ERR(output_hunk(btn, 2/*latest*/, latest_start, latest_length)); 2301 SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_latest, 2302 latest_start, latest_length)); 2303 2304 /* Go into print-trailing-context mode instead. 
*/ 2305 make_trailing_context_printer(btn); 2306 2307 return SVN_NO_ERROR; 2308} 2309 2310 2311static svn_error_t * 2312output_conflict(void *baton, 2313 apr_off_t original_start, apr_off_t original_length, 2314 apr_off_t modified_start, apr_off_t modified_length, 2315 apr_off_t latest_start, apr_off_t latest_length, 2316 svn_diff_t *diff) 2317{ 2318 svn_diff3__file_output_baton_t *file_baton = baton; 2319 2320 svn_diff_conflict_display_style_t style = file_baton->conflict_style; 2321 2322 if (style == svn_diff_conflict_display_only_conflicts) 2323 return output_conflict_with_context(file_baton, 2324 original_start, original_length, 2325 modified_start, modified_length, 2326 latest_start, latest_length); 2327 2328 if (style == svn_diff_conflict_display_resolved_modified_latest) 2329 { 2330 if (diff) 2331 return svn_diff_output2(diff, baton, 2332 &svn_diff3__file_output_vtable, 2333 file_baton->cancel_func, 2334 file_baton->cancel_baton); 2335 else 2336 style = svn_diff_conflict_display_modified_latest; 2337 } 2338 2339 if (style == svn_diff_conflict_display_modified_latest || 2340 style == svn_diff_conflict_display_modified_original_latest) 2341 { 2342 SVN_ERR(svn_stream_puts(file_baton->output_stream, 2343 file_baton->conflict_modified)); 2344 SVN_ERR(output_marker_eol(file_baton)); 2345 2346 SVN_ERR(output_hunk(baton, 1, modified_start, modified_length)); 2347 2348 if (style == svn_diff_conflict_display_modified_original_latest) 2349 { 2350 SVN_ERR(svn_stream_puts(file_baton->output_stream, 2351 file_baton->conflict_original)); 2352 SVN_ERR(output_marker_eol(file_baton)); 2353 SVN_ERR(output_hunk(baton, 0, original_start, original_length)); 2354 } 2355 2356 SVN_ERR(svn_stream_puts(file_baton->output_stream, 2357 file_baton->conflict_separator)); 2358 SVN_ERR(output_marker_eol(file_baton)); 2359 2360 SVN_ERR(output_hunk(baton, 2, latest_start, latest_length)); 2361 2362 SVN_ERR(svn_stream_puts(file_baton->output_stream, 2363 file_baton->conflict_latest)); 2364 
SVN_ERR(output_marker_eol(file_baton)); 2365 } 2366 else if (style == svn_diff_conflict_display_modified) 2367 SVN_ERR(output_hunk(baton, 1, modified_start, modified_length)); 2368 else if (style == svn_diff_conflict_display_latest) 2369 SVN_ERR(output_hunk(baton, 2, latest_start, latest_length)); 2370 else /* unknown style */ 2371 SVN_ERR_MALFUNCTION(); 2372 2373 return SVN_NO_ERROR; 2374} 2375 2376svn_error_t * 2377svn_diff_file_output_merge3(svn_stream_t *output_stream, 2378 svn_diff_t *diff, 2379 const char *original_path, 2380 const char *modified_path, 2381 const char *latest_path, 2382 const char *conflict_original, 2383 const char *conflict_modified, 2384 const char *conflict_latest, 2385 const char *conflict_separator, 2386 svn_diff_conflict_display_style_t style, 2387 svn_cancel_func_t cancel_func, 2388 void *cancel_baton, 2389 apr_pool_t *scratch_pool) 2390{ 2391 svn_diff3__file_output_baton_t baton; 2392 apr_file_t *file[3]; 2393 int idx; 2394#if APR_HAS_MMAP 2395 apr_mmap_t *mm[3] = { 0 }; 2396#endif /* APR_HAS_MMAP */ 2397 const char *eol; 2398 svn_boolean_t conflicts_only = 2399 (style == svn_diff_conflict_display_only_conflicts); 2400 2401 memset(&baton, 0, sizeof(baton)); 2402 baton.context_size = SVN_DIFF__UNIFIED_CONTEXT_SIZE; 2403 if (conflicts_only) 2404 { 2405 baton.pool = svn_pool_create(scratch_pool); 2406 make_context_saver(&baton); 2407 baton.real_output_stream = output_stream; 2408 } 2409 else 2410 baton.output_stream = output_stream; 2411 baton.path[0] = original_path; 2412 baton.path[1] = modified_path; 2413 baton.path[2] = latest_path; 2414 SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_modified, 2415 conflict_modified ? conflict_modified 2416 : apr_psprintf(scratch_pool, "<<<<<<< %s", 2417 modified_path), 2418 scratch_pool)); 2419 SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_original, 2420 conflict_original ? 
conflict_original 2421 : apr_psprintf(scratch_pool, "||||||| %s", 2422 original_path), 2423 scratch_pool)); 2424 SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_separator, 2425 conflict_separator ? conflict_separator 2426 : "=======", scratch_pool)); 2427 SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_latest, 2428 conflict_latest ? conflict_latest 2429 : apr_psprintf(scratch_pool, ">>>>>>> %s", 2430 latest_path), 2431 scratch_pool)); 2432 2433 baton.conflict_style = style; 2434 2435 for (idx = 0; idx < 3; idx++) 2436 { 2437 apr_size_t size; 2438 2439 SVN_ERR(map_or_read_file(&file[idx], 2440 MMAP_T_ARG(mm[idx]) 2441 &baton.buffer[idx], &size, 2442 baton.path[idx], scratch_pool)); 2443 2444 baton.curp[idx] = baton.buffer[idx]; 2445 baton.endp[idx] = baton.buffer[idx]; 2446 2447 if (baton.endp[idx]) 2448 baton.endp[idx] += size; 2449 } 2450 2451 /* Check what eol marker we should use for conflict markers. 2452 We use the eol marker of the modified file and fall back on the 2453 platform's eol marker if that file doesn't contain any newlines. */ 2454 eol = svn_eol__detect_eol(baton.buffer[1], baton.endp[1] - baton.buffer[1], 2455 NULL); 2456 if (! eol) 2457 eol = APR_EOL_STR; 2458 baton.marker_eol = eol; 2459 2460 baton.cancel_func = cancel_func; 2461 baton.cancel_baton = cancel_baton; 2462 2463 SVN_ERR(svn_diff_output2(diff, &baton, 2464 &svn_diff3__file_output_vtable, 2465 cancel_func, cancel_baton)); 2466 2467 for (idx = 0; idx < 3; idx++) 2468 { 2469#if APR_HAS_MMAP 2470 if (mm[idx]) 2471 { 2472 apr_status_t rv = apr_mmap_delete(mm[idx]); 2473 if (rv != APR_SUCCESS) 2474 { 2475 return svn_error_wrap_apr(rv, _("Failed to delete mmap '%s'"), 2476 baton.path[idx]); 2477 } 2478 } 2479#endif /* APR_HAS_MMAP */ 2480 2481 if (file[idx]) 2482 { 2483 SVN_ERR(svn_io_file_close(file[idx], scratch_pool)); 2484 } 2485 } 2486 2487 if (conflicts_only) 2488 svn_pool_destroy(baton.pool); 2489 2490 return SVN_NO_ERROR; 2491} 2492 2493