1251881Speter/* 2251881Speter * diff_file.c : routines for doing diffs on files 3251881Speter * 4251881Speter * ==================================================================== 5251881Speter * Licensed to the Apache Software Foundation (ASF) under one 6251881Speter * or more contributor license agreements. See the NOTICE file 7251881Speter * distributed with this work for additional information 8251881Speter * regarding copyright ownership. The ASF licenses this file 9251881Speter * to you under the Apache License, Version 2.0 (the 10251881Speter * "License"); you may not use this file except in compliance 11251881Speter * with the License. You may obtain a copy of the License at 12251881Speter * 13251881Speter * http://www.apache.org/licenses/LICENSE-2.0 14251881Speter * 15251881Speter * Unless required by applicable law or agreed to in writing, 16251881Speter * software distributed under the License is distributed on an 17251881Speter * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 18251881Speter * KIND, either express or implied. See the License for the 19251881Speter * specific language governing permissions and limitations 20251881Speter * under the License. 21251881Speter * ==================================================================== 22251881Speter */ 23251881Speter 24251881Speter 25251881Speter#include <apr.h> 26251881Speter#include <apr_pools.h> 27251881Speter#include <apr_general.h> 28251881Speter#include <apr_file_io.h> 29251881Speter#include <apr_file_info.h> 30251881Speter#include <apr_time.h> 31251881Speter#include <apr_mmap.h> 32251881Speter#include <apr_getopt.h> 33251881Speter 34289180Speter#include <assert.h> 35289180Speter 36251881Speter#include "svn_error.h" 37251881Speter#include "svn_diff.h" 38251881Speter#include "svn_types.h" 39251881Speter#include "svn_string.h" 40251881Speter#include "svn_subst.h" 41251881Speter#include "svn_io.h" 42251881Speter#include "svn_utf.h" 43251881Speter#include "svn_pools.h" 44251881Speter#include "diff.h" 45251881Speter#include "svn_private_config.h" 46251881Speter#include "svn_path.h" 47251881Speter#include "svn_ctype.h" 48251881Speter 49251881Speter#include "private/svn_utf_private.h" 50251881Speter#include "private/svn_eol_private.h" 51251881Speter#include "private/svn_dep_compat.h" 52251881Speter#include "private/svn_adler32.h" 53251881Speter#include "private/svn_diff_private.h" 54251881Speter 55251881Speter/* A token, i.e. a line read from a file. */ 56251881Spetertypedef struct svn_diff__file_token_t 57251881Speter{ 58251881Speter /* Next token in free list. */ 59251881Speter struct svn_diff__file_token_t *next; 60251881Speter svn_diff_datasource_e datasource; 61251881Speter /* Offset in the datasource. */ 62251881Speter apr_off_t offset; 63251881Speter /* Offset of the normalized token (may skip leading whitespace) */ 64251881Speter apr_off_t norm_offset; 65251881Speter /* Total length - before normalization. */ 66251881Speter apr_off_t raw_length; 67251881Speter /* Total length - after normalization. */ 68251881Speter apr_off_t length; 69251881Speter} svn_diff__file_token_t; 70251881Speter 71251881Speter 72251881Spetertypedef struct svn_diff__file_baton_t 73251881Speter{ 74251881Speter const svn_diff_file_options_t *options; 75251881Speter 76251881Speter struct file_info { 77251881Speter const char *path; /* path to this file, absolute or relative to CWD */ 78251881Speter 79251881Speter /* All the following fields are active while this datasource is open */ 80251881Speter apr_file_t *file; /* handle of this file */ 81251881Speter apr_off_t size; /* total raw size in bytes of this file */ 82251881Speter 83251881Speter /* The current chunk: CHUNK_SIZE bytes except for the last chunk. */ 84251881Speter int chunk; /* the current chunk number, zero-based */ 85251881Speter char *buffer; /* a buffer containing the current chunk */ 86251881Speter char *curp; /* current position in the current chunk */ 87251881Speter char *endp; /* next memory address after the current chunk */ 88251881Speter 89251881Speter svn_diff__normalize_state_t normalize_state; 90251881Speter 91251881Speter /* Where the identical suffix starts in this datasource */ 92251881Speter int suffix_start_chunk; 93251881Speter apr_off_t suffix_offset_in_chunk; 94251881Speter } files[4]; 95251881Speter 96251881Speter /* List of free tokens that may be reused. */ 97251881Speter svn_diff__file_token_t *tokens; 98251881Speter 99251881Speter apr_pool_t *pool; 100251881Speter} svn_diff__file_baton_t; 101251881Speter 102251881Speterstatic int 103251881Speterdatasource_to_index(svn_diff_datasource_e datasource) 104251881Speter{ 105251881Speter switch (datasource) 106251881Speter { 107251881Speter case svn_diff_datasource_original: 108251881Speter return 0; 109251881Speter 110251881Speter case svn_diff_datasource_modified: 111251881Speter return 1; 112251881Speter 113251881Speter case svn_diff_datasource_latest: 114251881Speter return 2; 115251881Speter 116251881Speter case svn_diff_datasource_ancestor: 117251881Speter return 3; 118251881Speter } 119251881Speter 120251881Speter return -1; 121251881Speter} 122251881Speter 123251881Speter/* Files are read in chunks of 128k. There is no support for this number 124251881Speter * whatsoever. If there is a number someone comes up with that has some 125251881Speter * argumentation, let's use that. 126251881Speter */ 127251881Speter/* If you change this number, update test_norm_offset(), 128251881Speter * test_identical_suffix() and and test_token_compare() in diff-diff3-test.c. 129251881Speter */ 130251881Speter#define CHUNK_SHIFT 17 131251881Speter#define CHUNK_SIZE (1 << CHUNK_SHIFT) 132251881Speter 133251881Speter#define chunk_to_offset(chunk) ((chunk) << CHUNK_SHIFT) 134251881Speter#define offset_to_chunk(offset) ((offset) >> CHUNK_SHIFT) 135251881Speter#define offset_in_chunk(offset) ((offset) & (CHUNK_SIZE - 1)) 136251881Speter 137251881Speter 138251881Speter/* Read a chunk from a FILE into BUFFER, starting from OFFSET, going for 139251881Speter * *LENGTH. The actual bytes read are stored in *LENGTH on return. 140251881Speter */ 141251881Speterstatic APR_INLINE svn_error_t * 142289180Speterread_chunk(apr_file_t *file, 143251881Speter char *buffer, apr_off_t length, 144289180Speter apr_off_t offset, apr_pool_t *scratch_pool) 145251881Speter{ 146251881Speter /* XXX: The final offset may not be the one we asked for. 147251881Speter * XXX: Check. 148251881Speter */ 149289180Speter SVN_ERR(svn_io_file_seek(file, APR_SET, &offset, scratch_pool)); 150251881Speter return svn_io_file_read_full2(file, buffer, (apr_size_t) length, 151289180Speter NULL, NULL, scratch_pool); 152251881Speter} 153251881Speter 154251881Speter 155251881Speter/* Map or read a file at PATH. *BUFFER will point to the file 156251881Speter * contents; if the file was mapped, *FILE and *MM will contain the 157251881Speter * mmap context; otherwise they will be NULL. SIZE will contain the 158251881Speter * file size. Allocate from POOL. 159251881Speter */ 160251881Speter#if APR_HAS_MMAP 161251881Speter#define MMAP_T_PARAM(NAME) apr_mmap_t **NAME, 162251881Speter#define MMAP_T_ARG(NAME) &(NAME), 163251881Speter#else 164251881Speter#define MMAP_T_PARAM(NAME) 165251881Speter#define MMAP_T_ARG(NAME) 166251881Speter#endif 167251881Speter 168251881Speterstatic svn_error_t * 169251881Spetermap_or_read_file(apr_file_t **file, 170251881Speter MMAP_T_PARAM(mm) 171257936Speter char **buffer, apr_size_t *size_p, 172251881Speter const char *path, apr_pool_t *pool) 173251881Speter{ 174251881Speter apr_finfo_t finfo; 175251881Speter apr_status_t rv; 176257936Speter apr_size_t size; 177251881Speter 178251881Speter *buffer = NULL; 179251881Speter 180251881Speter SVN_ERR(svn_io_file_open(file, path, APR_READ, APR_OS_DEFAULT, pool)); 181251881Speter SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, *file, pool)); 182251881Speter 183257936Speter if (finfo.size > APR_SIZE_MAX) 184257936Speter { 185257936Speter return svn_error_createf(APR_ENOMEM, NULL, 186257936Speter _("File '%s' is too large to be read in " 187257936Speter "to memory"), path); 188257936Speter } 189257936Speter 190257936Speter size = (apr_size_t) finfo.size; 191251881Speter#if APR_HAS_MMAP 192257936Speter if (size > APR_MMAP_THRESHOLD) 193251881Speter { 194257936Speter rv = apr_mmap_create(mm, *file, 0, size, APR_MMAP_READ, pool); 195251881Speter if (rv == APR_SUCCESS) 196251881Speter { 197251881Speter *buffer = (*mm)->mm; 198251881Speter } 199257936Speter else 200257936Speter { 201257936Speter /* Clear *MM because output parameters are undefined on error. */ 202257936Speter *mm = NULL; 203257936Speter } 204251881Speter 205251881Speter /* On failure we just fall through and try reading the file into 206251881Speter * memory instead. 207251881Speter */ 208251881Speter } 209251881Speter#endif /* APR_HAS_MMAP */ 210251881Speter 211257936Speter if (*buffer == NULL && size > 0) 212251881Speter { 213257936Speter *buffer = apr_palloc(pool, size); 214251881Speter 215257936Speter SVN_ERR(svn_io_file_read_full2(*file, *buffer, size, NULL, NULL, pool)); 216251881Speter 217251881Speter /* Since we have the entire contents of the file we can 218251881Speter * close it now. 219251881Speter */ 220251881Speter SVN_ERR(svn_io_file_close(*file, pool)); 221251881Speter 222251881Speter *file = NULL; 223251881Speter } 224251881Speter 225257936Speter *size_p = size; 226251881Speter 227251881Speter return SVN_NO_ERROR; 228251881Speter} 229251881Speter 230251881Speter 231251881Speter/* For all files in the FILE array, increment the curp pointer. If a file 232251881Speter * points before the beginning of file, let it point at the first byte again. 233251881Speter * If the end of the current chunk is reached, read the next chunk in the 234251881Speter * buffer and point curp to the start of the chunk. If EOF is reached, set 235251881Speter * curp equal to endp to indicate EOF. */ 236251881Speter#define INCREMENT_POINTERS(all_files, files_len, pool) \ 237251881Speter do { \ 238251881Speter apr_size_t svn_macro__i; \ 239251881Speter \ 240251881Speter for (svn_macro__i = 0; svn_macro__i < (files_len); svn_macro__i++) \ 241251881Speter { \ 242251881Speter if ((all_files)[svn_macro__i].curp < (all_files)[svn_macro__i].endp - 1)\ 243251881Speter (all_files)[svn_macro__i].curp++; \ 244251881Speter else \ 245251881Speter SVN_ERR(increment_chunk(&(all_files)[svn_macro__i], (pool))); \ 246251881Speter } \ 247251881Speter } while (0) 248251881Speter 249251881Speter 250251881Speter/* For all files in the FILE array, decrement the curp pointer. If the 251251881Speter * start of a chunk is reached, read the previous chunk in the buffer and 252251881Speter * point curp to the last byte of the chunk. If the beginning of a FILE is 253251881Speter * reached, set chunk to -1 to indicate BOF. */ 254251881Speter#define DECREMENT_POINTERS(all_files, files_len, pool) \ 255251881Speter do { \ 256251881Speter apr_size_t svn_macro__i; \ 257251881Speter \ 258251881Speter for (svn_macro__i = 0; svn_macro__i < (files_len); svn_macro__i++) \ 259251881Speter { \ 260251881Speter if ((all_files)[svn_macro__i].curp > (all_files)[svn_macro__i].buffer) \ 261251881Speter (all_files)[svn_macro__i].curp--; \ 262251881Speter else \ 263251881Speter SVN_ERR(decrement_chunk(&(all_files)[svn_macro__i], (pool))); \ 264251881Speter } \ 265251881Speter } while (0) 266251881Speter 267251881Speter 268251881Speterstatic svn_error_t * 269251881Speterincrement_chunk(struct file_info *file, apr_pool_t *pool) 270251881Speter{ 271251881Speter apr_off_t length; 272251881Speter apr_off_t last_chunk = offset_to_chunk(file->size); 273251881Speter 274251881Speter if (file->chunk == -1) 275251881Speter { 276251881Speter /* We are at BOF (Beginning Of File). Point to first chunk/byte again. */ 277251881Speter file->chunk = 0; 278251881Speter file->curp = file->buffer; 279251881Speter } 280251881Speter else if (file->chunk == last_chunk) 281251881Speter { 282251881Speter /* We are at the last chunk. Indicate EOF by setting curp == endp. */ 283251881Speter file->curp = file->endp; 284251881Speter } 285251881Speter else 286251881Speter { 287251881Speter /* There are still chunks left. Read next chunk and reset pointers. */ 288251881Speter file->chunk++; 289251881Speter length = file->chunk == last_chunk ? 290251881Speter offset_in_chunk(file->size) : CHUNK_SIZE; 291289180Speter SVN_ERR(read_chunk(file->file, file->buffer, 292251881Speter length, chunk_to_offset(file->chunk), 293251881Speter pool)); 294251881Speter file->endp = file->buffer + length; 295251881Speter file->curp = file->buffer; 296251881Speter } 297251881Speter 298251881Speter return SVN_NO_ERROR; 299251881Speter} 300251881Speter 301251881Speter 302251881Speterstatic svn_error_t * 303251881Speterdecrement_chunk(struct file_info *file, apr_pool_t *pool) 304251881Speter{ 305251881Speter if (file->chunk == 0) 306251881Speter { 307251881Speter /* We are already at the first chunk. Indicate BOF (Beginning Of File) 308251881Speter by setting chunk = -1 and curp = endp - 1. Both conditions are 309251881Speter important. They help the increment step to catch the BOF situation 310251881Speter in an efficient way. */ 311251881Speter file->chunk--; 312251881Speter file->curp = file->endp - 1; 313251881Speter } 314251881Speter else 315251881Speter { 316251881Speter /* Read previous chunk and reset pointers. */ 317251881Speter file->chunk--; 318289180Speter SVN_ERR(read_chunk(file->file, file->buffer, 319251881Speter CHUNK_SIZE, chunk_to_offset(file->chunk), 320251881Speter pool)); 321251881Speter file->endp = file->buffer + CHUNK_SIZE; 322251881Speter file->curp = file->endp - 1; 323251881Speter } 324251881Speter 325251881Speter return SVN_NO_ERROR; 326251881Speter} 327251881Speter 328251881Speter 329251881Speter/* Check whether one of the FILEs has its pointers 'before' the beginning of 330251881Speter * the file (this can happen while scanning backwards). This is the case if 331251881Speter * one of them has chunk == -1. */ 332251881Speterstatic svn_boolean_t 333251881Speteris_one_at_bof(struct file_info file[], apr_size_t file_len) 334251881Speter{ 335251881Speter apr_size_t i; 336251881Speter 337251881Speter for (i = 0; i < file_len; i++) 338251881Speter if (file[i].chunk == -1) 339251881Speter return TRUE; 340251881Speter 341251881Speter return FALSE; 342251881Speter} 343251881Speter 344251881Speter/* Check whether one of the FILEs has its pointers at EOF (this is the case if 345251881Speter * one of them has curp == endp (this can only happen at the last chunk)) */ 346251881Speterstatic svn_boolean_t 347251881Speteris_one_at_eof(struct file_info file[], apr_size_t file_len) 348251881Speter{ 349251881Speter apr_size_t i; 350251881Speter 351251881Speter for (i = 0; i < file_len; i++) 352251881Speter if (file[i].curp == file[i].endp) 353251881Speter return TRUE; 354251881Speter 355251881Speter return FALSE; 356251881Speter} 357251881Speter 358251881Speter/* Quickly determine whether there is a eol char in CHUNK. 359251881Speter * (mainly copy-n-paste from eol.c#svn_eol__find_eol_start). 360251881Speter */ 361251881Speter 362251881Speter#if SVN_UNALIGNED_ACCESS_IS_OK 363251881Speterstatic svn_boolean_t contains_eol(apr_uintptr_t chunk) 364251881Speter{ 365251881Speter apr_uintptr_t r_test = chunk ^ SVN__R_MASK; 366251881Speter apr_uintptr_t n_test = chunk ^ SVN__N_MASK; 367251881Speter 368251881Speter r_test |= (r_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET; 369251881Speter n_test |= (n_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET; 370251881Speter 371251881Speter return (r_test & n_test & SVN__BIT_7_SET) != SVN__BIT_7_SET; 372251881Speter} 373251881Speter#endif 374251881Speter 375251881Speter/* Find the prefix which is identical between all elements of the FILE array. 376251881Speter * Return the number of prefix lines in PREFIX_LINES. REACHED_ONE_EOF will be 377251881Speter * set to TRUE if one of the FILEs reached its end while scanning prefix, 378251881Speter * i.e. at least one file consisted entirely of prefix. Otherwise, 379251881Speter * REACHED_ONE_EOF is set to FALSE. 380251881Speter * 381251881Speter * After this function is finished, the buffers, chunks, curp's and endp's 382251881Speter * of the FILEs are set to point at the first byte after the prefix. */ 383251881Speterstatic svn_error_t * 384251881Speterfind_identical_prefix(svn_boolean_t *reached_one_eof, apr_off_t *prefix_lines, 385251881Speter struct file_info file[], apr_size_t file_len, 386251881Speter apr_pool_t *pool) 387251881Speter{ 388251881Speter svn_boolean_t had_cr = FALSE; 389251881Speter svn_boolean_t is_match; 390251881Speter apr_off_t lines = 0; 391251881Speter apr_size_t i; 392251881Speter 393251881Speter *reached_one_eof = FALSE; 394251881Speter 395251881Speter for (i = 1, is_match = TRUE; i < file_len; i++) 396251881Speter is_match = is_match && *file[0].curp == *file[i].curp; 397251881Speter while (is_match) 398251881Speter { 399251881Speter#if SVN_UNALIGNED_ACCESS_IS_OK 400251881Speter apr_ssize_t max_delta, delta; 401251881Speter#endif /* SVN_UNALIGNED_ACCESS_IS_OK */ 402251881Speter 403251881Speter /* ### TODO: see if we can take advantage of 404251881Speter diff options like ignore_eol_style or ignore_space. */ 405251881Speter /* check for eol, and count */ 406251881Speter if (*file[0].curp == '\r') 407251881Speter { 408251881Speter lines++; 409251881Speter had_cr = TRUE; 410251881Speter } 411251881Speter else if (*file[0].curp == '\n' && !had_cr) 412251881Speter { 413251881Speter lines++; 414251881Speter } 415251881Speter else 416251881Speter { 417251881Speter had_cr = FALSE; 418251881Speter } 419251881Speter 420251881Speter INCREMENT_POINTERS(file, file_len, pool); 421251881Speter 422251881Speter#if SVN_UNALIGNED_ACCESS_IS_OK 423251881Speter 424251881Speter /* Try to advance as far as possible with machine-word granularity. 425251881Speter * Determine how far we may advance with chunky ops without reaching 426251881Speter * endp for any of the files. 427251881Speter * Signedness is important here if curp gets close to endp. 428251881Speter */ 429251881Speter max_delta = file[0].endp - file[0].curp - sizeof(apr_uintptr_t); 430251881Speter for (i = 1; i < file_len; i++) 431251881Speter { 432251881Speter delta = file[i].endp - file[i].curp - sizeof(apr_uintptr_t); 433251881Speter if (delta < max_delta) 434251881Speter max_delta = delta; 435251881Speter } 436251881Speter 437251881Speter is_match = TRUE; 438251881Speter for (delta = 0; delta < max_delta; delta += sizeof(apr_uintptr_t)) 439251881Speter { 440251881Speter apr_uintptr_t chunk = *(const apr_uintptr_t *)(file[0].curp + delta); 441251881Speter if (contains_eol(chunk)) 442251881Speter break; 443251881Speter 444251881Speter for (i = 1; i < file_len; i++) 445251881Speter if (chunk != *(const apr_uintptr_t *)(file[i].curp + delta)) 446251881Speter { 447251881Speter is_match = FALSE; 448251881Speter break; 449251881Speter } 450251881Speter 451251881Speter if (! is_match) 452251881Speter break; 453251881Speter } 454251881Speter 455251881Speter if (delta /* > 0*/) 456251881Speter { 457251881Speter /* We either found a mismatch or an EOL at or shortly behind curp+delta 458251881Speter * or we cannot proceed with chunky ops without exceeding endp. 459251881Speter * In any way, everything up to curp + delta is equal and not an EOL. 460251881Speter */ 461251881Speter for (i = 0; i < file_len; i++) 462251881Speter file[i].curp += delta; 463251881Speter 464251881Speter /* Skipped data without EOL markers, so last char was not a CR. */ 465251881Speter had_cr = FALSE; 466251881Speter } 467251881Speter#endif 468251881Speter 469251881Speter *reached_one_eof = is_one_at_eof(file, file_len); 470251881Speter if (*reached_one_eof) 471251881Speter break; 472251881Speter else 473251881Speter for (i = 1, is_match = TRUE; i < file_len; i++) 474251881Speter is_match = is_match && *file[0].curp == *file[i].curp; 475251881Speter } 476251881Speter 477251881Speter if (had_cr) 478251881Speter { 479251881Speter /* Check if we ended in the middle of a \r\n for one file, but \r for 480251881Speter another. If so, back up one byte, so the next loop will back up 481251881Speter the entire line. Also decrement lines, since we counted one 482251881Speter too many for the \r. */ 483251881Speter svn_boolean_t ended_at_nonmatching_newline = FALSE; 484251881Speter for (i = 0; i < file_len; i++) 485251881Speter if (file[i].curp < file[i].endp) 486251881Speter ended_at_nonmatching_newline = ended_at_nonmatching_newline 487251881Speter || *file[i].curp == '\n'; 488251881Speter if (ended_at_nonmatching_newline) 489251881Speter { 490251881Speter lines--; 491251881Speter DECREMENT_POINTERS(file, file_len, pool); 492251881Speter } 493251881Speter } 494251881Speter 495251881Speter /* Back up one byte, so we point at the last identical byte */ 496251881Speter DECREMENT_POINTERS(file, file_len, pool); 497251881Speter 498251881Speter /* Back up to the last eol sequence (\n, \r\n or \r) */ 499251881Speter while (!is_one_at_bof(file, file_len) && 500251881Speter *file[0].curp != '\n' && *file[0].curp != '\r') 501251881Speter DECREMENT_POINTERS(file, file_len, pool); 502251881Speter 503251881Speter /* Slide one byte forward, to point past the eol sequence */ 504251881Speter INCREMENT_POINTERS(file, file_len, pool); 505251881Speter 506251881Speter *prefix_lines = lines; 507251881Speter 508251881Speter return SVN_NO_ERROR; 509251881Speter} 510251881Speter 511251881Speter 512251881Speter/* The number of identical suffix lines to keep with the middle section. These 513251881Speter * lines are not eliminated as suffix, and can be picked up by the token 514251881Speter * parsing and lcs steps. This is mainly for backward compatibility with 515251881Speter * the previous diff (and blame) output (if there are multiple diff solutions, 516251881Speter * our lcs algorithm prefers taking common lines from the start, rather than 517251881Speter * from the end. By giving it back some suffix lines, we give it some wiggle 518251881Speter * room to find the exact same diff as before). 519251881Speter * 520251881Speter * The number 50 is more or less arbitrary, based on some real-world tests 521251881Speter * with big files (and then doubling the required number to be on the safe 522251881Speter * side). This has a negligible effect on the power of the optimization. */ 523251881Speter/* If you change this number, update test_identical_suffix() in diff-diff3-test.c */ 524251881Speter#ifndef SUFFIX_LINES_TO_KEEP 525251881Speter#define SUFFIX_LINES_TO_KEEP 50 526251881Speter#endif 527251881Speter 528251881Speter/* Find the suffix which is identical between all elements of the FILE array. 529251881Speter * Return the number of suffix lines in SUFFIX_LINES. 530251881Speter * 531251881Speter * Before this function is called the FILEs' pointers and chunks should be 532251881Speter * positioned right after the identical prefix (which is the case after 533251881Speter * find_identical_prefix), so we can determine where suffix scanning should 534251881Speter * ultimately stop. */ 535251881Speterstatic svn_error_t * 536251881Speterfind_identical_suffix(apr_off_t *suffix_lines, struct file_info file[], 537251881Speter apr_size_t file_len, apr_pool_t *pool) 538251881Speter{ 539251881Speter struct file_info file_for_suffix[4] = { { 0 } }; 540251881Speter apr_off_t length[4]; 541251881Speter apr_off_t suffix_min_chunk0; 542251881Speter apr_off_t suffix_min_offset0; 543251881Speter apr_off_t min_file_size; 544251881Speter int suffix_lines_to_keep = SUFFIX_LINES_TO_KEEP; 545251881Speter svn_boolean_t is_match; 546251881Speter apr_off_t lines = 0; 547251881Speter svn_boolean_t had_nl; 548251881Speter apr_size_t i; 549251881Speter 550251881Speter /* Initialize file_for_suffix[]. 551251881Speter Read last chunk, position curp at last byte. */ 552251881Speter for (i = 0; i < file_len; i++) 553251881Speter { 554251881Speter file_for_suffix[i].path = file[i].path; 555251881Speter file_for_suffix[i].file = file[i].file; 556251881Speter file_for_suffix[i].size = file[i].size; 557251881Speter file_for_suffix[i].chunk = 558251881Speter (int) offset_to_chunk(file_for_suffix[i].size); /* last chunk */ 559251881Speter length[i] = offset_in_chunk(file_for_suffix[i].size); 560251881Speter if (length[i] == 0) 561251881Speter { 562251881Speter /* last chunk is an empty chunk -> start at next-to-last chunk */ 563251881Speter file_for_suffix[i].chunk = file_for_suffix[i].chunk - 1; 564251881Speter length[i] = CHUNK_SIZE; 565251881Speter } 566251881Speter 567251881Speter if (file_for_suffix[i].chunk == file[i].chunk) 568251881Speter { 569251881Speter /* Prefix ended in last chunk, so we can reuse the prefix buffer */ 570251881Speter file_for_suffix[i].buffer = file[i].buffer; 571251881Speter } 572251881Speter else 573251881Speter { 574251881Speter /* There is at least more than 1 chunk, 575251881Speter so allocate full chunk size buffer */ 576251881Speter file_for_suffix[i].buffer = apr_palloc(pool, CHUNK_SIZE); 577289180Speter SVN_ERR(read_chunk(file_for_suffix[i].file, 578251881Speter file_for_suffix[i].buffer, length[i], 579251881Speter chunk_to_offset(file_for_suffix[i].chunk), 580251881Speter pool)); 581251881Speter } 582251881Speter file_for_suffix[i].endp = file_for_suffix[i].buffer + length[i]; 583251881Speter file_for_suffix[i].curp = file_for_suffix[i].endp - 1; 584251881Speter } 585251881Speter 586251881Speter /* Get the chunk and pointer offset (for file[0]) at which we should stop 587251881Speter scanning backward for the identical suffix, i.e. when we reach prefix. */ 588251881Speter suffix_min_chunk0 = file[0].chunk; 589251881Speter suffix_min_offset0 = file[0].curp - file[0].buffer; 590251881Speter 591251881Speter /* Compensate if other files are smaller than file[0] */ 592251881Speter for (i = 1, min_file_size = file[0].size; i < file_len; i++) 593251881Speter if (file[i].size < min_file_size) 594251881Speter min_file_size = file[i].size; 595251881Speter if (file[0].size > min_file_size) 596251881Speter { 597251881Speter suffix_min_chunk0 += (file[0].size - min_file_size) / CHUNK_SIZE; 598251881Speter suffix_min_offset0 += (file[0].size - min_file_size) % CHUNK_SIZE; 599251881Speter } 600251881Speter 601251881Speter /* Scan backwards until mismatch or until we reach the prefix. */ 602251881Speter for (i = 1, is_match = TRUE; i < file_len; i++) 603251881Speter is_match = is_match 604251881Speter && *file_for_suffix[0].curp == *file_for_suffix[i].curp; 605251881Speter if (is_match && *file_for_suffix[0].curp != '\r' 606251881Speter && *file_for_suffix[0].curp != '\n') 607251881Speter /* Count an extra line for the last line not ending in an eol. */ 608251881Speter lines++; 609251881Speter 610251881Speter had_nl = FALSE; 611251881Speter while (is_match) 612251881Speter { 613251881Speter svn_boolean_t reached_prefix; 614251881Speter#if SVN_UNALIGNED_ACCESS_IS_OK 615251881Speter /* Initialize the minimum pointer positions. */ 616251881Speter const char *min_curp[4]; 617251881Speter svn_boolean_t can_read_word; 618251881Speter#endif /* SVN_UNALIGNED_ACCESS_IS_OK */ 619251881Speter 620251881Speter /* ### TODO: see if we can take advantage of 621251881Speter diff options like ignore_eol_style or ignore_space. */ 622251881Speter /* check for eol, and count */ 623251881Speter if (*file_for_suffix[0].curp == '\n') 624251881Speter { 625251881Speter lines++; 626251881Speter had_nl = TRUE; 627251881Speter } 628251881Speter else if (*file_for_suffix[0].curp == '\r' && !had_nl) 629251881Speter { 630251881Speter lines++; 631251881Speter } 632251881Speter else 633251881Speter { 634251881Speter had_nl = FALSE; 635251881Speter } 636251881Speter 637251881Speter DECREMENT_POINTERS(file_for_suffix, file_len, pool); 638251881Speter 639251881Speter#if SVN_UNALIGNED_ACCESS_IS_OK 640251881Speter for (i = 0; i < file_len; i++) 641251881Speter min_curp[i] = file_for_suffix[i].buffer; 642251881Speter 643251881Speter /* If we are in the same chunk that contains the last part of the common 644251881Speter prefix, use the min_curp[0] pointer to make sure we don't get a 645251881Speter suffix that overlaps the already determined common prefix. */ 646251881Speter if (file_for_suffix[0].chunk == suffix_min_chunk0) 647251881Speter min_curp[0] += suffix_min_offset0; 648251881Speter 649251881Speter /* Scan quickly by reading with machine-word granularity. */ 650289180Speter for (i = 0, can_read_word = TRUE; can_read_word && i < file_len; i++) 651289180Speter can_read_word = ((file_for_suffix[i].curp + 1 - sizeof(apr_uintptr_t)) 652289180Speter > min_curp[i]); 653289180Speter 654251881Speter while (can_read_word) 655251881Speter { 656251881Speter apr_uintptr_t chunk; 657251881Speter 658251881Speter /* For each file curp is positioned at the current byte, but we 659251881Speter want to examine the current byte and the ones before the current 660251881Speter location as one machine word. */ 661251881Speter 662251881Speter chunk = *(const apr_uintptr_t *)(file_for_suffix[0].curp + 1 663251881Speter - sizeof(apr_uintptr_t)); 664251881Speter if (contains_eol(chunk)) 665251881Speter break; 666251881Speter 667289180Speter for (i = 1, is_match = TRUE; is_match && i < file_len; i++) 668289180Speter is_match = (chunk 669251881Speter == *(const apr_uintptr_t *) 670251881Speter (file_for_suffix[i].curp + 1 671251881Speter - sizeof(apr_uintptr_t))); 672251881Speter 673251881Speter if (! is_match) 674251881Speter break; 675251881Speter 676251881Speter for (i = 0; i < file_len; i++) 677251881Speter { 678251881Speter file_for_suffix[i].curp -= sizeof(apr_uintptr_t); 679251881Speter can_read_word = can_read_word 680251881Speter && ( (file_for_suffix[i].curp + 1 681251881Speter - sizeof(apr_uintptr_t)) 682251881Speter > min_curp[i]); 683251881Speter } 684251881Speter 685251881Speter /* We skipped some bytes, so there are no closing EOLs */ 686251881Speter had_nl = FALSE; 687251881Speter } 688251881Speter 689251881Speter /* The > min_curp[i] check leaves at least one final byte for checking 690251881Speter in the non block optimized case below. */ 691251881Speter#endif 692251881Speter 693251881Speter reached_prefix = file_for_suffix[0].chunk == suffix_min_chunk0 694251881Speter && (file_for_suffix[0].curp - file_for_suffix[0].buffer) 695251881Speter == suffix_min_offset0; 696251881Speter if (reached_prefix || is_one_at_bof(file_for_suffix, file_len)) 697251881Speter break; 698251881Speter 699251881Speter is_match = TRUE; 700251881Speter for (i = 1; i < file_len; i++) 701251881Speter is_match = is_match 702251881Speter && *file_for_suffix[0].curp == *file_for_suffix[i].curp; 703251881Speter } 704251881Speter 705251881Speter /* Slide one byte forward, to point at the first byte of identical suffix */ 706251881Speter INCREMENT_POINTERS(file_for_suffix, file_len, pool); 707251881Speter 708251881Speter /* Slide forward until we find an eol sequence to add the rest of the line 709251881Speter we're in. Then add SUFFIX_LINES_TO_KEEP more lines. Stop if at least 710251881Speter one file reaches its end. */ 711251881Speter do 712251881Speter { 713289180Speter svn_boolean_t had_cr = FALSE; 714251881Speter while (!is_one_at_eof(file_for_suffix, file_len) 715251881Speter && *file_for_suffix[0].curp != '\n' 716251881Speter && *file_for_suffix[0].curp != '\r') 717251881Speter INCREMENT_POINTERS(file_for_suffix, file_len, pool); 718251881Speter 719251881Speter /* Slide one or two more bytes, to point past the eol. */ 720251881Speter if (!is_one_at_eof(file_for_suffix, file_len) 721251881Speter && *file_for_suffix[0].curp == '\r') 722251881Speter { 723251881Speter lines--; 724251881Speter had_cr = TRUE; 725251881Speter INCREMENT_POINTERS(file_for_suffix, file_len, pool); 726251881Speter } 727251881Speter if (!is_one_at_eof(file_for_suffix, file_len) 728251881Speter && *file_for_suffix[0].curp == '\n') 729251881Speter { 730251881Speter if (!had_cr) 731251881Speter lines--; 732251881Speter INCREMENT_POINTERS(file_for_suffix, file_len, pool); 733251881Speter } 734251881Speter } 735251881Speter while (!is_one_at_eof(file_for_suffix, file_len) 736251881Speter && suffix_lines_to_keep--); 737251881Speter 738251881Speter if (is_one_at_eof(file_for_suffix, file_len)) 739251881Speter lines = 0; 740251881Speter 741251881Speter /* Save the final suffix information in the original file_info */ 742251881Speter for (i = 0; i < file_len; i++) 743251881Speter { 744251881Speter file[i].suffix_start_chunk = file_for_suffix[i].chunk; 745251881Speter file[i].suffix_offset_in_chunk = 746251881Speter file_for_suffix[i].curp - file_for_suffix[i].buffer; 747251881Speter } 748251881Speter 749251881Speter *suffix_lines = lines; 750251881Speter 751251881Speter return SVN_NO_ERROR; 752251881Speter} 753251881Speter 754251881Speter 755251881Speter/* Let FILE stand for the array of file_info struct elements of BATON->files 756251881Speter * that are indexed by the elements of the DATASOURCE array. 757251881Speter * BATON's type is (svn_diff__file_baton_t *). 758251881Speter * 759251881Speter * For each file in the FILE array, open the file at FILE.path; initialize 760251881Speter * FILE.file, FILE.size, FILE.buffer, FILE.curp and FILE.endp; allocate a 761251881Speter * buffer and read the first chunk. Then find the prefix and suffix lines 762251881Speter * which are identical between all the files. Return the number of identical 763251881Speter * prefix lines in PREFIX_LINES, and the number of identical suffix lines in 764251881Speter * SUFFIX_LINES. 765251881Speter * 766251881Speter * Finding the identical prefix and suffix allows us to exclude those from the 767251881Speter * rest of the diff algorithm, which increases performance by reducing the 768251881Speter * problem space. 769251881Speter * 770251881Speter * Implements svn_diff_fns2_t::datasources_open. */ 771251881Speterstatic svn_error_t * 772251881Speterdatasources_open(void *baton, 773251881Speter apr_off_t *prefix_lines, 774251881Speter apr_off_t *suffix_lines, 775251881Speter const svn_diff_datasource_e *datasources, 776251881Speter apr_size_t datasources_len) 777251881Speter{ 778251881Speter svn_diff__file_baton_t *file_baton = baton; 779251881Speter struct file_info files[4]; 780251881Speter apr_finfo_t finfo[4]; 781251881Speter apr_off_t length[4]; 782251881Speter#ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING 783251881Speter svn_boolean_t reached_one_eof; 784251881Speter#endif 785251881Speter apr_size_t i; 786251881Speter 787251881Speter /* Make sure prefix_lines and suffix_lines are set correctly, even if we 788251881Speter * exit early because one of the files is empty. */ 789251881Speter *prefix_lines = 0; 790251881Speter *suffix_lines = 0; 791251881Speter 792251881Speter /* Open datasources and read first chunk */ 793251881Speter for (i = 0; i < datasources_len; i++) 794251881Speter { 795251881Speter struct file_info *file 796251881Speter = &file_baton->files[datasource_to_index(datasources[i])]; 797251881Speter SVN_ERR(svn_io_file_open(&file->file, file->path, 798251881Speter APR_READ, APR_OS_DEFAULT, file_baton->pool)); 799251881Speter SVN_ERR(svn_io_file_info_get(&finfo[i], APR_FINFO_SIZE, 800251881Speter file->file, file_baton->pool)); 801251881Speter file->size = finfo[i].size; 802251881Speter length[i] = finfo[i].size > CHUNK_SIZE ? CHUNK_SIZE : finfo[i].size; 803251881Speter file->buffer = apr_palloc(file_baton->pool, (apr_size_t) length[i]); 804289180Speter SVN_ERR(read_chunk(file->file, file->buffer, 805251881Speter length[i], 0, file_baton->pool)); 806251881Speter file->endp = file->buffer + length[i]; 807251881Speter file->curp = file->buffer; 808251881Speter /* Set suffix_start_chunk to a guard value, so if suffix scanning is 809251881Speter * skipped because one of the files is empty, or because of 810251881Speter * reached_one_eof, we can still easily check for the suffix during 811251881Speter * token reading (datasource_get_next_token). */ 812251881Speter file->suffix_start_chunk = -1; 813251881Speter 814251881Speter files[i] = *file; 815251881Speter } 816251881Speter 817251881Speter for (i = 0; i < datasources_len; i++) 818251881Speter if (length[i] == 0) 819251881Speter /* There will not be any identical prefix/suffix, so we're done. */ 820251881Speter return SVN_NO_ERROR; 821251881Speter 822251881Speter#ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING 823251881Speter 824251881Speter SVN_ERR(find_identical_prefix(&reached_one_eof, prefix_lines, 825251881Speter files, datasources_len, file_baton->pool)); 826251881Speter 827251881Speter if (!reached_one_eof) 828251881Speter /* No file consisted totally of identical prefix, 829251881Speter * so there may be some identical suffix. */ 830251881Speter SVN_ERR(find_identical_suffix(suffix_lines, files, datasources_len, 831251881Speter file_baton->pool)); 832251881Speter 833251881Speter#endif 834251881Speter 835251881Speter /* Copy local results back to baton. */ 836251881Speter for (i = 0; i < datasources_len; i++) 837251881Speter file_baton->files[datasource_to_index(datasources[i])] = files[i]; 838251881Speter 839251881Speter return SVN_NO_ERROR; 840251881Speter} 841251881Speter 842251881Speter 843251881Speter/* Implements svn_diff_fns2_t::datasource_close */ 844251881Speterstatic svn_error_t * 845251881Speterdatasource_close(void *baton, svn_diff_datasource_e datasource) 846251881Speter{ 847251881Speter /* Do nothing. The compare_token function needs previous datasources 848251881Speter * to stay available until all datasources are processed. 849251881Speter */ 850251881Speter 851251881Speter return SVN_NO_ERROR; 852251881Speter} 853251881Speter 854251881Speter/* Implements svn_diff_fns2_t::datasource_get_next_token */ 855251881Speterstatic svn_error_t * 856251881Speterdatasource_get_next_token(apr_uint32_t *hash, void **token, void *baton, 857251881Speter svn_diff_datasource_e datasource) 858251881Speter{ 859251881Speter svn_diff__file_baton_t *file_baton = baton; 860251881Speter svn_diff__file_token_t *file_token; 861251881Speter struct file_info *file = &file_baton->files[datasource_to_index(datasource)]; 862251881Speter char *endp; 863251881Speter char *curp; 864251881Speter char *eol; 865251881Speter apr_off_t last_chunk; 866251881Speter apr_off_t length; 867251881Speter apr_uint32_t h = 0; 868251881Speter /* Did the last chunk end in a CR character? */ 869251881Speter svn_boolean_t had_cr = FALSE; 870251881Speter 871251881Speter *token = NULL; 872251881Speter 873251881Speter curp = file->curp; 874251881Speter endp = file->endp; 875251881Speter 876251881Speter last_chunk = offset_to_chunk(file->size); 877251881Speter 878251881Speter /* Are we already at the end of a chunk? */ 879251881Speter if (curp == endp) 880251881Speter { 881251881Speter /* Are we at EOF */ 882251881Speter if (last_chunk == file->chunk) 883251881Speter return SVN_NO_ERROR; /* EOF */ 884251881Speter 885251881Speter /* Or right before an identical suffix in the next chunk? */ 886251881Speter if (file->chunk + 1 == file->suffix_start_chunk 887251881Speter && file->suffix_offset_in_chunk == 0) 888251881Speter return SVN_NO_ERROR; 889251881Speter } 890251881Speter 891251881Speter /* Stop when we encounter the identical suffix. If suffix scanning was not 892251881Speter * performed, suffix_start_chunk will be -1, so this condition will never 893251881Speter * be true. */ 894251881Speter if (file->chunk == file->suffix_start_chunk 895251881Speter && (curp - file->buffer) == file->suffix_offset_in_chunk) 896251881Speter return SVN_NO_ERROR; 897251881Speter 898251881Speter /* Allocate a new token, or fetch one from the "reusable tokens" list. */ 899251881Speter file_token = file_baton->tokens; 900251881Speter if (file_token) 901251881Speter { 902251881Speter file_baton->tokens = file_token->next; 903251881Speter } 904251881Speter else 905251881Speter { 906251881Speter file_token = apr_palloc(file_baton->pool, sizeof(*file_token)); 907251881Speter } 908251881Speter 909251881Speter file_token->datasource = datasource; 910251881Speter file_token->offset = chunk_to_offset(file->chunk) 911251881Speter + (curp - file->buffer); 912251881Speter file_token->norm_offset = file_token->offset; 913251881Speter file_token->raw_length = 0; 914251881Speter file_token->length = 0; 915251881Speter 916251881Speter while (1) 917251881Speter { 918251881Speter eol = svn_eol__find_eol_start(curp, endp - curp); 919251881Speter if (eol) 920251881Speter { 921251881Speter had_cr = (*eol == '\r'); 922251881Speter eol++; 923251881Speter /* If we have the whole eol sequence in the chunk... */ 924251881Speter if (!(had_cr && eol == endp)) 925251881Speter { 926251881Speter /* Also skip past the '\n' in an '\r\n' sequence. */ 927251881Speter if (had_cr && *eol == '\n') 928251881Speter eol++; 929251881Speter break; 930251881Speter } 931251881Speter } 932251881Speter 933251881Speter if (file->chunk == last_chunk) 934251881Speter { 935251881Speter eol = endp; 936251881Speter break; 937251881Speter } 938251881Speter 939251881Speter length = endp - curp; 940251881Speter file_token->raw_length += length; 941251881Speter { 942251881Speter char *c = curp; 943251881Speter 944251881Speter svn_diff__normalize_buffer(&c, &length, 945251881Speter &file->normalize_state, 946251881Speter curp, file_baton->options); 947251881Speter if (file_token->length == 0) 948251881Speter { 949251881Speter /* When we are reading the first part of the token, move the 950251881Speter normalized offset past leading ignored characters, if any. */ 951251881Speter file_token->norm_offset += (c - curp); 952251881Speter } 953251881Speter file_token->length += length; 954251881Speter h = svn__adler32(h, c, length); 955251881Speter } 956251881Speter 957251881Speter curp = endp = file->buffer; 958251881Speter file->chunk++; 959251881Speter length = file->chunk == last_chunk ? 960251881Speter offset_in_chunk(file->size) : CHUNK_SIZE; 961251881Speter endp += length; 962251881Speter file->endp = endp; 963251881Speter 964251881Speter /* Issue #4283: Normally we should have checked for reaching the skipped 965251881Speter suffix here, but because we assume that a suffix always starts on a 966251881Speter line and token boundary we rely on catching the suffix earlier in this 967251881Speter function. 968251881Speter 969251881Speter When changing things here, make sure the whitespace settings are 970289180Speter applied, or we might not reach the exact suffix boundary as token 971251881Speter boundary. */ 972289180Speter SVN_ERR(read_chunk(file->file, 973251881Speter curp, length, 974251881Speter chunk_to_offset(file->chunk), 975251881Speter file_baton->pool)); 976251881Speter 977251881Speter /* If the last chunk ended in a CR, we're done. */ 978251881Speter if (had_cr) 979251881Speter { 980251881Speter eol = curp; 981251881Speter if (*curp == '\n') 982251881Speter ++eol; 983251881Speter break; 984251881Speter } 985251881Speter } 986251881Speter 987251881Speter length = eol - curp; 988251881Speter file_token->raw_length += length; 989251881Speter file->curp = eol; 990251881Speter 991251881Speter /* If the file length is exactly a multiple of CHUNK_SIZE, we will end up 992251881Speter * with a spurious empty token. Avoid returning it. 993251881Speter * Note that we use the unnormalized length; we don't want a line containing 994251881Speter * only spaces (and no trailing newline) to appear like a non-existent 995251881Speter * line. */ 996251881Speter if (file_token->raw_length > 0) 997251881Speter { 998251881Speter char *c = curp; 999251881Speter svn_diff__normalize_buffer(&c, &length, 1000251881Speter &file->normalize_state, 1001251881Speter curp, file_baton->options); 1002251881Speter if (file_token->length == 0) 1003251881Speter { 1004251881Speter /* When we are reading the first part of the token, move the 1005251881Speter normalized offset past leading ignored characters, if any. */ 1006251881Speter file_token->norm_offset += (c - curp); 1007251881Speter } 1008251881Speter 1009251881Speter file_token->length += length; 1010251881Speter 1011251881Speter *hash = svn__adler32(h, c, length); 1012251881Speter *token = file_token; 1013251881Speter } 1014251881Speter 1015251881Speter return SVN_NO_ERROR; 1016251881Speter} 1017251881Speter 1018251881Speter#define COMPARE_CHUNK_SIZE 4096 1019251881Speter 1020251881Speter/* Implements svn_diff_fns2_t::token_compare */ 1021251881Speterstatic svn_error_t * 1022251881Spetertoken_compare(void *baton, void *token1, void *token2, int *compare) 1023251881Speter{ 1024251881Speter svn_diff__file_baton_t *file_baton = baton; 1025251881Speter svn_diff__file_token_t *file_token[2]; 1026251881Speter char buffer[2][COMPARE_CHUNK_SIZE]; 1027251881Speter char *bufp[2]; 1028251881Speter apr_off_t offset[2]; 1029251881Speter struct file_info *file[2]; 1030251881Speter apr_off_t length[2]; 1031251881Speter apr_off_t total_length; 1032251881Speter /* How much is left to read of each token from the file. */ 1033251881Speter apr_off_t raw_length[2]; 1034251881Speter int i; 1035251881Speter svn_diff__normalize_state_t state[2]; 1036251881Speter 1037251881Speter file_token[0] = token1; 1038251881Speter file_token[1] = token2; 1039251881Speter if (file_token[0]->length < file_token[1]->length) 1040251881Speter { 1041251881Speter *compare = -1; 1042251881Speter return SVN_NO_ERROR; 1043251881Speter } 1044251881Speter 1045251881Speter if (file_token[0]->length > file_token[1]->length) 1046251881Speter { 1047251881Speter *compare = 1; 1048251881Speter return SVN_NO_ERROR; 1049251881Speter } 1050251881Speter 1051251881Speter total_length = file_token[0]->length; 1052251881Speter if (total_length == 0) 1053251881Speter { 1054251881Speter *compare = 0; 1055251881Speter return SVN_NO_ERROR; 1056251881Speter } 1057251881Speter 1058251881Speter for (i = 0; i < 2; ++i) 1059251881Speter { 1060251881Speter int idx = datasource_to_index(file_token[i]->datasource); 1061251881Speter 1062251881Speter file[i] = &file_baton->files[idx]; 1063251881Speter offset[i] = file_token[i]->norm_offset; 1064251881Speter state[i] = svn_diff__normalize_state_normal; 1065251881Speter 1066251881Speter if (offset_to_chunk(offset[i]) == file[i]->chunk) 1067251881Speter { 1068251881Speter /* If the start of the token is in memory, the entire token is 1069251881Speter * in memory. 1070251881Speter */ 1071251881Speter bufp[i] = file[i]->buffer; 1072251881Speter bufp[i] += offset_in_chunk(offset[i]); 1073251881Speter 1074251881Speter length[i] = total_length; 1075251881Speter raw_length[i] = 0; 1076251881Speter } 1077251881Speter else 1078251881Speter { 1079251881Speter apr_off_t skipped; 1080251881Speter 1081251881Speter length[i] = 0; 1082251881Speter 1083251881Speter /* When we skipped the first part of the token via the whitespace 1084251881Speter normalization we must reduce the raw length of the token */ 1085251881Speter skipped = (file_token[i]->norm_offset - file_token[i]->offset); 1086251881Speter 1087251881Speter raw_length[i] = file_token[i]->raw_length - skipped; 1088251881Speter } 1089251881Speter } 1090251881Speter 1091251881Speter do 1092251881Speter { 1093251881Speter apr_off_t len; 1094251881Speter for (i = 0; i < 2; i++) 1095251881Speter { 1096251881Speter if (length[i] == 0) 1097251881Speter { 1098251881Speter /* Error if raw_length is 0, that's an unexpected change 1099251881Speter * of the file that can happen when ingoring whitespace 1100251881Speter * and that can lead to an infinite loop. */ 1101251881Speter if (raw_length[i] == 0) 1102251881Speter return svn_error_createf(SVN_ERR_DIFF_DATASOURCE_MODIFIED, 1103251881Speter NULL, 1104251881Speter _("The file '%s' changed unexpectedly" 1105251881Speter " during diff"), 1106251881Speter file[i]->path); 1107251881Speter 1108251881Speter /* Read a chunk from disk into a buffer */ 1109251881Speter bufp[i] = buffer[i]; 1110251881Speter length[i] = raw_length[i] > COMPARE_CHUNK_SIZE ? 1111251881Speter COMPARE_CHUNK_SIZE : raw_length[i]; 1112251881Speter 1113251881Speter SVN_ERR(read_chunk(file[i]->file, 1114251881Speter bufp[i], length[i], offset[i], 1115251881Speter file_baton->pool)); 1116251881Speter offset[i] += length[i]; 1117251881Speter raw_length[i] -= length[i]; 1118251881Speter /* bufp[i] gets reset to buffer[i] before reading each chunk, 1119251881Speter so, overwriting it isn't a problem */ 1120251881Speter svn_diff__normalize_buffer(&bufp[i], &length[i], &state[i], 1121251881Speter bufp[i], file_baton->options); 1122251881Speter 1123251881Speter /* assert(length[i] == file_token[i]->length); */ 1124251881Speter } 1125251881Speter } 1126251881Speter 1127251881Speter len = length[0] > length[1] ? length[1] : length[0]; 1128251881Speter 1129251881Speter /* Compare two chunks (that could be entire tokens if they both reside 1130251881Speter * in memory). 1131251881Speter */ 1132251881Speter *compare = memcmp(bufp[0], bufp[1], (size_t) len); 1133251881Speter if (*compare != 0) 1134251881Speter return SVN_NO_ERROR; 1135251881Speter 1136251881Speter total_length -= len; 1137251881Speter length[0] -= len; 1138251881Speter length[1] -= len; 1139251881Speter bufp[0] += len; 1140251881Speter bufp[1] += len; 1141251881Speter } 1142251881Speter while(total_length > 0); 1143251881Speter 1144251881Speter *compare = 0; 1145251881Speter return SVN_NO_ERROR; 1146251881Speter} 1147251881Speter 1148251881Speter 1149251881Speter/* Implements svn_diff_fns2_t::token_discard */ 1150251881Speterstatic void 1151251881Spetertoken_discard(void *baton, void *token) 1152251881Speter{ 1153251881Speter svn_diff__file_baton_t *file_baton = baton; 1154251881Speter svn_diff__file_token_t *file_token = token; 1155251881Speter 1156251881Speter /* Prepend FILE_TOKEN to FILE_BATON->TOKENS, for reuse. */ 1157251881Speter file_token->next = file_baton->tokens; 1158251881Speter file_baton->tokens = file_token; 1159251881Speter} 1160251881Speter 1161251881Speter 1162251881Speter/* Implements svn_diff_fns2_t::token_discard_all */ 1163251881Speterstatic void 1164251881Spetertoken_discard_all(void *baton) 1165251881Speter{ 1166251881Speter svn_diff__file_baton_t *file_baton = baton; 1167251881Speter 1168251881Speter /* Discard all memory in use by the tokens, and close all open files. */ 1169251881Speter svn_pool_clear(file_baton->pool); 1170251881Speter} 1171251881Speter 1172251881Speter 1173251881Speterstatic const svn_diff_fns2_t svn_diff__file_vtable = 1174251881Speter{ 1175251881Speter datasources_open, 1176251881Speter datasource_close, 1177251881Speter datasource_get_next_token, 1178251881Speter token_compare, 1179251881Speter token_discard, 1180251881Speter token_discard_all 1181251881Speter}; 1182251881Speter 1183251881Speter/* Id for the --ignore-eol-style option, which doesn't have a short name. */ 1184251881Speter#define SVN_DIFF__OPT_IGNORE_EOL_STYLE 256 1185251881Speter 1186251881Speter/* Options supported by svn_diff_file_options_parse(). */ 1187251881Speterstatic const apr_getopt_option_t diff_options[] = 1188251881Speter{ 1189251881Speter { "ignore-space-change", 'b', 0, NULL }, 1190251881Speter { "ignore-all-space", 'w', 0, NULL }, 1191251881Speter { "ignore-eol-style", SVN_DIFF__OPT_IGNORE_EOL_STYLE, 0, NULL }, 1192251881Speter { "show-c-function", 'p', 0, NULL }, 1193251881Speter /* ### For compatibility; we don't support the argument to -u, because 1194251881Speter * ### we don't have optional argument support. */ 1195251881Speter { "unified", 'u', 0, NULL }, 1196289180Speter { "context", 'U', 1, NULL }, 1197251881Speter { NULL, 0, 0, NULL } 1198251881Speter}; 1199251881Speter 1200251881Spetersvn_diff_file_options_t * 1201251881Spetersvn_diff_file_options_create(apr_pool_t *pool) 1202251881Speter{ 1203289180Speter svn_diff_file_options_t * opts = apr_pcalloc(pool, sizeof(*opts)); 1204289180Speter 1205289180Speter opts->context_size = SVN_DIFF__UNIFIED_CONTEXT_SIZE; 1206289180Speter 1207289180Speter return opts; 1208251881Speter} 1209251881Speter 1210251881Speter/* A baton for use with opt_parsing_error_func(). */ 1211251881Speterstruct opt_parsing_error_baton_t 1212251881Speter{ 1213251881Speter svn_error_t *err; 1214251881Speter apr_pool_t *pool; 1215251881Speter}; 1216251881Speter 1217251881Speter/* Store an error message from apr_getopt_long(). Set BATON->err to a new 1218251881Speter * error with a message generated from FMT and the remaining arguments. 1219251881Speter * Implements apr_getopt_err_fn_t. */ 1220251881Speterstatic void 1221251881Speteropt_parsing_error_func(void *baton, 1222251881Speter const char *fmt, ...) 1223251881Speter{ 1224251881Speter struct opt_parsing_error_baton_t *b = baton; 1225251881Speter const char *message; 1226251881Speter va_list ap; 1227251881Speter 1228251881Speter va_start(ap, fmt); 1229251881Speter message = apr_pvsprintf(b->pool, fmt, ap); 1230251881Speter va_end(ap); 1231251881Speter 1232251881Speter /* Skip leading ": " (if present, which it always is in known cases). */ 1233251881Speter if (strncmp(message, ": ", 2) == 0) 1234251881Speter message += 2; 1235251881Speter 1236251881Speter b->err = svn_error_create(SVN_ERR_INVALID_DIFF_OPTION, NULL, message); 1237251881Speter} 1238251881Speter 1239251881Spetersvn_error_t * 1240251881Spetersvn_diff_file_options_parse(svn_diff_file_options_t *options, 1241251881Speter const apr_array_header_t *args, 1242251881Speter apr_pool_t *pool) 1243251881Speter{ 1244251881Speter apr_getopt_t *os; 1245251881Speter struct opt_parsing_error_baton_t opt_parsing_error_baton; 1246251881Speter /* Make room for each option (starting at index 1) plus trailing NULL. */ 1247251881Speter const char **argv = apr_palloc(pool, sizeof(char*) * (args->nelts + 2)); 1248251881Speter 1249251881Speter opt_parsing_error_baton.err = NULL; 1250251881Speter opt_parsing_error_baton.pool = pool; 1251251881Speter 1252251881Speter argv[0] = ""; 1253289180Speter memcpy(argv + 1, args->elts, sizeof(char*) * args->nelts); 1254251881Speter argv[args->nelts + 1] = NULL; 1255251881Speter 1256251881Speter apr_getopt_init(&os, pool, args->nelts + 1, argv); 1257251881Speter 1258251881Speter /* Capture any error message from apr_getopt_long(). This will typically 1259251881Speter * say which option is wrong, which we would not otherwise know. */ 1260251881Speter os->errfn = opt_parsing_error_func; 1261251881Speter os->errarg = &opt_parsing_error_baton; 1262251881Speter 1263251881Speter while (1) 1264251881Speter { 1265251881Speter const char *opt_arg; 1266251881Speter int opt_id; 1267251881Speter apr_status_t err = apr_getopt_long(os, diff_options, &opt_id, &opt_arg); 1268251881Speter 1269251881Speter if (APR_STATUS_IS_EOF(err)) 1270251881Speter break; 1271251881Speter if (err) 1272251881Speter /* Wrap apr_getopt_long()'s error message. Its doc string implies 1273251881Speter * it always will produce one, but never mind if it doesn't. Avoid 1274251881Speter * using the message associated with the return code ERR, because 1275251881Speter * it refers to the "command line" which may be misleading here. */ 1276251881Speter return svn_error_create(SVN_ERR_INVALID_DIFF_OPTION, 1277251881Speter opt_parsing_error_baton.err, 1278251881Speter _("Error in options to internal diff")); 1279251881Speter 1280251881Speter switch (opt_id) 1281251881Speter { 1282251881Speter case 'b': 1283251881Speter /* -w takes precedence over -b. */ 1284251881Speter if (! options->ignore_space) 1285251881Speter options->ignore_space = svn_diff_file_ignore_space_change; 1286251881Speter break; 1287251881Speter case 'w': 1288251881Speter options->ignore_space = svn_diff_file_ignore_space_all; 1289251881Speter break; 1290251881Speter case SVN_DIFF__OPT_IGNORE_EOL_STYLE: 1291251881Speter options->ignore_eol_style = TRUE; 1292251881Speter break; 1293251881Speter case 'p': 1294251881Speter options->show_c_function = TRUE; 1295251881Speter break; 1296289180Speter case 'U': 1297289180Speter SVN_ERR(svn_cstring_atoi(&options->context_size, opt_arg)); 1298289180Speter break; 1299251881Speter default: 1300251881Speter break; 1301251881Speter } 1302251881Speter } 1303251881Speter 1304251881Speter /* Check for spurious arguments. */ 1305251881Speter if (os->ind < os->argc) 1306251881Speter return svn_error_createf(SVN_ERR_INVALID_DIFF_OPTION, NULL, 1307251881Speter _("Invalid argument '%s' in diff options"), 1308251881Speter os->argv[os->ind]); 1309251881Speter 1310251881Speter return SVN_NO_ERROR; 1311251881Speter} 1312251881Speter 1313251881Spetersvn_error_t * 1314251881Spetersvn_diff_file_diff_2(svn_diff_t **diff, 1315251881Speter const char *original, 1316251881Speter const char *modified, 1317251881Speter const svn_diff_file_options_t *options, 1318251881Speter apr_pool_t *pool) 1319251881Speter{ 1320251881Speter svn_diff__file_baton_t baton = { 0 }; 1321251881Speter 1322251881Speter baton.options = options; 1323251881Speter baton.files[0].path = original; 1324251881Speter baton.files[1].path = modified; 1325251881Speter baton.pool = svn_pool_create(pool); 1326251881Speter 1327251881Speter SVN_ERR(svn_diff_diff_2(diff, &baton, &svn_diff__file_vtable, pool)); 1328251881Speter 1329251881Speter svn_pool_destroy(baton.pool); 1330251881Speter return SVN_NO_ERROR; 1331251881Speter} 1332251881Speter 1333251881Spetersvn_error_t * 1334251881Spetersvn_diff_file_diff3_2(svn_diff_t **diff, 1335251881Speter const char *original, 1336251881Speter const char *modified, 1337251881Speter const char *latest, 1338251881Speter const svn_diff_file_options_t *options, 1339251881Speter apr_pool_t *pool) 1340251881Speter{ 1341251881Speter svn_diff__file_baton_t baton = { 0 }; 1342251881Speter 1343251881Speter baton.options = options; 1344251881Speter baton.files[0].path = original; 1345251881Speter baton.files[1].path = modified; 1346251881Speter baton.files[2].path = latest; 1347251881Speter baton.pool = svn_pool_create(pool); 1348251881Speter 1349251881Speter SVN_ERR(svn_diff_diff3_2(diff, &baton, &svn_diff__file_vtable, pool)); 1350251881Speter 1351251881Speter svn_pool_destroy(baton.pool); 1352251881Speter return SVN_NO_ERROR; 1353251881Speter} 1354251881Speter 1355251881Spetersvn_error_t * 1356251881Spetersvn_diff_file_diff4_2(svn_diff_t **diff, 1357251881Speter const char *original, 1358251881Speter const char *modified, 1359251881Speter const char *latest, 1360251881Speter const char *ancestor, 1361251881Speter const svn_diff_file_options_t *options, 1362251881Speter apr_pool_t *pool) 1363251881Speter{ 1364251881Speter svn_diff__file_baton_t baton = { 0 }; 1365251881Speter 1366251881Speter baton.options = options; 1367251881Speter baton.files[0].path = original; 1368251881Speter baton.files[1].path = modified; 1369251881Speter baton.files[2].path = latest; 1370251881Speter baton.files[3].path = ancestor; 1371251881Speter baton.pool = svn_pool_create(pool); 1372251881Speter 1373251881Speter SVN_ERR(svn_diff_diff4_2(diff, &baton, &svn_diff__file_vtable, pool)); 1374251881Speter 1375251881Speter svn_pool_destroy(baton.pool); 1376251881Speter return SVN_NO_ERROR; 1377251881Speter} 1378251881Speter 1379251881Speter 1380251881Speter/** Display unified context diffs **/ 1381251881Speter 1382251881Speter/* Maximum length of the extra context to show when show_c_function is set. 1383251881Speter * GNU diff uses 40, let's be brave and use 50 instead. */ 1384251881Speter#define SVN_DIFF__EXTRA_CONTEXT_LENGTH 50 1385251881Spetertypedef struct svn_diff__file_output_baton_t 1386251881Speter{ 1387251881Speter svn_stream_t *output_stream; 1388251881Speter const char *header_encoding; 1389251881Speter 1390251881Speter /* Cached markers, in header_encoding. */ 1391251881Speter const char *context_str; 1392251881Speter const char *delete_str; 1393251881Speter const char *insert_str; 1394251881Speter 1395251881Speter const char *path[2]; 1396251881Speter apr_file_t *file[2]; 1397251881Speter 1398251881Speter apr_off_t current_line[2]; 1399251881Speter 1400251881Speter char buffer[2][4096]; 1401251881Speter apr_size_t length[2]; 1402251881Speter char *curp[2]; 1403251881Speter 1404251881Speter apr_off_t hunk_start[2]; 1405251881Speter apr_off_t hunk_length[2]; 1406251881Speter svn_stringbuf_t *hunk; 1407251881Speter 1408251881Speter /* Should we emit C functions in the unified diff header */ 1409251881Speter svn_boolean_t show_c_function; 1410251881Speter /* Extra strings to skip over if we match. */ 1411251881Speter apr_array_header_t *extra_skip_match; 1412251881Speter /* "Context" to append to the @@ line when the show_c_function option 1413251881Speter * is set. */ 1414251881Speter svn_stringbuf_t *extra_context; 1415251881Speter /* Extra context for the current hunk. */ 1416251881Speter char hunk_extra_context[SVN_DIFF__EXTRA_CONTEXT_LENGTH + 1]; 1417251881Speter 1418289180Speter int context_size; 1419289180Speter 1420251881Speter apr_pool_t *pool; 1421251881Speter} svn_diff__file_output_baton_t; 1422251881Speter 1423251881Spetertypedef enum svn_diff__file_output_unified_type_e 1424251881Speter{ 1425251881Speter svn_diff__file_output_unified_skip, 1426251881Speter svn_diff__file_output_unified_context, 1427251881Speter svn_diff__file_output_unified_delete, 1428251881Speter svn_diff__file_output_unified_insert 1429251881Speter} svn_diff__file_output_unified_type_e; 1430251881Speter 1431251881Speter 1432251881Speterstatic svn_error_t * 1433251881Speteroutput_unified_line(svn_diff__file_output_baton_t *baton, 1434251881Speter svn_diff__file_output_unified_type_e type, int idx) 1435251881Speter{ 1436251881Speter char *curp; 1437251881Speter char *eol; 1438251881Speter apr_size_t length; 1439251881Speter svn_error_t *err; 1440251881Speter svn_boolean_t bytes_processed = FALSE; 1441251881Speter svn_boolean_t had_cr = FALSE; 1442251881Speter /* Are we collecting extra context? */ 1443251881Speter svn_boolean_t collect_extra = FALSE; 1444251881Speter 1445251881Speter length = baton->length[idx]; 1446251881Speter curp = baton->curp[idx]; 1447251881Speter 1448251881Speter /* Lazily update the current line even if we're at EOF. 1449251881Speter * This way we fake output of context at EOF 1450251881Speter */ 1451251881Speter baton->current_line[idx]++; 1452251881Speter 1453251881Speter if (length == 0 && apr_file_eof(baton->file[idx])) 1454251881Speter { 1455251881Speter return SVN_NO_ERROR; 1456251881Speter } 1457251881Speter 1458251881Speter do 1459251881Speter { 1460251881Speter if (length > 0) 1461251881Speter { 1462251881Speter if (!bytes_processed) 1463251881Speter { 1464251881Speter switch (type) 1465251881Speter { 1466251881Speter case svn_diff__file_output_unified_context: 1467251881Speter svn_stringbuf_appendcstr(baton->hunk, baton->context_str); 1468251881Speter baton->hunk_length[0]++; 1469251881Speter baton->hunk_length[1]++; 1470251881Speter break; 1471251881Speter case svn_diff__file_output_unified_delete: 1472251881Speter svn_stringbuf_appendcstr(baton->hunk, baton->delete_str); 1473251881Speter baton->hunk_length[0]++; 1474251881Speter break; 1475251881Speter case svn_diff__file_output_unified_insert: 1476251881Speter svn_stringbuf_appendcstr(baton->hunk, baton->insert_str); 1477251881Speter baton->hunk_length[1]++; 1478251881Speter break; 1479251881Speter default: 1480251881Speter break; 1481251881Speter } 1482251881Speter 1483251881Speter if (baton->show_c_function 1484251881Speter && (type == svn_diff__file_output_unified_skip 1485251881Speter || type == svn_diff__file_output_unified_context) 1486251881Speter && (svn_ctype_isalpha(*curp) || *curp == '$' || *curp == '_') 1487251881Speter && !svn_cstring_match_glob_list(curp, 1488251881Speter baton->extra_skip_match)) 1489251881Speter { 1490251881Speter svn_stringbuf_setempty(baton->extra_context); 1491251881Speter collect_extra = TRUE; 1492251881Speter } 1493251881Speter } 1494251881Speter 1495251881Speter eol = svn_eol__find_eol_start(curp, length); 1496251881Speter 1497251881Speter if (eol != NULL) 1498251881Speter { 1499251881Speter apr_size_t len; 1500251881Speter 1501251881Speter had_cr = (*eol == '\r'); 1502251881Speter eol++; 1503251881Speter len = (apr_size_t)(eol - curp); 1504251881Speter 1505251881Speter if (! had_cr || len < length) 1506251881Speter { 1507251881Speter if (had_cr && *eol == '\n') 1508251881Speter { 1509251881Speter ++eol; 1510251881Speter ++len; 1511251881Speter } 1512251881Speter 1513251881Speter length -= len; 1514251881Speter 1515251881Speter if (type != svn_diff__file_output_unified_skip) 1516251881Speter { 1517251881Speter svn_stringbuf_appendbytes(baton->hunk, curp, len); 1518251881Speter } 1519251881Speter if (collect_extra) 1520251881Speter { 1521251881Speter svn_stringbuf_appendbytes(baton->extra_context, 1522251881Speter curp, len); 1523251881Speter } 1524251881Speter 1525251881Speter baton->curp[idx] = eol; 1526251881Speter baton->length[idx] = length; 1527251881Speter 1528251881Speter err = SVN_NO_ERROR; 1529251881Speter 1530251881Speter break; 1531251881Speter } 1532251881Speter } 1533251881Speter 1534251881Speter if (type != svn_diff__file_output_unified_skip) 1535251881Speter { 1536251881Speter svn_stringbuf_appendbytes(baton->hunk, curp, length); 1537251881Speter } 1538251881Speter 1539251881Speter if (collect_extra) 1540251881Speter { 1541251881Speter svn_stringbuf_appendbytes(baton->extra_context, curp, length); 1542251881Speter } 1543251881Speter 1544251881Speter bytes_processed = TRUE; 1545251881Speter } 1546251881Speter 1547251881Speter curp = baton->buffer[idx]; 1548251881Speter length = sizeof(baton->buffer[idx]); 1549251881Speter 1550251881Speter err = svn_io_file_read(baton->file[idx], curp, &length, baton->pool); 1551251881Speter 1552251881Speter /* If the last chunk ended with a CR, we look for an LF at the start 1553251881Speter of this chunk. */ 1554251881Speter if (had_cr) 1555251881Speter { 1556251881Speter if (! err && length > 0 && *curp == '\n') 1557251881Speter { 1558251881Speter if (type != svn_diff__file_output_unified_skip) 1559251881Speter { 1560251881Speter svn_stringbuf_appendbyte(baton->hunk, *curp); 1561251881Speter } 1562251881Speter /* We don't append the LF to extra_context, since it would 1563251881Speter * just be stripped anyway. */ 1564251881Speter ++curp; 1565251881Speter --length; 1566251881Speter } 1567251881Speter 1568251881Speter baton->curp[idx] = curp; 1569251881Speter baton->length[idx] = length; 1570251881Speter 1571251881Speter break; 1572251881Speter } 1573251881Speter } 1574251881Speter while (! err); 1575251881Speter 1576251881Speter if (err && ! APR_STATUS_IS_EOF(err->apr_err)) 1577251881Speter return err; 1578251881Speter 1579251881Speter if (err && APR_STATUS_IS_EOF(err->apr_err)) 1580251881Speter { 1581251881Speter svn_error_clear(err); 1582251881Speter /* Special case if we reach the end of file AND the last line is in the 1583251881Speter changed range AND the file doesn't end with a newline */ 1584251881Speter if (bytes_processed && (type != svn_diff__file_output_unified_skip) 1585251881Speter && ! had_cr) 1586251881Speter { 1587251881Speter SVN_ERR(svn_diff__unified_append_no_newline_msg( 1588251881Speter baton->hunk, baton->header_encoding, baton->pool)); 1589251881Speter } 1590251881Speter 1591251881Speter baton->length[idx] = 0; 1592251881Speter } 1593251881Speter 1594251881Speter return SVN_NO_ERROR; 1595251881Speter} 1596251881Speter 1597251881Speterstatic APR_INLINE svn_error_t * 1598251881Speteroutput_unified_diff_range(svn_diff__file_output_baton_t *output_baton, 1599251881Speter int source, 1600251881Speter svn_diff__file_output_unified_type_e type, 1601251881Speter apr_off_t until) 1602251881Speter{ 1603251881Speter while (output_baton->current_line[source] < until) 1604251881Speter { 1605251881Speter SVN_ERR(output_unified_line(output_baton, type, source)); 1606251881Speter } 1607251881Speter return SVN_NO_ERROR; 1608251881Speter} 1609251881Speter 1610251881Speterstatic svn_error_t * 1611251881Speteroutput_unified_flush_hunk(svn_diff__file_output_baton_t *baton) 1612251881Speter{ 1613251881Speter apr_off_t target_line; 1614251881Speter apr_size_t hunk_len; 1615251881Speter apr_off_t old_start; 1616251881Speter apr_off_t new_start; 1617251881Speter 1618251881Speter if (svn_stringbuf_isempty(baton->hunk)) 1619251881Speter { 1620251881Speter /* Nothing to flush */ 1621251881Speter return SVN_NO_ERROR; 1622251881Speter } 1623251881Speter 1624251881Speter target_line = baton->hunk_start[0] + baton->hunk_length[0] 1625289180Speter + baton->context_size; 1626251881Speter 1627251881Speter /* Add trailing context to the hunk */ 1628251881Speter SVN_ERR(output_unified_diff_range(baton, 0 /* original */, 1629251881Speter svn_diff__file_output_unified_context, 1630251881Speter target_line)); 1631251881Speter 1632251881Speter old_start = baton->hunk_start[0]; 1633251881Speter new_start = baton->hunk_start[1]; 1634251881Speter 1635251881Speter /* If the file is non-empty, convert the line indexes from 1636251881Speter zero based to one based */ 1637251881Speter if (baton->hunk_length[0]) 1638251881Speter old_start++; 1639251881Speter if (baton->hunk_length[1]) 1640251881Speter new_start++; 1641251881Speter 1642251881Speter /* Write the hunk header */ 1643251881Speter SVN_ERR(svn_diff__unified_write_hunk_header( 1644251881Speter baton->output_stream, baton->header_encoding, "@@", 1645251881Speter old_start, baton->hunk_length[0], 1646251881Speter new_start, baton->hunk_length[1], 1647251881Speter baton->hunk_extra_context, 1648251881Speter baton->pool)); 1649251881Speter 1650251881Speter /* Output the hunk content */ 1651251881Speter hunk_len = baton->hunk->len; 1652251881Speter SVN_ERR(svn_stream_write(baton->output_stream, baton->hunk->data, 1653251881Speter &hunk_len)); 1654251881Speter 1655251881Speter /* Prepare for the next hunk */ 1656251881Speter baton->hunk_length[0] = 0; 1657251881Speter baton->hunk_length[1] = 0; 1658251881Speter baton->hunk_start[0] = 0; 1659251881Speter baton->hunk_start[1] = 0; 1660251881Speter svn_stringbuf_setempty(baton->hunk); 1661251881Speter 1662251881Speter return SVN_NO_ERROR; 1663251881Speter} 1664251881Speter 1665251881Speterstatic svn_error_t * 1666251881Speteroutput_unified_diff_modified(void *baton, 1667251881Speter apr_off_t original_start, apr_off_t original_length, 1668251881Speter apr_off_t modified_start, apr_off_t modified_length, 1669251881Speter apr_off_t latest_start, apr_off_t latest_length) 1670251881Speter{ 1671251881Speter svn_diff__file_output_baton_t *output_baton = baton; 1672251881Speter apr_off_t context_prefix_length; 1673251881Speter apr_off_t prev_context_end; 1674251881Speter svn_boolean_t init_hunk = FALSE; 1675251881Speter 1676289180Speter if (original_start > output_baton->context_size) 1677289180Speter context_prefix_length = output_baton->context_size; 1678251881Speter else 1679251881Speter context_prefix_length = original_start; 1680251881Speter 1681251881Speter /* Calculate where the previous hunk will end if we would write it now 1682251881Speter (including the necessary context at the end) */ 1683251881Speter if (output_baton->hunk_length[0] > 0 || output_baton->hunk_length[1] > 0) 1684251881Speter { 1685251881Speter prev_context_end = output_baton->hunk_start[0] 1686251881Speter + output_baton->hunk_length[0] 1687289180Speter + output_baton->context_size; 1688251881Speter } 1689251881Speter else 1690251881Speter { 1691251881Speter prev_context_end = -1; 1692251881Speter 1693251881Speter if (output_baton->hunk_start[0] == 0 1694251881Speter && (original_length > 0 || modified_length > 0)) 1695251881Speter init_hunk = TRUE; 1696251881Speter } 1697251881Speter 1698251881Speter /* If the changed range is far enough from the previous range, flush the current 1699251881Speter hunk. */ 1700251881Speter { 1701251881Speter apr_off_t new_hunk_start = (original_start - context_prefix_length); 1702251881Speter 1703251881Speter if (output_baton->current_line[0] < new_hunk_start 1704251881Speter && prev_context_end <= new_hunk_start) 1705251881Speter { 1706251881Speter SVN_ERR(output_unified_flush_hunk(output_baton)); 1707251881Speter init_hunk = TRUE; 1708251881Speter } 1709251881Speter else if (output_baton->hunk_length[0] > 0 1710251881Speter || output_baton->hunk_length[1] > 0) 1711251881Speter { 1712251881Speter /* We extend the current hunk */ 1713251881Speter 1714251881Speter 1715251881Speter /* Original: Output the context preceding the changed range */ 1716251881Speter SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */, 1717251881Speter svn_diff__file_output_unified_context, 1718251881Speter original_start)); 1719251881Speter } 1720251881Speter } 1721251881Speter 1722251881Speter /* Original: Skip lines until we are at the beginning of the context we want 1723251881Speter to display */ 1724251881Speter SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */, 1725251881Speter svn_diff__file_output_unified_skip, 1726251881Speter original_start - context_prefix_length)); 1727251881Speter 1728251881Speter /* Note that the above skip stores data for the show_c_function support below */ 1729251881Speter 1730251881Speter if (init_hunk) 1731251881Speter { 1732251881Speter SVN_ERR_ASSERT(output_baton->hunk_length[0] == 0 1733251881Speter && output_baton->hunk_length[1] == 0); 1734251881Speter 1735251881Speter output_baton->hunk_start[0] = original_start - context_prefix_length; 1736251881Speter output_baton->hunk_start[1] = modified_start - context_prefix_length; 1737251881Speter } 1738251881Speter 1739251881Speter if (init_hunk && output_baton->show_c_function) 1740251881Speter { 1741251881Speter apr_size_t p; 1742251881Speter const char *invalid_character; 1743251881Speter 1744251881Speter /* Save the extra context for later use. 1745251881Speter * Note that the last byte of the hunk_extra_context array is never 1746251881Speter * touched after it is zero-initialized, so the array is always 1747251881Speter * 0-terminated. */ 1748251881Speter strncpy(output_baton->hunk_extra_context, 1749251881Speter output_baton->extra_context->data, 1750251881Speter SVN_DIFF__EXTRA_CONTEXT_LENGTH); 1751251881Speter /* Trim whitespace at the end, most notably to get rid of any 1752251881Speter * newline characters. */ 1753251881Speter p = strlen(output_baton->hunk_extra_context); 1754251881Speter while (p > 0 1755251881Speter && svn_ctype_isspace(output_baton->hunk_extra_context[p - 1])) 1756251881Speter { 1757251881Speter output_baton->hunk_extra_context[--p] = '\0'; 1758251881Speter } 1759251881Speter invalid_character = 1760251881Speter svn_utf__last_valid(output_baton->hunk_extra_context, 1761251881Speter SVN_DIFF__EXTRA_CONTEXT_LENGTH); 1762251881Speter for (p = invalid_character - output_baton->hunk_extra_context; 1763251881Speter p < SVN_DIFF__EXTRA_CONTEXT_LENGTH; p++) 1764251881Speter { 1765251881Speter output_baton->hunk_extra_context[p] = '\0'; 1766251881Speter } 1767251881Speter } 1768251881Speter 1769251881Speter /* Modified: Skip lines until we are at the start of the changed range */ 1770251881Speter SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */, 1771251881Speter svn_diff__file_output_unified_skip, 1772251881Speter modified_start)); 1773251881Speter 1774251881Speter /* Original: Output the context preceding the changed range */ 1775251881Speter SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */, 1776251881Speter svn_diff__file_output_unified_context, 1777251881Speter original_start)); 1778251881Speter 1779251881Speter /* Both: Output the changed range */ 1780251881Speter SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */, 1781251881Speter svn_diff__file_output_unified_delete, 1782251881Speter original_start + original_length)); 1783251881Speter SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */, 1784251881Speter svn_diff__file_output_unified_insert, 1785251881Speter modified_start + modified_length)); 1786251881Speter 1787251881Speter return SVN_NO_ERROR; 1788251881Speter} 1789251881Speter 1790251881Speter/* Set *HEADER to a new string consisting of PATH, a tab, and PATH's mtime. */ 1791251881Speterstatic svn_error_t * 1792251881Speteroutput_unified_default_hdr(const char **header, const char *path, 1793251881Speter apr_pool_t *pool) 1794251881Speter{ 1795251881Speter apr_finfo_t file_info; 1796251881Speter apr_time_exp_t exploded_time; 1797251881Speter char time_buffer[64]; 1798251881Speter apr_size_t time_len; 1799251881Speter const char *utf8_timestr; 1800251881Speter 1801251881Speter SVN_ERR(svn_io_stat(&file_info, path, APR_FINFO_MTIME, pool)); 1802251881Speter apr_time_exp_lt(&exploded_time, file_info.mtime); 1803251881Speter 1804251881Speter apr_strftime(time_buffer, &time_len, sizeof(time_buffer) - 1, 1805251881Speter /* Order of date components can be different in different languages */ 1806251881Speter _("%a %b %e %H:%M:%S %Y"), &exploded_time); 1807251881Speter 1808251881Speter SVN_ERR(svn_utf_cstring_to_utf8(&utf8_timestr, time_buffer, pool)); 1809251881Speter 1810251881Speter *header = apr_psprintf(pool, "%s\t%s", path, utf8_timestr); 1811251881Speter 1812251881Speter return SVN_NO_ERROR; 1813251881Speter} 1814251881Speter 1815251881Speterstatic const svn_diff_output_fns_t svn_diff__file_output_unified_vtable = 1816251881Speter{ 1817251881Speter NULL, /* output_common */ 1818251881Speter output_unified_diff_modified, 1819251881Speter NULL, /* output_diff_latest */ 1820251881Speter NULL, /* output_diff_common */ 1821251881Speter NULL /* output_conflict */ 1822251881Speter}; 1823251881Speter 1824251881Spetersvn_error_t * 1825289180Spetersvn_diff_file_output_unified4(svn_stream_t *output_stream, 1826251881Speter svn_diff_t *diff, 1827251881Speter const char *original_path, 1828251881Speter const char *modified_path, 1829251881Speter const char *original_header, 1830251881Speter const char *modified_header, 1831251881Speter const char *header_encoding, 1832251881Speter const char *relative_to_dir, 1833251881Speter svn_boolean_t show_c_function, 1834289180Speter int context_size, 1835289180Speter svn_cancel_func_t cancel_func, 1836289180Speter void *cancel_baton, 1837251881Speter apr_pool_t *pool) 1838251881Speter{ 1839251881Speter if (svn_diff_contains_diffs(diff)) 1840251881Speter { 1841251881Speter svn_diff__file_output_baton_t baton; 1842251881Speter int i; 1843251881Speter 1844251881Speter memset(&baton, 0, sizeof(baton)); 1845251881Speter baton.output_stream = output_stream; 1846251881Speter baton.pool = pool; 1847251881Speter baton.header_encoding = header_encoding; 1848251881Speter baton.path[0] = original_path; 1849251881Speter baton.path[1] = modified_path; 1850251881Speter baton.hunk = svn_stringbuf_create_empty(pool); 1851251881Speter baton.show_c_function = show_c_function; 1852251881Speter baton.extra_context = svn_stringbuf_create_empty(pool); 1853289180Speter baton.context_size = (context_size >= 0) ? context_size 1854289180Speter : SVN_DIFF__UNIFIED_CONTEXT_SIZE; 1855251881Speter 1856251881Speter if (show_c_function) 1857251881Speter { 1858251881Speter baton.extra_skip_match = apr_array_make(pool, 3, sizeof(char **)); 1859251881Speter 1860251881Speter APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "public:*"; 1861251881Speter APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "private:*"; 1862251881Speter APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "protected:*"; 1863251881Speter } 1864251881Speter 1865251881Speter SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.context_str, " ", 1866251881Speter header_encoding, pool)); 1867251881Speter SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.delete_str, "-", 1868251881Speter header_encoding, pool)); 1869251881Speter SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.insert_str, "+", 1870251881Speter header_encoding, pool)); 1871251881Speter 1872251881Speter if (relative_to_dir) 1873251881Speter { 1874251881Speter /* Possibly adjust the "original" and "modified" paths shown in 1875251881Speter the output (see issue #2723). */ 1876251881Speter const char *child_path; 1877251881Speter 1878251881Speter if (! original_header) 1879251881Speter { 1880251881Speter child_path = svn_dirent_is_child(relative_to_dir, 1881251881Speter original_path, pool); 1882251881Speter if (child_path) 1883251881Speter original_path = child_path; 1884251881Speter else 1885251881Speter return svn_error_createf( 1886251881Speter SVN_ERR_BAD_RELATIVE_PATH, NULL, 1887251881Speter _("Path '%s' must be inside " 1888251881Speter "the directory '%s'"), 1889251881Speter svn_dirent_local_style(original_path, pool), 1890251881Speter svn_dirent_local_style(relative_to_dir, 1891251881Speter pool)); 1892251881Speter } 1893251881Speter 1894251881Speter if (! modified_header) 1895251881Speter { 1896251881Speter child_path = svn_dirent_is_child(relative_to_dir, 1897251881Speter modified_path, pool); 1898251881Speter if (child_path) 1899251881Speter modified_path = child_path; 1900251881Speter else 1901251881Speter return svn_error_createf( 1902251881Speter SVN_ERR_BAD_RELATIVE_PATH, NULL, 1903251881Speter _("Path '%s' must be inside " 1904251881Speter "the directory '%s'"), 1905251881Speter svn_dirent_local_style(modified_path, pool), 1906251881Speter svn_dirent_local_style(relative_to_dir, 1907251881Speter pool)); 1908251881Speter } 1909251881Speter } 1910251881Speter 1911251881Speter for (i = 0; i < 2; i++) 1912251881Speter { 1913251881Speter SVN_ERR(svn_io_file_open(&baton.file[i], baton.path[i], 1914251881Speter APR_READ, APR_OS_DEFAULT, pool)); 1915251881Speter } 1916251881Speter 1917251881Speter if (original_header == NULL) 1918251881Speter { 1919251881Speter SVN_ERR(output_unified_default_hdr(&original_header, original_path, 1920251881Speter pool)); 1921251881Speter } 1922251881Speter 1923251881Speter if (modified_header == NULL) 1924251881Speter { 1925251881Speter SVN_ERR(output_unified_default_hdr(&modified_header, modified_path, 1926251881Speter pool)); 1927251881Speter } 1928251881Speter 1929251881Speter SVN_ERR(svn_diff__unidiff_write_header(output_stream, header_encoding, 1930251881Speter original_header, modified_header, 1931251881Speter pool)); 1932251881Speter 1933289180Speter SVN_ERR(svn_diff_output2(diff, &baton, 1934289180Speter &svn_diff__file_output_unified_vtable, 1935289180Speter cancel_func, cancel_baton)); 1936251881Speter SVN_ERR(output_unified_flush_hunk(&baton)); 1937251881Speter 1938251881Speter for (i = 0; i < 2; i++) 1939251881Speter { 1940251881Speter SVN_ERR(svn_io_file_close(baton.file[i], pool)); 1941251881Speter } 1942251881Speter } 1943251881Speter 1944251881Speter return SVN_NO_ERROR; 1945251881Speter} 1946251881Speter 1947251881Speter 1948251881Speter/** Display diff3 **/ 1949251881Speter 1950251881Speter/* A stream to remember *leading* context. Note that this stream does 1951251881Speter *not* copy the data that it is remembering; it just saves 1952251881Speter *pointers! */ 1953251881Spetertypedef struct context_saver_t { 1954251881Speter svn_stream_t *stream; 1955289180Speter int context_size; 1956289180Speter const char **data; /* const char *data[context_size] */ 1957289180Speter apr_size_t *len; /* apr_size_t len[context_size] */ 1958251881Speter apr_size_t next_slot; 1959251881Speter apr_size_t total_written; 1960251881Speter} context_saver_t; 1961251881Speter 1962251881Speter 1963251881Speterstatic svn_error_t * 1964251881Spetercontext_saver_stream_write(void *baton, 1965251881Speter const char *data, 1966251881Speter apr_size_t *len) 1967251881Speter{ 1968251881Speter context_saver_t *cs = baton; 1969289180Speter 1970289180Speter if (cs->context_size > 0) 1971289180Speter { 1972289180Speter cs->data[cs->next_slot] = data; 1973289180Speter cs->len[cs->next_slot] = *len; 1974289180Speter cs->next_slot = (cs->next_slot + 1) % cs->context_size; 1975289180Speter cs->total_written++; 1976289180Speter } 1977251881Speter return SVN_NO_ERROR; 1978251881Speter} 1979251881Speter 1980251881Spetertypedef struct svn_diff3__file_output_baton_t 1981251881Speter{ 1982251881Speter svn_stream_t *output_stream; 1983251881Speter 1984251881Speter const char *path[3]; 1985251881Speter 1986251881Speter apr_off_t current_line[3]; 1987251881Speter 1988251881Speter char *buffer[3]; 1989251881Speter char *endp[3]; 1990251881Speter char *curp[3]; 1991251881Speter 1992251881Speter /* The following four members are in the encoding used for the output. */ 1993251881Speter const char *conflict_modified; 1994251881Speter const char *conflict_original; 1995251881Speter const char *conflict_separator; 1996251881Speter const char *conflict_latest; 1997251881Speter 1998251881Speter const char *marker_eol; 1999251881Speter 2000251881Speter svn_diff_conflict_display_style_t conflict_style; 2001289180Speter int context_size; 2002251881Speter 2003289180Speter /* cancel support */ 2004289180Speter svn_cancel_func_t cancel_func; 2005289180Speter void *cancel_baton; 2006289180Speter 2007251881Speter /* The rest of the fields are for 2008251881Speter svn_diff_conflict_display_only_conflicts only. Note that for 2009251881Speter these batons, OUTPUT_STREAM is either CONTEXT_SAVER->STREAM or 2010251881Speter (soon after a conflict) a "trailing context stream", never the 2011251881Speter actual output stream.*/ 2012251881Speter /* The actual output stream. */ 2013251881Speter svn_stream_t *real_output_stream; 2014251881Speter context_saver_t *context_saver; 2015251881Speter /* Used to allocate context_saver and trailing context streams, and 2016251881Speter for some printfs. */ 2017251881Speter apr_pool_t *pool; 2018251881Speter} svn_diff3__file_output_baton_t; 2019251881Speter 2020251881Speterstatic svn_error_t * 2021251881Speterflush_context_saver(context_saver_t *cs, 2022251881Speter svn_stream_t *output_stream) 2023251881Speter{ 2024251881Speter int i; 2025289180Speter for (i = 0; i < cs->context_size; i++) 2026251881Speter { 2027289180Speter apr_size_t slot = (i + cs->next_slot) % cs->context_size; 2028251881Speter if (cs->data[slot]) 2029251881Speter { 2030251881Speter apr_size_t len = cs->len[slot]; 2031251881Speter SVN_ERR(svn_stream_write(output_stream, cs->data[slot], &len)); 2032251881Speter } 2033251881Speter } 2034251881Speter return SVN_NO_ERROR; 2035251881Speter} 2036251881Speter 2037251881Speterstatic void 2038251881Spetermake_context_saver(svn_diff3__file_output_baton_t *fob) 2039251881Speter{ 2040251881Speter context_saver_t *cs; 2041251881Speter 2042289180Speter assert(fob->context_size > 0); /* Or nothing to save */ 2043289180Speter 2044251881Speter svn_pool_clear(fob->pool); 2045251881Speter cs = apr_pcalloc(fob->pool, sizeof(*cs)); 2046251881Speter cs->stream = svn_stream_empty(fob->pool); 2047251881Speter svn_stream_set_baton(cs->stream, cs); 2048251881Speter svn_stream_set_write(cs->stream, context_saver_stream_write); 2049251881Speter fob->context_saver = cs; 2050251881Speter fob->output_stream = cs->stream; 2051289180Speter cs->context_size = fob->context_size; 2052289180Speter cs->data = apr_pcalloc(fob->pool, sizeof(*cs->data) * cs->context_size); 2053289180Speter cs->len = apr_pcalloc(fob->pool, sizeof(*cs->len) * cs->context_size); 2054251881Speter} 2055251881Speter 2056251881Speter 2057289180Speter/* A stream which prints LINES_TO_PRINT (based on context size) lines to 2058251881Speter BATON->REAL_OUTPUT_STREAM, and then changes BATON->OUTPUT_STREAM to 2059251881Speter a context_saver; used for *trailing* context. */ 2060251881Speter 2061251881Speterstruct trailing_context_printer { 2062251881Speter apr_size_t lines_to_print; 2063251881Speter svn_diff3__file_output_baton_t *fob; 2064251881Speter}; 2065251881Speter 2066251881Speter 2067251881Speter 2068251881Speterstatic svn_error_t * 2069251881Spetertrailing_context_printer_write(void *baton, 2070251881Speter const char *data, 2071251881Speter apr_size_t *len) 2072251881Speter{ 2073251881Speter struct trailing_context_printer *tcp = baton; 2074251881Speter SVN_ERR_ASSERT(tcp->lines_to_print > 0); 2075251881Speter SVN_ERR(svn_stream_write(tcp->fob->real_output_stream, data, len)); 2076251881Speter tcp->lines_to_print--; 2077251881Speter if (tcp->lines_to_print == 0) 2078251881Speter make_context_saver(tcp->fob); 2079251881Speter return SVN_NO_ERROR; 2080251881Speter} 2081251881Speter 2082251881Speter 2083251881Speterstatic void 2084251881Spetermake_trailing_context_printer(svn_diff3__file_output_baton_t *btn) 2085251881Speter{ 2086251881Speter struct trailing_context_printer *tcp; 2087251881Speter svn_stream_t *s; 2088251881Speter 2089251881Speter svn_pool_clear(btn->pool); 2090251881Speter 2091251881Speter tcp = apr_pcalloc(btn->pool, sizeof(*tcp)); 2092289180Speter tcp->lines_to_print = btn->context_size; 2093251881Speter tcp->fob = btn; 2094251881Speter s = svn_stream_empty(btn->pool); 2095251881Speter svn_stream_set_baton(s, tcp); 2096251881Speter svn_stream_set_write(s, trailing_context_printer_write); 2097251881Speter btn->output_stream = s; 2098251881Speter} 2099251881Speter 2100251881Speter 2101251881Speter 2102251881Spetertypedef enum svn_diff3__file_output_type_e 2103251881Speter{ 2104251881Speter svn_diff3__file_output_skip, 2105251881Speter svn_diff3__file_output_normal 2106251881Speter} svn_diff3__file_output_type_e; 2107251881Speter 2108251881Speter 2109251881Speterstatic svn_error_t * 2110251881Speteroutput_line(svn_diff3__file_output_baton_t *baton, 2111251881Speter svn_diff3__file_output_type_e type, int idx) 2112251881Speter{ 2113251881Speter char *curp; 2114251881Speter char *endp; 2115251881Speter char *eol; 2116251881Speter apr_size_t len; 2117251881Speter 2118251881Speter curp = baton->curp[idx]; 2119251881Speter endp = baton->endp[idx]; 2120251881Speter 2121251881Speter /* Lazily update the current line even if we're at EOF. 2122251881Speter */ 2123251881Speter baton->current_line[idx]++; 2124251881Speter 2125251881Speter if (curp == endp) 2126251881Speter return SVN_NO_ERROR; 2127251881Speter 2128251881Speter eol = svn_eol__find_eol_start(curp, endp - curp); 2129251881Speter if (!eol) 2130251881Speter eol = endp; 2131251881Speter else 2132251881Speter { 2133251881Speter svn_boolean_t had_cr = (*eol == '\r'); 2134251881Speter eol++; 2135251881Speter if (had_cr && eol != endp && *eol == '\n') 2136251881Speter eol++; 2137251881Speter } 2138251881Speter 2139251881Speter if (type != svn_diff3__file_output_skip) 2140251881Speter { 2141251881Speter len = eol - curp; 2142251881Speter /* Note that the trailing context printer assumes that 2143251881Speter svn_stream_write is called exactly once per line. */ 2144251881Speter SVN_ERR(svn_stream_write(baton->output_stream, curp, &len)); 2145251881Speter } 2146251881Speter 2147251881Speter baton->curp[idx] = eol; 2148251881Speter 2149251881Speter return SVN_NO_ERROR; 2150251881Speter} 2151251881Speter 2152251881Speterstatic svn_error_t * 2153251881Speteroutput_marker_eol(svn_diff3__file_output_baton_t *btn) 2154251881Speter{ 2155251881Speter return svn_stream_puts(btn->output_stream, btn->marker_eol); 2156251881Speter} 2157251881Speter 2158251881Speterstatic svn_error_t * 2159251881Speteroutput_hunk(void *baton, int idx, apr_off_t target_line, 2160251881Speter apr_off_t target_length) 2161251881Speter{ 2162251881Speter svn_diff3__file_output_baton_t *output_baton = baton; 2163251881Speter 2164251881Speter /* Skip lines until we are at the start of the changed range */ 2165251881Speter while (output_baton->current_line[idx] < target_line) 2166251881Speter { 2167251881Speter SVN_ERR(output_line(output_baton, svn_diff3__file_output_skip, idx)); 2168251881Speter } 2169251881Speter 2170251881Speter target_line += target_length; 2171251881Speter 2172251881Speter while (output_baton->current_line[idx] < target_line) 2173251881Speter { 2174251881Speter SVN_ERR(output_line(output_baton, svn_diff3__file_output_normal, idx)); 2175251881Speter } 2176251881Speter 2177251881Speter return SVN_NO_ERROR; 2178251881Speter} 2179251881Speter 2180251881Speterstatic svn_error_t * 2181251881Speteroutput_common(void *baton, apr_off_t original_start, apr_off_t original_length, 2182251881Speter apr_off_t modified_start, apr_off_t modified_length, 2183251881Speter apr_off_t latest_start, apr_off_t latest_length) 2184251881Speter{ 2185251881Speter return output_hunk(baton, 1, modified_start, modified_length); 2186251881Speter} 2187251881Speter 2188251881Speterstatic svn_error_t * 2189251881Speteroutput_diff_modified(void *baton, 2190251881Speter apr_off_t original_start, apr_off_t original_length, 2191251881Speter apr_off_t modified_start, apr_off_t modified_length, 2192251881Speter apr_off_t latest_start, apr_off_t latest_length) 2193251881Speter{ 2194251881Speter return output_hunk(baton, 1, modified_start, modified_length); 2195251881Speter} 2196251881Speter 2197251881Speterstatic svn_error_t * 2198251881Speteroutput_diff_latest(void *baton, 2199251881Speter apr_off_t original_start, apr_off_t original_length, 2200251881Speter apr_off_t modified_start, apr_off_t modified_length, 2201251881Speter apr_off_t latest_start, apr_off_t latest_length) 2202251881Speter{ 2203251881Speter return output_hunk(baton, 2, latest_start, latest_length); 2204251881Speter} 2205251881Speter 2206251881Speterstatic svn_error_t * 2207251881Speteroutput_conflict(void *baton, 2208251881Speter apr_off_t original_start, apr_off_t original_length, 2209251881Speter apr_off_t modified_start, apr_off_t modified_length, 2210251881Speter apr_off_t latest_start, apr_off_t latest_length, 2211251881Speter svn_diff_t *diff); 2212251881Speter 2213251881Speterstatic const svn_diff_output_fns_t svn_diff3__file_output_vtable = 2214251881Speter{ 2215251881Speter output_common, 2216251881Speter output_diff_modified, 2217251881Speter output_diff_latest, 2218251881Speter output_diff_modified, /* output_diff_common */ 2219251881Speter output_conflict 2220251881Speter}; 2221251881Speter 2222289180Speterstatic svn_error_t * 2223289180Speteroutput_conflict_with_context_marker(svn_diff3__file_output_baton_t *btn, 2224289180Speter const char *label, 2225289180Speter apr_off_t start, 2226289180Speter apr_off_t length) 2227289180Speter{ 2228289180Speter if (length == 1) 2229289180Speter SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool, 2230289180Speter "%s (%" APR_OFF_T_FMT ")", 2231289180Speter label, start + 1)); 2232289180Speter else 2233289180Speter SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool, 2234289180Speter "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")", 2235289180Speter label, start + 1, length)); 2236251881Speter 2237289180Speter SVN_ERR(output_marker_eol(btn)); 2238251881Speter 2239289180Speter return SVN_NO_ERROR; 2240289180Speter} 2241289180Speter 2242251881Speterstatic svn_error_t * 2243251881Speteroutput_conflict_with_context(svn_diff3__file_output_baton_t *btn, 2244251881Speter apr_off_t original_start, 2245251881Speter apr_off_t original_length, 2246251881Speter apr_off_t modified_start, 2247251881Speter apr_off_t modified_length, 2248251881Speter apr_off_t latest_start, 2249251881Speter apr_off_t latest_length) 2250251881Speter{ 2251251881Speter /* Are we currently saving starting context (as opposed to printing 2252251881Speter trailing context)? If so, flush it. */ 2253251881Speter if (btn->output_stream == btn->context_saver->stream) 2254251881Speter { 2255289180Speter if (btn->context_saver->total_written > btn->context_size) 2256251881Speter SVN_ERR(svn_stream_puts(btn->real_output_stream, "@@\n")); 2257251881Speter SVN_ERR(flush_context_saver(btn->context_saver, btn->real_output_stream)); 2258251881Speter } 2259251881Speter 2260251881Speter /* Print to the real output stream. */ 2261251881Speter btn->output_stream = btn->real_output_stream; 2262251881Speter 2263251881Speter /* Output the conflict itself. */ 2264289180Speter SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_modified, 2265289180Speter modified_start, modified_length)); 2266251881Speter SVN_ERR(output_hunk(btn, 1/*modified*/, modified_start, modified_length)); 2267251881Speter 2268289180Speter SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_original, 2269289180Speter original_start, original_length)); 2270251881Speter SVN_ERR(output_hunk(btn, 0/*original*/, original_start, original_length)); 2271251881Speter 2272251881Speter SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool, 2273251881Speter "%s%s", btn->conflict_separator, btn->marker_eol)); 2274251881Speter SVN_ERR(output_hunk(btn, 2/*latest*/, latest_start, latest_length)); 2275289180Speter SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_latest, 2276289180Speter latest_start, latest_length)); 2277251881Speter 2278251881Speter /* Go into print-trailing-context mode instead. */ 2279251881Speter make_trailing_context_printer(btn); 2280251881Speter 2281251881Speter return SVN_NO_ERROR; 2282251881Speter} 2283251881Speter 2284251881Speter 2285251881Speterstatic svn_error_t * 2286251881Speteroutput_conflict(void *baton, 2287251881Speter apr_off_t original_start, apr_off_t original_length, 2288251881Speter apr_off_t modified_start, apr_off_t modified_length, 2289251881Speter apr_off_t latest_start, apr_off_t latest_length, 2290251881Speter svn_diff_t *diff) 2291251881Speter{ 2292251881Speter svn_diff3__file_output_baton_t *file_baton = baton; 2293251881Speter 2294251881Speter svn_diff_conflict_display_style_t style = file_baton->conflict_style; 2295251881Speter 2296251881Speter if (style == svn_diff_conflict_display_only_conflicts) 2297251881Speter return output_conflict_with_context(file_baton, 2298251881Speter original_start, original_length, 2299251881Speter modified_start, modified_length, 2300251881Speter latest_start, latest_length); 2301251881Speter 2302251881Speter if (style == svn_diff_conflict_display_resolved_modified_latest) 2303251881Speter { 2304251881Speter if (diff) 2305289180Speter return svn_diff_output2(diff, baton, 2306289180Speter &svn_diff3__file_output_vtable, 2307289180Speter file_baton->cancel_func, 2308289180Speter file_baton->cancel_baton); 2309251881Speter else 2310251881Speter style = svn_diff_conflict_display_modified_latest; 2311251881Speter } 2312251881Speter 2313251881Speter if (style == svn_diff_conflict_display_modified_latest || 2314251881Speter style == svn_diff_conflict_display_modified_original_latest) 2315251881Speter { 2316251881Speter SVN_ERR(svn_stream_puts(file_baton->output_stream, 2317251881Speter file_baton->conflict_modified)); 2318251881Speter SVN_ERR(output_marker_eol(file_baton)); 2319251881Speter 2320251881Speter SVN_ERR(output_hunk(baton, 1, modified_start, modified_length)); 2321251881Speter 2322251881Speter if (style == svn_diff_conflict_display_modified_original_latest) 2323251881Speter { 2324251881Speter SVN_ERR(svn_stream_puts(file_baton->output_stream, 2325251881Speter file_baton->conflict_original)); 2326251881Speter SVN_ERR(output_marker_eol(file_baton)); 2327251881Speter SVN_ERR(output_hunk(baton, 0, original_start, original_length)); 2328251881Speter } 2329251881Speter 2330251881Speter SVN_ERR(svn_stream_puts(file_baton->output_stream, 2331251881Speter file_baton->conflict_separator)); 2332251881Speter SVN_ERR(output_marker_eol(file_baton)); 2333251881Speter 2334251881Speter SVN_ERR(output_hunk(baton, 2, latest_start, latest_length)); 2335251881Speter 2336251881Speter SVN_ERR(svn_stream_puts(file_baton->output_stream, 2337251881Speter file_baton->conflict_latest)); 2338251881Speter SVN_ERR(output_marker_eol(file_baton)); 2339251881Speter } 2340251881Speter else if (style == svn_diff_conflict_display_modified) 2341251881Speter SVN_ERR(output_hunk(baton, 1, modified_start, modified_length)); 2342251881Speter else if (style == svn_diff_conflict_display_latest) 2343251881Speter SVN_ERR(output_hunk(baton, 2, latest_start, latest_length)); 2344251881Speter else /* unknown style */ 2345251881Speter SVN_ERR_MALFUNCTION(); 2346251881Speter 2347251881Speter return SVN_NO_ERROR; 2348251881Speter} 2349251881Speter 2350251881Spetersvn_error_t * 2351289180Spetersvn_diff_file_output_merge3(svn_stream_t *output_stream, 2352251881Speter svn_diff_t *diff, 2353251881Speter const char *original_path, 2354251881Speter const char *modified_path, 2355251881Speter const char *latest_path, 2356251881Speter const char *conflict_original, 2357251881Speter const char *conflict_modified, 2358251881Speter const char *conflict_latest, 2359251881Speter const char *conflict_separator, 2360251881Speter svn_diff_conflict_display_style_t style, 2361289180Speter svn_cancel_func_t cancel_func, 2362289180Speter void *cancel_baton, 2363289180Speter apr_pool_t *scratch_pool) 2364251881Speter{ 2365251881Speter svn_diff3__file_output_baton_t baton; 2366251881Speter apr_file_t *file[3]; 2367251881Speter int idx; 2368251881Speter#if APR_HAS_MMAP 2369251881Speter apr_mmap_t *mm[3] = { 0 }; 2370251881Speter#endif /* APR_HAS_MMAP */ 2371251881Speter const char *eol; 2372251881Speter svn_boolean_t conflicts_only = 2373251881Speter (style == svn_diff_conflict_display_only_conflicts); 2374251881Speter 2375251881Speter memset(&baton, 0, sizeof(baton)); 2376289180Speter baton.context_size = SVN_DIFF__UNIFIED_CONTEXT_SIZE; 2377251881Speter if (conflicts_only) 2378251881Speter { 2379289180Speter baton.pool = svn_pool_create(scratch_pool); 2380251881Speter make_context_saver(&baton); 2381251881Speter baton.real_output_stream = output_stream; 2382251881Speter } 2383251881Speter else 2384251881Speter baton.output_stream = output_stream; 2385251881Speter baton.path[0] = original_path; 2386251881Speter baton.path[1] = modified_path; 2387251881Speter baton.path[2] = latest_path; 2388251881Speter SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_modified, 2389251881Speter conflict_modified ? conflict_modified 2390289180Speter : apr_psprintf(scratch_pool, "<<<<<<< %s", 2391251881Speter modified_path), 2392289180Speter scratch_pool)); 2393251881Speter SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_original, 2394251881Speter conflict_original ? conflict_original 2395289180Speter : apr_psprintf(scratch_pool, "||||||| %s", 2396251881Speter original_path), 2397289180Speter scratch_pool)); 2398251881Speter SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_separator, 2399251881Speter conflict_separator ? conflict_separator 2400289180Speter : "=======", scratch_pool)); 2401251881Speter SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_latest, 2402251881Speter conflict_latest ? conflict_latest 2403289180Speter : apr_psprintf(scratch_pool, ">>>>>>> %s", 2404251881Speter latest_path), 2405289180Speter scratch_pool)); 2406251881Speter 2407251881Speter baton.conflict_style = style; 2408251881Speter 2409251881Speter for (idx = 0; idx < 3; idx++) 2410251881Speter { 2411257936Speter apr_size_t size; 2412251881Speter 2413251881Speter SVN_ERR(map_or_read_file(&file[idx], 2414251881Speter MMAP_T_ARG(mm[idx]) 2415251881Speter &baton.buffer[idx], &size, 2416289180Speter baton.path[idx], scratch_pool)); 2417251881Speter 2418251881Speter baton.curp[idx] = baton.buffer[idx]; 2419251881Speter baton.endp[idx] = baton.buffer[idx]; 2420251881Speter 2421251881Speter if (baton.endp[idx]) 2422251881Speter baton.endp[idx] += size; 2423251881Speter } 2424251881Speter 2425251881Speter /* Check what eol marker we should use for conflict markers. 2426251881Speter We use the eol marker of the modified file and fall back on the 2427251881Speter platform's eol marker if that file doesn't contain any newlines. */ 2428251881Speter eol = svn_eol__detect_eol(baton.buffer[1], baton.endp[1] - baton.buffer[1], 2429251881Speter NULL); 2430251881Speter if (! eol) 2431251881Speter eol = APR_EOL_STR; 2432251881Speter baton.marker_eol = eol; 2433251881Speter 2434289180Speter baton.cancel_func = cancel_func; 2435289180Speter baton.cancel_baton = cancel_baton; 2436251881Speter 2437289180Speter SVN_ERR(svn_diff_output2(diff, &baton, 2438289180Speter &svn_diff3__file_output_vtable, 2439289180Speter cancel_func, cancel_baton)); 2440289180Speter 2441251881Speter for (idx = 0; idx < 3; idx++) 2442251881Speter { 2443251881Speter#if APR_HAS_MMAP 2444251881Speter if (mm[idx]) 2445251881Speter { 2446251881Speter apr_status_t rv = apr_mmap_delete(mm[idx]); 2447251881Speter if (rv != APR_SUCCESS) 2448251881Speter { 2449251881Speter return svn_error_wrap_apr(rv, _("Failed to delete mmap '%s'"), 2450251881Speter baton.path[idx]); 2451251881Speter } 2452251881Speter } 2453251881Speter#endif /* APR_HAS_MMAP */ 2454251881Speter 2455251881Speter if (file[idx]) 2456251881Speter { 2457289180Speter SVN_ERR(svn_io_file_close(file[idx], scratch_pool)); 2458251881Speter } 2459251881Speter } 2460251881Speter 2461251881Speter if (conflicts_only) 2462251881Speter svn_pool_destroy(baton.pool); 2463251881Speter 2464251881Speter return SVN_NO_ERROR; 2465251881Speter} 2466251881Speter 2467