diff_file.c revision 299742
1/*
2 * diff_file.c :  routines for doing diffs on files
3 *
4 * ====================================================================
5 *    Licensed to the Apache Software Foundation (ASF) under one
6 *    or more contributor license agreements.  See the NOTICE file
7 *    distributed with this work for additional information
8 *    regarding copyright ownership.  The ASF licenses this file
9 *    to you under the Apache License, Version 2.0 (the
10 *    "License"); you may not use this file except in compliance
11 *    with the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 *    Unless required by applicable law or agreed to in writing,
16 *    software distributed under the License is distributed on an
17 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 *    KIND, either express or implied.  See the License for the
19 *    specific language governing permissions and limitations
20 *    under the License.
21 * ====================================================================
22 */
23
24
25#include <apr.h>
26#include <apr_pools.h>
27#include <apr_general.h>
28#include <apr_file_io.h>
29#include <apr_file_info.h>
30#include <apr_time.h>
31#include <apr_mmap.h>
32#include <apr_getopt.h>
33
34#include <assert.h>
35
36#include "svn_error.h"
37#include "svn_diff.h"
38#include "svn_types.h"
39#include "svn_string.h"
40#include "svn_subst.h"
41#include "svn_io.h"
42#include "svn_utf.h"
43#include "svn_pools.h"
44#include "diff.h"
45#include "svn_private_config.h"
46#include "svn_path.h"
47#include "svn_ctype.h"
48
49#include "private/svn_utf_private.h"
50#include "private/svn_eol_private.h"
51#include "private/svn_dep_compat.h"
52#include "private/svn_adler32.h"
53#include "private/svn_diff_private.h"
54
55/* A token, i.e. a line read from a file. */
56typedef struct svn_diff__file_token_t
57{
58  /* Next token in free list. */
59  struct svn_diff__file_token_t *next;
60  svn_diff_datasource_e datasource;
61  /* Offset in the datasource. */
62  apr_off_t offset;
63  /* Offset of the normalized token (may skip leading whitespace) */
64  apr_off_t norm_offset;
65  /* Total length - before normalization. */
66  apr_off_t raw_length;
67  /* Total length - after normalization. */
68  apr_off_t length;
69} svn_diff__file_token_t;
70
71
72typedef struct svn_diff__file_baton_t
73{
74  const svn_diff_file_options_t *options;
75
76  struct file_info {
77    const char *path;  /* path to this file, absolute or relative to CWD */
78
79    /* All the following fields are active while this datasource is open */
80    apr_file_t *file;  /* handle of this file */
81    apr_off_t size;    /* total raw size in bytes of this file */
82
83    /* The current chunk: CHUNK_SIZE bytes except for the last chunk. */
84    int chunk;     /* the current chunk number, zero-based */
85    char *buffer;  /* a buffer containing the current chunk */
86    char *curp;    /* current position in the current chunk */
87    char *endp;    /* next memory address after the current chunk */
88
89    svn_diff__normalize_state_t normalize_state;
90
91    /* Where the identical suffix starts in this datasource */
92    int suffix_start_chunk;
93    apr_off_t suffix_offset_in_chunk;
94  } files[4];
95
96  /* List of free tokens that may be reused. */
97  svn_diff__file_token_t *tokens;
98
99  apr_pool_t *pool;
100} svn_diff__file_baton_t;
101
102static int
103datasource_to_index(svn_diff_datasource_e datasource)
104{
105  switch (datasource)
106    {
107    case svn_diff_datasource_original:
108      return 0;
109
110    case svn_diff_datasource_modified:
111      return 1;
112
113    case svn_diff_datasource_latest:
114      return 2;
115
116    case svn_diff_datasource_ancestor:
117      return 3;
118    }
119
120  return -1;
121}
122
/* Files are read in chunks of 128k.  There is no support for this number
 * whatsoever.  If there is a number someone comes up with that has some
 * argumentation, let's use that.
 */
/* If you change this number, update test_norm_offset(),
 * test_identical_suffix() and test_token_compare() in diff-diff3-test.c.
 */
#define CHUNK_SHIFT 17
#define CHUNK_SIZE (1 << CHUNK_SHIFT)

/* Convert a zero-based chunk number into the byte offset of its first byte.
 * The chunk number is widened to apr_off_t *before* shifting: chunk numbers
 * are stored as 'int', and shifting an int would overflow as soon as the
 * offset reaches 2GB (chunk >= 16384). */
#define chunk_to_offset(chunk) (((apr_off_t) (chunk)) << CHUNK_SHIFT)

/* Number of the chunk that contains byte OFFSET. */
#define offset_to_chunk(offset) ((offset) >> CHUNK_SHIFT)

/* Position of byte OFFSET relative to the start of its chunk. */
#define offset_in_chunk(offset) ((offset) & (CHUNK_SIZE - 1))
136
137
138/* Read a chunk from a FILE into BUFFER, starting from OFFSET, going for
139 * *LENGTH.  The actual bytes read are stored in *LENGTH on return.
140 */
141static APR_INLINE svn_error_t *
142read_chunk(apr_file_t *file,
143           char *buffer, apr_off_t length,
144           apr_off_t offset, apr_pool_t *scratch_pool)
145{
146  /* XXX: The final offset may not be the one we asked for.
147   * XXX: Check.
148   */
149  SVN_ERR(svn_io_file_seek(file, APR_SET, &offset, scratch_pool));
150  return svn_io_file_read_full2(file, buffer, (apr_size_t) length,
151                                NULL, NULL, scratch_pool);
152}
153
154
155/* Map or read a file at PATH. *BUFFER will point to the file
156 * contents; if the file was mapped, *FILE and *MM will contain the
157 * mmap context; otherwise they will be NULL.  SIZE will contain the
158 * file size.  Allocate from POOL.
159 */
160#if APR_HAS_MMAP
161#define MMAP_T_PARAM(NAME) apr_mmap_t **NAME,
162#define MMAP_T_ARG(NAME)   &(NAME),
163#else
164#define MMAP_T_PARAM(NAME)
165#define MMAP_T_ARG(NAME)
166#endif
167
168static svn_error_t *
169map_or_read_file(apr_file_t **file,
170                 MMAP_T_PARAM(mm)
171                 char **buffer, apr_size_t *size_p,
172                 const char *path, apr_pool_t *pool)
173{
174  apr_finfo_t finfo;
175  apr_status_t rv;
176  apr_size_t size;
177
178  *buffer = NULL;
179
180  SVN_ERR(svn_io_file_open(file, path, APR_READ, APR_OS_DEFAULT, pool));
181  SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, *file, pool));
182
183  if (finfo.size > APR_SIZE_MAX)
184    {
185      return svn_error_createf(APR_ENOMEM, NULL,
186                               _("File '%s' is too large to be read in "
187                                 "to memory"), path);
188    }
189
190  size = (apr_size_t) finfo.size;
191#if APR_HAS_MMAP
192  if (size > APR_MMAP_THRESHOLD)
193    {
194      rv = apr_mmap_create(mm, *file, 0, size, APR_MMAP_READ, pool);
195      if (rv == APR_SUCCESS)
196        {
197          *buffer = (*mm)->mm;
198        }
199      else
200        {
201          /* Clear *MM because output parameters are undefined on error. */
202          *mm = NULL;
203        }
204
205      /* On failure we just fall through and try reading the file into
206       * memory instead.
207       */
208    }
209#endif /* APR_HAS_MMAP */
210
211   if (*buffer == NULL && size > 0)
212    {
213      *buffer = apr_palloc(pool, size);
214
215      SVN_ERR(svn_io_file_read_full2(*file, *buffer, size, NULL, NULL, pool));
216
217      /* Since we have the entire contents of the file we can
218       * close it now.
219       */
220      SVN_ERR(svn_io_file_close(*file, pool));
221
222      *file = NULL;
223    }
224
225  *size_p = size;
226
227  return SVN_NO_ERROR;
228}
229
230
/* Advance the curp pointer of each of the FILES_LEN elements of ALL_FILES
 * by one byte.  As long as curp is not on the last byte of the in-memory
 * chunk this is a plain pointer increment; otherwise increment_chunk() is
 * called, which handles the "before beginning of file" state, loading the
 * next chunk, and signalling EOF by setting curp equal to endp.
 *
 * Must be used inside a function returning svn_error_t *, because an error
 * from increment_chunk() is returned through SVN_ERR. */
#define INCREMENT_POINTERS(all_files, files_len, pool)                       \
  do {                                                                       \
    apr_size_t svn_macro__idx = 0;                                           \
                                                                             \
    while (svn_macro__idx < (files_len))                                     \
      {                                                                      \
        struct file_info *svn_macro__cur = &(all_files)[svn_macro__idx];     \
                                                                             \
        if (svn_macro__cur->curp < svn_macro__cur->endp - 1)                 \
          svn_macro__cur->curp++;                                            \
        else                                                                 \
          SVN_ERR(increment_chunk(svn_macro__cur, (pool)));                  \
                                                                             \
        svn_macro__idx++;                                                    \
      }                                                                      \
  } while (0)
248
249
/* Move the curp pointer of each of the FILES_LEN elements of ALL_FILES back
 * by one byte.  As long as curp is not on the first byte of the in-memory
 * chunk this is a plain pointer decrement; otherwise decrement_chunk() is
 * called, which either loads the previous chunk and points curp at its last
 * byte, or sets chunk to -1 to indicate BOF (Beginning Of File).
 *
 * Must be used inside a function returning svn_error_t *, because an error
 * from decrement_chunk() is returned through SVN_ERR. */
#define DECREMENT_POINTERS(all_files, files_len, pool)                       \
  do {                                                                       \
    apr_size_t svn_macro__idx = 0;                                           \
                                                                             \
    while (svn_macro__idx < (files_len))                                     \
      {                                                                      \
        struct file_info *svn_macro__cur = &(all_files)[svn_macro__idx];     \
                                                                             \
        if (svn_macro__cur->curp > svn_macro__cur->buffer)                   \
          svn_macro__cur->curp--;                                            \
        else                                                                 \
          SVN_ERR(decrement_chunk(svn_macro__cur, (pool)));                  \
                                                                             \
        svn_macro__idx++;                                                    \
      }                                                                      \
  } while (0)
266
267
268static svn_error_t *
269increment_chunk(struct file_info *file, apr_pool_t *pool)
270{
271  apr_off_t length;
272  apr_off_t last_chunk = offset_to_chunk(file->size);
273
274  if (file->chunk == -1)
275    {
276      /* We are at BOF (Beginning Of File). Point to first chunk/byte again. */
277      file->chunk = 0;
278      file->curp = file->buffer;
279    }
280  else if (file->chunk == last_chunk)
281    {
282      /* We are at the last chunk. Indicate EOF by setting curp == endp. */
283      file->curp = file->endp;
284    }
285  else
286    {
287      /* There are still chunks left. Read next chunk and reset pointers. */
288      file->chunk++;
289      length = file->chunk == last_chunk ?
290        offset_in_chunk(file->size) : CHUNK_SIZE;
291      SVN_ERR(read_chunk(file->file, file->buffer,
292                         length, chunk_to_offset(file->chunk),
293                         pool));
294      file->endp = file->buffer + length;
295      file->curp = file->buffer;
296    }
297
298  return SVN_NO_ERROR;
299}
300
301
302static svn_error_t *
303decrement_chunk(struct file_info *file, apr_pool_t *pool)
304{
305  if (file->chunk == 0)
306    {
307      /* We are already at the first chunk. Indicate BOF (Beginning Of File)
308         by setting chunk = -1 and curp = endp - 1. Both conditions are
309         important. They help the increment step to catch the BOF situation
310         in an efficient way. */
311      file->chunk--;
312      file->curp = file->endp - 1;
313    }
314  else
315    {
316      /* Read previous chunk and reset pointers. */
317      file->chunk--;
318      SVN_ERR(read_chunk(file->file, file->buffer,
319                         CHUNK_SIZE, chunk_to_offset(file->chunk),
320                         pool));
321      file->endp = file->buffer + CHUNK_SIZE;
322      file->curp = file->endp - 1;
323    }
324
325  return SVN_NO_ERROR;
326}
327
328
329/* Check whether one of the FILEs has its pointers 'before' the beginning of
330 * the file (this can happen while scanning backwards). This is the case if
331 * one of them has chunk == -1. */
332static svn_boolean_t
333is_one_at_bof(struct file_info file[], apr_size_t file_len)
334{
335  apr_size_t i;
336
337  for (i = 0; i < file_len; i++)
338    if (file[i].chunk == -1)
339      return TRUE;
340
341  return FALSE;
342}
343
344/* Check whether one of the FILEs has its pointers at EOF (this is the case if
345 * one of them has curp == endp (this can only happen at the last chunk)) */
346static svn_boolean_t
347is_one_at_eof(struct file_info file[], apr_size_t file_len)
348{
349  apr_size_t i;
350
351  for (i = 0; i < file_len; i++)
352    if (file[i].curp == file[i].endp)
353      return TRUE;
354
355  return FALSE;
356}
357
/* Quickly determine whether the machine word CHUNK contains an eol
 * character, i.e. a '\r' or a '\n' byte.
 * (Mainly copy-n-paste from eol.c#svn_eol__find_eol_start.)
 *
 * XOR-ing with the respective mask zeroes exactly those bytes that equal
 * the searched character; the add/or step then sets the top bit of every
 * byte that is non-zero.  If the top bit ends up clear in some byte of
 * either test word, that byte was a '\r' or a '\n'.
 */

#if SVN_UNALIGNED_ACCESS_IS_OK
static svn_boolean_t contains_eol(apr_uintptr_t chunk)
{
  apr_uintptr_t cr_bits = chunk ^ SVN__R_MASK;
  apr_uintptr_t lf_bits = chunk ^ SVN__N_MASK;
  apr_uintptr_t top_bits;

  cr_bits |= (cr_bits & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;
  lf_bits |= (lf_bits & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;

  top_bits = cr_bits & lf_bits & SVN__BIT_7_SET;

  return top_bits != SVN__BIT_7_SET;
}
#endif
374
375/* Find the prefix which is identical between all elements of the FILE array.
376 * Return the number of prefix lines in PREFIX_LINES.  REACHED_ONE_EOF will be
377 * set to TRUE if one of the FILEs reached its end while scanning prefix,
378 * i.e. at least one file consisted entirely of prefix.  Otherwise,
379 * REACHED_ONE_EOF is set to FALSE.
380 *
381 * After this function is finished, the buffers, chunks, curp's and endp's
382 * of the FILEs are set to point at the first byte after the prefix. */
383static svn_error_t *
384find_identical_prefix(svn_boolean_t *reached_one_eof, apr_off_t *prefix_lines,
385                      struct file_info file[], apr_size_t file_len,
386                      apr_pool_t *pool)
387{
388  svn_boolean_t had_cr = FALSE;
389  svn_boolean_t is_match;
390  apr_off_t lines = 0;
391  apr_size_t i;
392
393  *reached_one_eof = FALSE;
394
395  for (i = 1, is_match = TRUE; i < file_len; i++)
396    is_match = is_match && *file[0].curp == *file[i].curp;
397  while (is_match)
398    {
399#if SVN_UNALIGNED_ACCESS_IS_OK
400      apr_ssize_t max_delta, delta;
401#endif /* SVN_UNALIGNED_ACCESS_IS_OK */
402
403      /* ### TODO: see if we can take advantage of
404         diff options like ignore_eol_style or ignore_space. */
405      /* check for eol, and count */
406      if (*file[0].curp == '\r')
407        {
408          lines++;
409          had_cr = TRUE;
410        }
411      else if (*file[0].curp == '\n' && !had_cr)
412        {
413          lines++;
414        }
415      else
416        {
417          had_cr = FALSE;
418        }
419
420      INCREMENT_POINTERS(file, file_len, pool);
421
422#if SVN_UNALIGNED_ACCESS_IS_OK
423
424      /* Try to advance as far as possible with machine-word granularity.
425       * Determine how far we may advance with chunky ops without reaching
426       * endp for any of the files.
427       * Signedness is important here if curp gets close to endp.
428       */
429      max_delta = file[0].endp - file[0].curp - sizeof(apr_uintptr_t);
430      for (i = 1; i < file_len; i++)
431        {
432          delta = file[i].endp - file[i].curp - sizeof(apr_uintptr_t);
433          if (delta < max_delta)
434            max_delta = delta;
435        }
436
437      is_match = TRUE;
438      for (delta = 0; delta < max_delta; delta += sizeof(apr_uintptr_t))
439        {
440          apr_uintptr_t chunk = *(const apr_uintptr_t *)(file[0].curp + delta);
441          if (contains_eol(chunk))
442            break;
443
444          for (i = 1; i < file_len; i++)
445            if (chunk != *(const apr_uintptr_t *)(file[i].curp + delta))
446              {
447                is_match = FALSE;
448                break;
449              }
450
451          if (! is_match)
452            break;
453        }
454
455      if (delta /* > 0*/)
456        {
457          /* We either found a mismatch or an EOL at or shortly behind curp+delta
458           * or we cannot proceed with chunky ops without exceeding endp.
459           * In any way, everything up to curp + delta is equal and not an EOL.
460           */
461          for (i = 0; i < file_len; i++)
462            file[i].curp += delta;
463
464          /* Skipped data without EOL markers, so last char was not a CR. */
465          had_cr = FALSE;
466        }
467#endif
468
469      *reached_one_eof = is_one_at_eof(file, file_len);
470      if (*reached_one_eof)
471        break;
472      else
473        for (i = 1, is_match = TRUE; i < file_len; i++)
474          is_match = is_match && *file[0].curp == *file[i].curp;
475    }
476
477  if (had_cr)
478    {
479      /* Check if we ended in the middle of a \r\n for one file, but \r for
480         another. If so, back up one byte, so the next loop will back up
481         the entire line. Also decrement lines, since we counted one
482         too many for the \r. */
483      svn_boolean_t ended_at_nonmatching_newline = FALSE;
484      for (i = 0; i < file_len; i++)
485        if (file[i].curp < file[i].endp)
486          ended_at_nonmatching_newline = ended_at_nonmatching_newline
487                                         || *file[i].curp == '\n';
488      if (ended_at_nonmatching_newline)
489        {
490          lines--;
491          DECREMENT_POINTERS(file, file_len, pool);
492        }
493    }
494
495  /* Back up one byte, so we point at the last identical byte */
496  DECREMENT_POINTERS(file, file_len, pool);
497
498  /* Back up to the last eol sequence (\n, \r\n or \r) */
499  while (!is_one_at_bof(file, file_len) &&
500         *file[0].curp != '\n' && *file[0].curp != '\r')
501    DECREMENT_POINTERS(file, file_len, pool);
502
503  /* Slide one byte forward, to point past the eol sequence */
504  INCREMENT_POINTERS(file, file_len, pool);
505
506  *prefix_lines = lines;
507
508  return SVN_NO_ERROR;
509}
510
511
/* The number of identical suffix lines to keep with the middle section.
 * These lines are not eliminated as suffix, and can be picked up by the
 * token parsing and lcs steps.  This is mainly for backward compatibility
 * with the previous diff (and blame) output: if there are multiple diff
 * solutions, our lcs algorithm prefers taking common lines from the start,
 * rather than from the end.  By giving it back some suffix lines, we give
 * it some wiggle room to find the exact same diff as before.
 *
 * The number 50 is more or less arbitrary, based on some real-world tests
 * with big files (and then doubling the required number to be on the safe
 * side).  This has a negligible effect on the power of the optimization.
 *
 * The value can be overridden by defining SUFFIX_LINES_TO_KEEP before this
 * point (e.g. on the compiler command line). */
/* If you change this number, update test_identical_suffix() in
 * diff-diff3-test.c. */
#if !defined(SUFFIX_LINES_TO_KEEP)
#define SUFFIX_LINES_TO_KEEP 50
#endif
527
528/* Find the suffix which is identical between all elements of the FILE array.
529 * Return the number of suffix lines in SUFFIX_LINES.
530 *
531 * Before this function is called the FILEs' pointers and chunks should be
532 * positioned right after the identical prefix (which is the case after
533 * find_identical_prefix), so we can determine where suffix scanning should
534 * ultimately stop. */
535static svn_error_t *
536find_identical_suffix(apr_off_t *suffix_lines, struct file_info file[],
537                      apr_size_t file_len, apr_pool_t *pool)
538{
539  struct file_info file_for_suffix[4] = { { 0 }  };
540  apr_off_t length[4];
541  apr_off_t suffix_min_chunk0;
542  apr_off_t suffix_min_offset0;
543  apr_off_t min_file_size;
544  int suffix_lines_to_keep = SUFFIX_LINES_TO_KEEP;
545  svn_boolean_t is_match;
546  apr_off_t lines = 0;
547  svn_boolean_t had_nl;
548  apr_size_t i;
549
550  /* Initialize file_for_suffix[].
551     Read last chunk, position curp at last byte. */
552  for (i = 0; i < file_len; i++)
553    {
554      file_for_suffix[i].path = file[i].path;
555      file_for_suffix[i].file = file[i].file;
556      file_for_suffix[i].size = file[i].size;
557      file_for_suffix[i].chunk =
558        (int) offset_to_chunk(file_for_suffix[i].size); /* last chunk */
559      length[i] = offset_in_chunk(file_for_suffix[i].size);
560      if (length[i] == 0)
561        {
562          /* last chunk is an empty chunk -> start at next-to-last chunk */
563          file_for_suffix[i].chunk = file_for_suffix[i].chunk - 1;
564          length[i] = CHUNK_SIZE;
565        }
566
567      if (file_for_suffix[i].chunk == file[i].chunk)
568        {
569          /* Prefix ended in last chunk, so we can reuse the prefix buffer */
570          file_for_suffix[i].buffer = file[i].buffer;
571        }
572      else
573        {
574          /* There is at least more than 1 chunk,
575             so allocate full chunk size buffer */
576          file_for_suffix[i].buffer = apr_palloc(pool, CHUNK_SIZE);
577          SVN_ERR(read_chunk(file_for_suffix[i].file,
578                             file_for_suffix[i].buffer, length[i],
579                             chunk_to_offset(file_for_suffix[i].chunk),
580                             pool));
581        }
582      file_for_suffix[i].endp = file_for_suffix[i].buffer + length[i];
583      file_for_suffix[i].curp = file_for_suffix[i].endp - 1;
584    }
585
586  /* Get the chunk and pointer offset (for file[0]) at which we should stop
587     scanning backward for the identical suffix, i.e. when we reach prefix. */
588  suffix_min_chunk0 = file[0].chunk;
589  suffix_min_offset0 = file[0].curp - file[0].buffer;
590
591  /* Compensate if other files are smaller than file[0] */
592  for (i = 1, min_file_size = file[0].size; i < file_len; i++)
593    if (file[i].size < min_file_size)
594      min_file_size = file[i].size;
595  if (file[0].size > min_file_size)
596    {
597      suffix_min_chunk0 += (file[0].size - min_file_size) / CHUNK_SIZE;
598      suffix_min_offset0 += (file[0].size - min_file_size) % CHUNK_SIZE;
599    }
600
601  /* Scan backwards until mismatch or until we reach the prefix. */
602  for (i = 1, is_match = TRUE; i < file_len; i++)
603    is_match = is_match
604               && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
605  if (is_match && *file_for_suffix[0].curp != '\r'
606               && *file_for_suffix[0].curp != '\n')
607    /* Count an extra line for the last line not ending in an eol. */
608    lines++;
609
610  had_nl = FALSE;
611  while (is_match)
612    {
613      svn_boolean_t reached_prefix;
614#if SVN_UNALIGNED_ACCESS_IS_OK
615      /* Initialize the minimum pointer positions. */
616      const char *min_curp[4];
617      svn_boolean_t can_read_word;
618#endif /* SVN_UNALIGNED_ACCESS_IS_OK */
619
620      /* ### TODO: see if we can take advantage of
621         diff options like ignore_eol_style or ignore_space. */
622      /* check for eol, and count */
623      if (*file_for_suffix[0].curp == '\n')
624        {
625          lines++;
626          had_nl = TRUE;
627        }
628      else if (*file_for_suffix[0].curp == '\r' && !had_nl)
629        {
630          lines++;
631        }
632      else
633        {
634          had_nl = FALSE;
635        }
636
637      DECREMENT_POINTERS(file_for_suffix, file_len, pool);
638
639#if SVN_UNALIGNED_ACCESS_IS_OK
640      for (i = 0; i < file_len; i++)
641        min_curp[i] = file_for_suffix[i].buffer;
642
643      /* If we are in the same chunk that contains the last part of the common
644         prefix, use the min_curp[0] pointer to make sure we don't get a
645         suffix that overlaps the already determined common prefix. */
646      if (file_for_suffix[0].chunk == suffix_min_chunk0)
647        min_curp[0] += suffix_min_offset0;
648
649      /* Scan quickly by reading with machine-word granularity. */
650      for (i = 0, can_read_word = TRUE; can_read_word && i < file_len; i++)
651        can_read_word = ((file_for_suffix[i].curp + 1 - sizeof(apr_uintptr_t))
652                         > min_curp[i]);
653
654      while (can_read_word)
655        {
656          apr_uintptr_t chunk;
657
658          /* For each file curp is positioned at the current byte, but we
659             want to examine the current byte and the ones before the current
660             location as one machine word. */
661
662          chunk = *(const apr_uintptr_t *)(file_for_suffix[0].curp + 1
663                                             - sizeof(apr_uintptr_t));
664          if (contains_eol(chunk))
665            break;
666
667          for (i = 1, is_match = TRUE; is_match && i < file_len; i++)
668            is_match = (chunk
669                           == *(const apr_uintptr_t *)
670                                    (file_for_suffix[i].curp + 1
671                                       - sizeof(apr_uintptr_t)));
672
673          if (! is_match)
674            break;
675
676          for (i = 0; i < file_len; i++)
677            {
678              file_for_suffix[i].curp -= sizeof(apr_uintptr_t);
679              can_read_word = can_read_word
680                              && (  (file_for_suffix[i].curp + 1
681                                       - sizeof(apr_uintptr_t))
682                                  > min_curp[i]);
683            }
684
685          /* We skipped some bytes, so there are no closing EOLs */
686          had_nl = FALSE;
687        }
688
689      /* The > min_curp[i] check leaves at least one final byte for checking
690         in the non block optimized case below. */
691#endif
692
693      reached_prefix = file_for_suffix[0].chunk == suffix_min_chunk0
694                       && (file_for_suffix[0].curp - file_for_suffix[0].buffer)
695                          == suffix_min_offset0;
696      if (reached_prefix || is_one_at_bof(file_for_suffix, file_len))
697        break;
698
699      is_match = TRUE;
700      for (i = 1; i < file_len; i++)
701        is_match = is_match
702                   && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
703    }
704
705  /* Slide one byte forward, to point at the first byte of identical suffix */
706  INCREMENT_POINTERS(file_for_suffix, file_len, pool);
707
708  /* Slide forward until we find an eol sequence to add the rest of the line
709     we're in. Then add SUFFIX_LINES_TO_KEEP more lines. Stop if at least
710     one file reaches its end. */
711  do
712    {
713      svn_boolean_t had_cr = FALSE;
714      while (!is_one_at_eof(file_for_suffix, file_len)
715             && *file_for_suffix[0].curp != '\n'
716             && *file_for_suffix[0].curp != '\r')
717        INCREMENT_POINTERS(file_for_suffix, file_len, pool);
718
719      /* Slide one or two more bytes, to point past the eol. */
720      if (!is_one_at_eof(file_for_suffix, file_len)
721          && *file_for_suffix[0].curp == '\r')
722        {
723          lines--;
724          had_cr = TRUE;
725          INCREMENT_POINTERS(file_for_suffix, file_len, pool);
726        }
727      if (!is_one_at_eof(file_for_suffix, file_len)
728          && *file_for_suffix[0].curp == '\n')
729        {
730          if (!had_cr)
731            lines--;
732          INCREMENT_POINTERS(file_for_suffix, file_len, pool);
733        }
734    }
735  while (!is_one_at_eof(file_for_suffix, file_len)
736         && suffix_lines_to_keep--);
737
738  if (is_one_at_eof(file_for_suffix, file_len))
739    lines = 0;
740
741  /* Save the final suffix information in the original file_info */
742  for (i = 0; i < file_len; i++)
743    {
744      file[i].suffix_start_chunk = file_for_suffix[i].chunk;
745      file[i].suffix_offset_in_chunk =
746        file_for_suffix[i].curp - file_for_suffix[i].buffer;
747    }
748
749  *suffix_lines = lines;
750
751  return SVN_NO_ERROR;
752}
753
754
755/* Let FILE stand for the array of file_info struct elements of BATON->files
756 * that are indexed by the elements of the DATASOURCE array.
757 * BATON's type is (svn_diff__file_baton_t *).
758 *
759 * For each file in the FILE array, open the file at FILE.path; initialize
760 * FILE.file, FILE.size, FILE.buffer, FILE.curp and FILE.endp; allocate a
761 * buffer and read the first chunk.  Then find the prefix and suffix lines
762 * which are identical between all the files.  Return the number of identical
763 * prefix lines in PREFIX_LINES, and the number of identical suffix lines in
764 * SUFFIX_LINES.
765 *
766 * Finding the identical prefix and suffix allows us to exclude those from the
767 * rest of the diff algorithm, which increases performance by reducing the
768 * problem space.
769 *
770 * Implements svn_diff_fns2_t::datasources_open. */
771static svn_error_t *
772datasources_open(void *baton,
773                 apr_off_t *prefix_lines,
774                 apr_off_t *suffix_lines,
775                 const svn_diff_datasource_e *datasources,
776                 apr_size_t datasources_len)
777{
778  svn_diff__file_baton_t *file_baton = baton;
779  struct file_info files[4];
780  apr_finfo_t finfo[4];
781  apr_off_t length[4];
782#ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING
783  svn_boolean_t reached_one_eof;
784#endif
785  apr_size_t i;
786
787  /* Make sure prefix_lines and suffix_lines are set correctly, even if we
788   * exit early because one of the files is empty. */
789  *prefix_lines = 0;
790  *suffix_lines = 0;
791
792  /* Open datasources and read first chunk */
793  for (i = 0; i < datasources_len; i++)
794    {
795      struct file_info *file
796          = &file_baton->files[datasource_to_index(datasources[i])];
797      SVN_ERR(svn_io_file_open(&file->file, file->path,
798                               APR_READ, APR_OS_DEFAULT, file_baton->pool));
799      SVN_ERR(svn_io_file_info_get(&finfo[i], APR_FINFO_SIZE,
800                                   file->file, file_baton->pool));
801      file->size = finfo[i].size;
802      length[i] = finfo[i].size > CHUNK_SIZE ? CHUNK_SIZE : finfo[i].size;
803      file->buffer = apr_palloc(file_baton->pool, (apr_size_t) length[i]);
804      SVN_ERR(read_chunk(file->file, file->buffer,
805                         length[i], 0, file_baton->pool));
806      file->endp = file->buffer + length[i];
807      file->curp = file->buffer;
808      /* Set suffix_start_chunk to a guard value, so if suffix scanning is
809       * skipped because one of the files is empty, or because of
810       * reached_one_eof, we can still easily check for the suffix during
811       * token reading (datasource_get_next_token). */
812      file->suffix_start_chunk = -1;
813
814      files[i] = *file;
815    }
816
817  for (i = 0; i < datasources_len; i++)
818    if (length[i] == 0)
819      /* There will not be any identical prefix/suffix, so we're done. */
820      return SVN_NO_ERROR;
821
822#ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING
823
824  SVN_ERR(find_identical_prefix(&reached_one_eof, prefix_lines,
825                                files, datasources_len, file_baton->pool));
826
827  if (!reached_one_eof)
828    /* No file consisted totally of identical prefix,
829     * so there may be some identical suffix.  */
830    SVN_ERR(find_identical_suffix(suffix_lines, files, datasources_len,
831                                  file_baton->pool));
832
833#endif
834
835  /* Copy local results back to baton. */
836  for (i = 0; i < datasources_len; i++)
837    file_baton->files[datasource_to_index(datasources[i])] = files[i];
838
839  return SVN_NO_ERROR;
840}
841
842
843/* Implements svn_diff_fns2_t::datasource_close */
844static svn_error_t *
845datasource_close(void *baton, svn_diff_datasource_e datasource)
846{
847  /* Do nothing.  The compare_token function needs previous datasources
848   * to stay available until all datasources are processed.
849   */
850
851  return SVN_NO_ERROR;
852}
853
854/* Implements svn_diff_fns2_t::datasource_get_next_token */
855static svn_error_t *
856datasource_get_next_token(apr_uint32_t *hash, void **token, void *baton,
857                          svn_diff_datasource_e datasource)
858{
859  svn_diff__file_baton_t *file_baton = baton;
860  svn_diff__file_token_t *file_token;
861  struct file_info *file = &file_baton->files[datasource_to_index(datasource)];
862  char *endp;
863  char *curp;
864  char *eol;
865  apr_off_t last_chunk;
866  apr_off_t length;
867  apr_uint32_t h = 0;
868  /* Did the last chunk end in a CR character? */
869  svn_boolean_t had_cr = FALSE;
870
871  *token = NULL;
872
873  curp = file->curp;
874  endp = file->endp;
875
876  last_chunk = offset_to_chunk(file->size);
877
878  /* Are we already at the end of a chunk? */
879  if (curp == endp)
880    {
881      /* Are we at EOF */
882      if (last_chunk == file->chunk)
883        return SVN_NO_ERROR; /* EOF */
884
885      /* Or right before an identical suffix in the next chunk? */
886      if (file->chunk + 1 == file->suffix_start_chunk
887          && file->suffix_offset_in_chunk == 0)
888        return SVN_NO_ERROR;
889    }
890
891  /* Stop when we encounter the identical suffix. If suffix scanning was not
892   * performed, suffix_start_chunk will be -1, so this condition will never
893   * be true. */
894  if (file->chunk == file->suffix_start_chunk
895      && (curp - file->buffer) == file->suffix_offset_in_chunk)
896    return SVN_NO_ERROR;
897
898  /* Allocate a new token, or fetch one from the "reusable tokens" list. */
899  file_token = file_baton->tokens;
900  if (file_token)
901    {
902      file_baton->tokens = file_token->next;
903    }
904  else
905    {
906      file_token = apr_palloc(file_baton->pool, sizeof(*file_token));
907    }
908
909  file_token->datasource = datasource;
910  file_token->offset = chunk_to_offset(file->chunk)
911                       + (curp - file->buffer);
912  file_token->norm_offset = file_token->offset;
913  file_token->raw_length = 0;
914  file_token->length = 0;
915
916  while (1)
917    {
918      eol = svn_eol__find_eol_start(curp, endp - curp);
919      if (eol)
920        {
921          had_cr = (*eol == '\r');
922          eol++;
923          /* If we have the whole eol sequence in the chunk... */
924          if (!(had_cr && eol == endp))
925            {
926              /* Also skip past the '\n' in an '\r\n' sequence. */
927              if (had_cr && *eol == '\n')
928                eol++;
929              break;
930            }
931        }
932
933      if (file->chunk == last_chunk)
934        {
935          eol = endp;
936          break;
937        }
938
939      length = endp - curp;
940      file_token->raw_length += length;
941      {
942        char *c = curp;
943
944        svn_diff__normalize_buffer(&c, &length,
945                                   &file->normalize_state,
946                                   curp, file_baton->options);
947        if (file_token->length == 0)
948          {
949            /* When we are reading the first part of the token, move the
950               normalized offset past leading ignored characters, if any. */
951            file_token->norm_offset += (c - curp);
952          }
953        file_token->length += length;
954        h = svn__adler32(h, c, length);
955      }
956
957      curp = endp = file->buffer;
958      file->chunk++;
959      length = file->chunk == last_chunk ?
960        offset_in_chunk(file->size) : CHUNK_SIZE;
961      endp += length;
962      file->endp = endp;
963
964      /* Issue #4283: Normally we should have checked for reaching the skipped
965         suffix here, but because we assume that a suffix always starts on a
966         line and token boundary we rely on catching the suffix earlier in this
967         function.
968
969         When changing things here, make sure the whitespace settings are
970         applied, or we might not reach the exact suffix boundary as token
971         boundary. */
972      SVN_ERR(read_chunk(file->file,
973                         curp, length,
974                         chunk_to_offset(file->chunk),
975                         file_baton->pool));
976
977      /* If the last chunk ended in a CR, we're done. */
978      if (had_cr)
979        {
980          eol = curp;
981          if (*curp == '\n')
982            ++eol;
983          break;
984        }
985    }
986
987  length = eol - curp;
988  file_token->raw_length += length;
989  file->curp = eol;
990
991  /* If the file length is exactly a multiple of CHUNK_SIZE, we will end up
992   * with a spurious empty token.  Avoid returning it.
993   * Note that we use the unnormalized length; we don't want a line containing
994   * only spaces (and no trailing newline) to appear like a non-existent
995   * line. */
996  if (file_token->raw_length > 0)
997    {
998      char *c = curp;
999      svn_diff__normalize_buffer(&c, &length,
1000                                 &file->normalize_state,
1001                                 curp, file_baton->options);
1002      if (file_token->length == 0)
1003        {
1004          /* When we are reading the first part of the token, move the
1005             normalized offset past leading ignored characters, if any. */
1006          file_token->norm_offset += (c - curp);
1007        }
1008
1009      file_token->length += length;
1010
1011      *hash = svn__adler32(h, c, length);
1012      *token = file_token;
1013    }
1014
1015  return SVN_NO_ERROR;
1016}
1017
1018#define COMPARE_CHUNK_SIZE 4096
1019
1020/* Implements svn_diff_fns2_t::token_compare */
1021static svn_error_t *
1022token_compare(void *baton, void *token1, void *token2, int *compare)
1023{
1024  svn_diff__file_baton_t *file_baton = baton;
1025  svn_diff__file_token_t *file_token[2];
1026  char buffer[2][COMPARE_CHUNK_SIZE];
1027  char *bufp[2];
1028  apr_off_t offset[2];
1029  struct file_info *file[2];
1030  apr_off_t length[2];
1031  apr_off_t total_length;
1032  /* How much is left to read of each token from the file. */
1033  apr_off_t raw_length[2];
1034  int i;
1035  svn_diff__normalize_state_t state[2];
1036
1037  file_token[0] = token1;
1038  file_token[1] = token2;
1039  if (file_token[0]->length < file_token[1]->length)
1040    {
1041      *compare = -1;
1042      return SVN_NO_ERROR;
1043    }
1044
1045  if (file_token[0]->length > file_token[1]->length)
1046    {
1047      *compare = 1;
1048      return SVN_NO_ERROR;
1049    }
1050
1051  total_length = file_token[0]->length;
1052  if (total_length == 0)
1053    {
1054      *compare = 0;
1055      return SVN_NO_ERROR;
1056    }
1057
1058  for (i = 0; i < 2; ++i)
1059    {
1060      int idx = datasource_to_index(file_token[i]->datasource);
1061
1062      file[i] = &file_baton->files[idx];
1063      offset[i] = file_token[i]->norm_offset;
1064      state[i] = svn_diff__normalize_state_normal;
1065
1066      if (offset_to_chunk(offset[i]) == file[i]->chunk)
1067        {
1068          /* If the start of the token is in memory, the entire token is
1069           * in memory.
1070           */
1071          bufp[i] = file[i]->buffer;
1072          bufp[i] += offset_in_chunk(offset[i]);
1073
1074          length[i] = total_length;
1075          raw_length[i] = 0;
1076        }
1077      else
1078        {
1079          apr_off_t skipped;
1080
1081          length[i] = 0;
1082
1083          /* When we skipped the first part of the token via the whitespace
1084             normalization we must reduce the raw length of the token */
1085          skipped = (file_token[i]->norm_offset - file_token[i]->offset);
1086
1087          raw_length[i] = file_token[i]->raw_length - skipped;
1088        }
1089    }
1090
1091  do
1092    {
1093      apr_off_t len;
1094      for (i = 0; i < 2; i++)
1095        {
1096          if (length[i] == 0)
1097            {
1098              /* Error if raw_length is 0, that's an unexpected change
1099               * of the file that can happen when ingoring whitespace
1100               * and that can lead to an infinite loop. */
1101              if (raw_length[i] == 0)
1102                return svn_error_createf(SVN_ERR_DIFF_DATASOURCE_MODIFIED,
1103                                         NULL,
1104                                         _("The file '%s' changed unexpectedly"
1105                                           " during diff"),
1106                                         file[i]->path);
1107
1108              /* Read a chunk from disk into a buffer */
1109              bufp[i] = buffer[i];
1110              length[i] = raw_length[i] > COMPARE_CHUNK_SIZE ?
1111                COMPARE_CHUNK_SIZE : raw_length[i];
1112
1113              SVN_ERR(read_chunk(file[i]->file,
1114                                 bufp[i], length[i], offset[i],
1115                                 file_baton->pool));
1116              offset[i] += length[i];
1117              raw_length[i] -= length[i];
1118              /* bufp[i] gets reset to buffer[i] before reading each chunk,
1119                 so, overwriting it isn't a problem */
1120              svn_diff__normalize_buffer(&bufp[i], &length[i], &state[i],
1121                                         bufp[i], file_baton->options);
1122
1123              /* assert(length[i] == file_token[i]->length); */
1124            }
1125        }
1126
1127      len = length[0] > length[1] ? length[1] : length[0];
1128
1129      /* Compare two chunks (that could be entire tokens if they both reside
1130       * in memory).
1131       */
1132      *compare = memcmp(bufp[0], bufp[1], (size_t) len);
1133      if (*compare != 0)
1134        return SVN_NO_ERROR;
1135
1136      total_length -= len;
1137      length[0] -= len;
1138      length[1] -= len;
1139      bufp[0] += len;
1140      bufp[1] += len;
1141    }
1142  while(total_length > 0);
1143
1144  *compare = 0;
1145  return SVN_NO_ERROR;
1146}
1147
1148
1149/* Implements svn_diff_fns2_t::token_discard */
1150static void
1151token_discard(void *baton, void *token)
1152{
1153  svn_diff__file_baton_t *file_baton = baton;
1154  svn_diff__file_token_t *file_token = token;
1155
1156  /* Prepend FILE_TOKEN to FILE_BATON->TOKENS, for reuse. */
1157  file_token->next = file_baton->tokens;
1158  file_baton->tokens = file_token;
1159}
1160
1161
1162/* Implements svn_diff_fns2_t::token_discard_all */
1163static void
1164token_discard_all(void *baton)
1165{
1166  svn_diff__file_baton_t *file_baton = baton;
1167
1168  /* Discard all memory in use by the tokens, and close all open files. */
1169  svn_pool_clear(file_baton->pool);
1170}
1171
1172
1173static const svn_diff_fns2_t svn_diff__file_vtable =
1174{
1175  datasources_open,
1176  datasource_close,
1177  datasource_get_next_token,
1178  token_compare,
1179  token_discard,
1180  token_discard_all
1181};
1182
1183/* Id for the --ignore-eol-style option, which doesn't have a short name. */
1184#define SVN_DIFF__OPT_IGNORE_EOL_STYLE 256
1185
1186/* Options supported by svn_diff_file_options_parse(). */
1187static const apr_getopt_option_t diff_options[] =
1188{
1189  { "ignore-space-change", 'b', 0, NULL },
1190  { "ignore-all-space", 'w', 0, NULL },
1191  { "ignore-eol-style", SVN_DIFF__OPT_IGNORE_EOL_STYLE, 0, NULL },
1192  { "show-c-function", 'p', 0, NULL },
1193  /* ### For compatibility; we don't support the argument to -u, because
1194   * ### we don't have optional argument support. */
1195  { "unified", 'u', 0, NULL },
1196  { "context", 'U', 1, NULL },
1197  { NULL, 0, 0, NULL }
1198};
1199
1200svn_diff_file_options_t *
1201svn_diff_file_options_create(apr_pool_t *pool)
1202{
1203  svn_diff_file_options_t * opts = apr_pcalloc(pool, sizeof(*opts));
1204
1205  opts->context_size = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1206
1207  return opts;
1208}
1209
1210/* A baton for use with opt_parsing_error_func(). */
1211struct opt_parsing_error_baton_t
1212{
1213  svn_error_t *err;
1214  apr_pool_t *pool;
1215};
1216
1217/* Store an error message from apr_getopt_long().  Set BATON->err to a new
1218 * error with a message generated from FMT and the remaining arguments.
1219 * Implements apr_getopt_err_fn_t. */
1220static void
1221opt_parsing_error_func(void *baton,
1222                       const char *fmt, ...)
1223{
1224  struct opt_parsing_error_baton_t *b = baton;
1225  const char *message;
1226  va_list ap;
1227
1228  va_start(ap, fmt);
1229  message = apr_pvsprintf(b->pool, fmt, ap);
1230  va_end(ap);
1231
1232  /* Skip leading ": " (if present, which it always is in known cases). */
1233  if (strncmp(message, ": ", 2) == 0)
1234    message += 2;
1235
1236  b->err = svn_error_create(SVN_ERR_INVALID_DIFF_OPTION, NULL, message);
1237}
1238
1239svn_error_t *
1240svn_diff_file_options_parse(svn_diff_file_options_t *options,
1241                            const apr_array_header_t *args,
1242                            apr_pool_t *pool)
1243{
1244  apr_getopt_t *os;
1245  struct opt_parsing_error_baton_t opt_parsing_error_baton;
1246  /* Make room for each option (starting at index 1) plus trailing NULL. */
1247  const char **argv = apr_palloc(pool, sizeof(char*) * (args->nelts + 2));
1248
1249  opt_parsing_error_baton.err = NULL;
1250  opt_parsing_error_baton.pool = pool;
1251
1252  argv[0] = "";
1253  memcpy(argv + 1, args->elts, sizeof(char*) * args->nelts);
1254  argv[args->nelts + 1] = NULL;
1255
1256  apr_getopt_init(&os, pool, args->nelts + 1, argv);
1257
1258  /* Capture any error message from apr_getopt_long().  This will typically
1259   * say which option is wrong, which we would not otherwise know. */
1260  os->errfn = opt_parsing_error_func;
1261  os->errarg = &opt_parsing_error_baton;
1262
1263  while (1)
1264    {
1265      const char *opt_arg;
1266      int opt_id;
1267      apr_status_t err = apr_getopt_long(os, diff_options, &opt_id, &opt_arg);
1268
1269      if (APR_STATUS_IS_EOF(err))
1270        break;
1271      if (err)
1272        /* Wrap apr_getopt_long()'s error message.  Its doc string implies
1273         * it always will produce one, but never mind if it doesn't.  Avoid
1274         * using the message associated with the return code ERR, because
1275         * it refers to the "command line" which may be misleading here. */
1276        return svn_error_create(SVN_ERR_INVALID_DIFF_OPTION,
1277                                opt_parsing_error_baton.err,
1278                                _("Error in options to internal diff"));
1279
1280      switch (opt_id)
1281        {
1282        case 'b':
1283          /* -w takes precedence over -b. */
1284          if (! options->ignore_space)
1285            options->ignore_space = svn_diff_file_ignore_space_change;
1286          break;
1287        case 'w':
1288          options->ignore_space = svn_diff_file_ignore_space_all;
1289          break;
1290        case SVN_DIFF__OPT_IGNORE_EOL_STYLE:
1291          options->ignore_eol_style = TRUE;
1292          break;
1293        case 'p':
1294          options->show_c_function = TRUE;
1295          break;
1296        case 'U':
1297          SVN_ERR(svn_cstring_atoi(&options->context_size, opt_arg));
1298          break;
1299        default:
1300          break;
1301        }
1302    }
1303
1304  /* Check for spurious arguments. */
1305  if (os->ind < os->argc)
1306    return svn_error_createf(SVN_ERR_INVALID_DIFF_OPTION, NULL,
1307                             _("Invalid argument '%s' in diff options"),
1308                             os->argv[os->ind]);
1309
1310  return SVN_NO_ERROR;
1311}
1312
1313svn_error_t *
1314svn_diff_file_diff_2(svn_diff_t **diff,
1315                     const char *original,
1316                     const char *modified,
1317                     const svn_diff_file_options_t *options,
1318                     apr_pool_t *pool)
1319{
1320  svn_diff__file_baton_t baton = { 0 };
1321
1322  baton.options = options;
1323  baton.files[0].path = original;
1324  baton.files[1].path = modified;
1325  baton.pool = svn_pool_create(pool);
1326
1327  SVN_ERR(svn_diff_diff_2(diff, &baton, &svn_diff__file_vtable, pool));
1328
1329  svn_pool_destroy(baton.pool);
1330  return SVN_NO_ERROR;
1331}
1332
1333svn_error_t *
1334svn_diff_file_diff3_2(svn_diff_t **diff,
1335                      const char *original,
1336                      const char *modified,
1337                      const char *latest,
1338                      const svn_diff_file_options_t *options,
1339                      apr_pool_t *pool)
1340{
1341  svn_diff__file_baton_t baton = { 0 };
1342
1343  baton.options = options;
1344  baton.files[0].path = original;
1345  baton.files[1].path = modified;
1346  baton.files[2].path = latest;
1347  baton.pool = svn_pool_create(pool);
1348
1349  SVN_ERR(svn_diff_diff3_2(diff, &baton, &svn_diff__file_vtable, pool));
1350
1351  svn_pool_destroy(baton.pool);
1352  return SVN_NO_ERROR;
1353}
1354
1355svn_error_t *
1356svn_diff_file_diff4_2(svn_diff_t **diff,
1357                      const char *original,
1358                      const char *modified,
1359                      const char *latest,
1360                      const char *ancestor,
1361                      const svn_diff_file_options_t *options,
1362                      apr_pool_t *pool)
1363{
1364  svn_diff__file_baton_t baton = { 0 };
1365
1366  baton.options = options;
1367  baton.files[0].path = original;
1368  baton.files[1].path = modified;
1369  baton.files[2].path = latest;
1370  baton.files[3].path = ancestor;
1371  baton.pool = svn_pool_create(pool);
1372
1373  SVN_ERR(svn_diff_diff4_2(diff, &baton, &svn_diff__file_vtable, pool));
1374
1375  svn_pool_destroy(baton.pool);
1376  return SVN_NO_ERROR;
1377}
1378
1379
1380/** Display unified context diffs **/
1381
1382/* Maximum length of the extra context to show when show_c_function is set.
1383 * GNU diff uses 40, let's be brave and use 50 instead. */
1384#define SVN_DIFF__EXTRA_CONTEXT_LENGTH 50
1385typedef struct svn_diff__file_output_baton_t
1386{
1387  svn_stream_t *output_stream;
1388  const char *header_encoding;
1389
1390  /* Cached markers, in header_encoding. */
1391  const char *context_str;
1392  const char *delete_str;
1393  const char *insert_str;
1394
1395  const char *path[2];
1396  apr_file_t *file[2];
1397
1398  apr_off_t   current_line[2];
1399
1400  char        buffer[2][4096];
1401  apr_size_t  length[2];
1402  char       *curp[2];
1403
1404  apr_off_t   hunk_start[2];
1405  apr_off_t   hunk_length[2];
1406  svn_stringbuf_t *hunk;
1407
1408  /* Should we emit C functions in the unified diff header */
1409  svn_boolean_t show_c_function;
1410  /* Extra strings to skip over if we match. */
1411  apr_array_header_t *extra_skip_match;
1412  /* "Context" to append to the @@ line when the show_c_function option
1413   * is set. */
1414  svn_stringbuf_t *extra_context;
1415  /* Extra context for the current hunk. */
1416  char hunk_extra_context[SVN_DIFF__EXTRA_CONTEXT_LENGTH + 1];
1417
1418  int context_size;
1419
1420  apr_pool_t *pool;
1421} svn_diff__file_output_baton_t;
1422
/* How output_unified_line() treats the line it reads. */
typedef enum svn_diff__file_output_unified_type_e
{
  /* Consume the line without adding it to the hunk. */
  svn_diff__file_output_unified_skip,
  /* Add the line to the hunk, prefixed with the context marker. */
  svn_diff__file_output_unified_context,
  /* Add the line to the hunk, prefixed with the delete marker. */
  svn_diff__file_output_unified_delete,
  /* Add the line to the hunk, prefixed with the insert marker. */
  svn_diff__file_output_unified_insert
} svn_diff__file_output_unified_type_e;
1430
1431
1432static svn_error_t *
1433output_unified_line(svn_diff__file_output_baton_t *baton,
1434                    svn_diff__file_output_unified_type_e type, int idx)
1435{
1436  char *curp;
1437  char *eol;
1438  apr_size_t length;
1439  svn_error_t *err;
1440  svn_boolean_t bytes_processed = FALSE;
1441  svn_boolean_t had_cr = FALSE;
1442  /* Are we collecting extra context? */
1443  svn_boolean_t collect_extra = FALSE;
1444
1445  length = baton->length[idx];
1446  curp = baton->curp[idx];
1447
1448  /* Lazily update the current line even if we're at EOF.
1449   * This way we fake output of context at EOF
1450   */
1451  baton->current_line[idx]++;
1452
1453  if (length == 0 && apr_file_eof(baton->file[idx]))
1454    {
1455      return SVN_NO_ERROR;
1456    }
1457
1458  do
1459    {
1460      if (length > 0)
1461        {
1462          if (!bytes_processed)
1463            {
1464              switch (type)
1465                {
1466                case svn_diff__file_output_unified_context:
1467                  svn_stringbuf_appendcstr(baton->hunk, baton->context_str);
1468                  baton->hunk_length[0]++;
1469                  baton->hunk_length[1]++;
1470                  break;
1471                case svn_diff__file_output_unified_delete:
1472                  svn_stringbuf_appendcstr(baton->hunk, baton->delete_str);
1473                  baton->hunk_length[0]++;
1474                  break;
1475                case svn_diff__file_output_unified_insert:
1476                  svn_stringbuf_appendcstr(baton->hunk, baton->insert_str);
1477                  baton->hunk_length[1]++;
1478                  break;
1479                default:
1480                  break;
1481                }
1482
1483              if (baton->show_c_function
1484                  && (type == svn_diff__file_output_unified_skip
1485                      || type == svn_diff__file_output_unified_context)
1486                  && (svn_ctype_isalpha(*curp) || *curp == '$' || *curp == '_')
1487                  && !svn_cstring_match_glob_list(curp,
1488                                                  baton->extra_skip_match))
1489                {
1490                  svn_stringbuf_setempty(baton->extra_context);
1491                  collect_extra = TRUE;
1492                }
1493            }
1494
1495          eol = svn_eol__find_eol_start(curp, length);
1496
1497          if (eol != NULL)
1498            {
1499              apr_size_t len;
1500
1501              had_cr = (*eol == '\r');
1502              eol++;
1503              len = (apr_size_t)(eol - curp);
1504
1505              if (! had_cr || len < length)
1506                {
1507                  if (had_cr && *eol == '\n')
1508                    {
1509                      ++eol;
1510                      ++len;
1511                    }
1512
1513                  length -= len;
1514
1515                  if (type != svn_diff__file_output_unified_skip)
1516                    {
1517                      svn_stringbuf_appendbytes(baton->hunk, curp, len);
1518                    }
1519                  if (collect_extra)
1520                    {
1521                      svn_stringbuf_appendbytes(baton->extra_context,
1522                                                curp, len);
1523                    }
1524
1525                  baton->curp[idx] = eol;
1526                  baton->length[idx] = length;
1527
1528                  err = SVN_NO_ERROR;
1529
1530                  break;
1531                }
1532            }
1533
1534          if (type != svn_diff__file_output_unified_skip)
1535            {
1536              svn_stringbuf_appendbytes(baton->hunk, curp, length);
1537            }
1538
1539          if (collect_extra)
1540            {
1541              svn_stringbuf_appendbytes(baton->extra_context, curp, length);
1542            }
1543
1544          bytes_processed = TRUE;
1545        }
1546
1547      curp = baton->buffer[idx];
1548      length = sizeof(baton->buffer[idx]);
1549
1550      err = svn_io_file_read(baton->file[idx], curp, &length, baton->pool);
1551
1552      /* If the last chunk ended with a CR, we look for an LF at the start
1553         of this chunk. */
1554      if (had_cr)
1555        {
1556          if (! err && length > 0 && *curp == '\n')
1557            {
1558              if (type != svn_diff__file_output_unified_skip)
1559                {
1560                  svn_stringbuf_appendbyte(baton->hunk, *curp);
1561                }
1562              /* We don't append the LF to extra_context, since it would
1563               * just be stripped anyway. */
1564              ++curp;
1565              --length;
1566            }
1567
1568          baton->curp[idx] = curp;
1569          baton->length[idx] = length;
1570
1571          break;
1572        }
1573    }
1574  while (! err);
1575
1576  if (err && ! APR_STATUS_IS_EOF(err->apr_err))
1577    return err;
1578
1579  if (err && APR_STATUS_IS_EOF(err->apr_err))
1580    {
1581      svn_error_clear(err);
1582      /* Special case if we reach the end of file AND the last line is in the
1583         changed range AND the file doesn't end with a newline */
1584      if (bytes_processed && (type != svn_diff__file_output_unified_skip)
1585          && ! had_cr)
1586        {
1587          SVN_ERR(svn_diff__unified_append_no_newline_msg(
1588                    baton->hunk, baton->header_encoding, baton->pool));
1589        }
1590
1591      baton->length[idx] = 0;
1592    }
1593
1594  return SVN_NO_ERROR;
1595}
1596
1597static APR_INLINE svn_error_t *
1598output_unified_diff_range(svn_diff__file_output_baton_t *output_baton,
1599                          int source,
1600                          svn_diff__file_output_unified_type_e type,
1601                          apr_off_t until)
1602{
1603  while (output_baton->current_line[source] < until)
1604    {
1605      SVN_ERR(output_unified_line(output_baton, type, source));
1606    }
1607  return SVN_NO_ERROR;
1608}
1609
1610static svn_error_t *
1611output_unified_flush_hunk(svn_diff__file_output_baton_t *baton)
1612{
1613  apr_off_t target_line;
1614  apr_size_t hunk_len;
1615  apr_off_t old_start;
1616  apr_off_t new_start;
1617
1618  if (svn_stringbuf_isempty(baton->hunk))
1619    {
1620      /* Nothing to flush */
1621      return SVN_NO_ERROR;
1622    }
1623
1624  target_line = baton->hunk_start[0] + baton->hunk_length[0]
1625                + baton->context_size;
1626
1627  /* Add trailing context to the hunk */
1628  SVN_ERR(output_unified_diff_range(baton, 0 /* original */,
1629                                    svn_diff__file_output_unified_context,
1630                                    target_line));
1631
1632  old_start = baton->hunk_start[0];
1633  new_start = baton->hunk_start[1];
1634
1635  /* If the file is non-empty, convert the line indexes from
1636     zero based to one based */
1637  if (baton->hunk_length[0])
1638    old_start++;
1639  if (baton->hunk_length[1])
1640    new_start++;
1641
1642  /* Write the hunk header */
1643  SVN_ERR(svn_diff__unified_write_hunk_header(
1644            baton->output_stream, baton->header_encoding, "@@",
1645            old_start, baton->hunk_length[0],
1646            new_start, baton->hunk_length[1],
1647            baton->hunk_extra_context,
1648            baton->pool));
1649
1650  /* Output the hunk content */
1651  hunk_len = baton->hunk->len;
1652  SVN_ERR(svn_stream_write(baton->output_stream, baton->hunk->data,
1653                           &hunk_len));
1654
1655  /* Prepare for the next hunk */
1656  baton->hunk_length[0] = 0;
1657  baton->hunk_length[1] = 0;
1658  baton->hunk_start[0] = 0;
1659  baton->hunk_start[1] = 0;
1660  svn_stringbuf_setempty(baton->hunk);
1661
1662  return SVN_NO_ERROR;
1663}
1664
1665static svn_error_t *
1666output_unified_diff_modified(void *baton,
1667  apr_off_t original_start, apr_off_t original_length,
1668  apr_off_t modified_start, apr_off_t modified_length,
1669  apr_off_t latest_start, apr_off_t latest_length)
1670{
1671  svn_diff__file_output_baton_t *output_baton = baton;
1672  apr_off_t context_prefix_length;
1673  apr_off_t prev_context_end;
1674  svn_boolean_t init_hunk = FALSE;
1675
1676  if (original_start > output_baton->context_size)
1677    context_prefix_length = output_baton->context_size;
1678  else
1679    context_prefix_length = original_start;
1680
1681  /* Calculate where the previous hunk will end if we would write it now
1682     (including the necessary context at the end) */
1683  if (output_baton->hunk_length[0] > 0 || output_baton->hunk_length[1] > 0)
1684    {
1685      prev_context_end = output_baton->hunk_start[0]
1686                         + output_baton->hunk_length[0]
1687                         + output_baton->context_size;
1688    }
1689  else
1690    {
1691      prev_context_end = -1;
1692
1693      if (output_baton->hunk_start[0] == 0
1694          && (original_length > 0 || modified_length > 0))
1695        init_hunk = TRUE;
1696    }
1697
1698  /* If the changed range is far enough from the previous range, flush the current
1699     hunk. */
1700  {
1701    apr_off_t new_hunk_start = (original_start - context_prefix_length);
1702
1703    if (output_baton->current_line[0] < new_hunk_start
1704          && prev_context_end <= new_hunk_start)
1705      {
1706        SVN_ERR(output_unified_flush_hunk(output_baton));
1707        init_hunk = TRUE;
1708      }
1709    else if (output_baton->hunk_length[0] > 0
1710             || output_baton->hunk_length[1] > 0)
1711      {
1712        /* We extend the current hunk */
1713
1714
1715        /* Original: Output the context preceding the changed range */
1716        SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1717                                          svn_diff__file_output_unified_context,
1718                                          original_start));
1719      }
1720  }
1721
1722  /* Original: Skip lines until we are at the beginning of the context we want
1723     to display */
1724  SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1725                                    svn_diff__file_output_unified_skip,
1726                                    original_start - context_prefix_length));
1727
1728  /* Note that the above skip stores data for the show_c_function support below */
1729
1730  if (init_hunk)
1731    {
1732      SVN_ERR_ASSERT(output_baton->hunk_length[0] == 0
1733                     && output_baton->hunk_length[1] == 0);
1734
1735      output_baton->hunk_start[0] = original_start - context_prefix_length;
1736      output_baton->hunk_start[1] = modified_start - context_prefix_length;
1737    }
1738
1739  if (init_hunk && output_baton->show_c_function)
1740    {
1741      apr_size_t p;
1742      const char *invalid_character;
1743
1744      /* Save the extra context for later use.
1745       * Note that the last byte of the hunk_extra_context array is never
1746       * touched after it is zero-initialized, so the array is always
1747       * 0-terminated. */
1748      strncpy(output_baton->hunk_extra_context,
1749              output_baton->extra_context->data,
1750              SVN_DIFF__EXTRA_CONTEXT_LENGTH);
1751      /* Trim whitespace at the end, most notably to get rid of any
1752       * newline characters. */
1753      p = strlen(output_baton->hunk_extra_context);
1754      while (p > 0
1755             && svn_ctype_isspace(output_baton->hunk_extra_context[p - 1]))
1756        {
1757          output_baton->hunk_extra_context[--p] = '\0';
1758        }
1759      invalid_character =
1760        svn_utf__last_valid(output_baton->hunk_extra_context,
1761                            SVN_DIFF__EXTRA_CONTEXT_LENGTH);
1762      for (p = invalid_character - output_baton->hunk_extra_context;
1763           p < SVN_DIFF__EXTRA_CONTEXT_LENGTH; p++)
1764        {
1765          output_baton->hunk_extra_context[p] = '\0';
1766        }
1767    }
1768
1769  /* Modified: Skip lines until we are at the start of the changed range */
1770  SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */,
1771                                    svn_diff__file_output_unified_skip,
1772                                    modified_start));
1773
1774  /* Original: Output the context preceding the changed range */
1775  SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1776                                    svn_diff__file_output_unified_context,
1777                                    original_start));
1778
1779  /* Both: Output the changed range */
1780  SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1781                                    svn_diff__file_output_unified_delete,
1782                                    original_start + original_length));
1783  SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */,
1784                                    svn_diff__file_output_unified_insert,
1785                                    modified_start + modified_length));
1786
1787  return SVN_NO_ERROR;
1788}
1789
1790/* Set *HEADER to a new string consisting of PATH, a tab, and PATH's mtime. */
1791static svn_error_t *
1792output_unified_default_hdr(const char **header, const char *path,
1793                           apr_pool_t *pool)
1794{
1795  apr_finfo_t file_info;
1796  apr_time_exp_t exploded_time;
1797  char time_buffer[64];
1798  apr_size_t time_len;
1799  const char *utf8_timestr;
1800
1801  SVN_ERR(svn_io_stat(&file_info, path, APR_FINFO_MTIME, pool));
1802  apr_time_exp_lt(&exploded_time, file_info.mtime);
1803
1804  apr_strftime(time_buffer, &time_len, sizeof(time_buffer) - 1,
1805  /* Order of date components can be different in different languages */
1806               _("%a %b %e %H:%M:%S %Y"), &exploded_time);
1807
1808  SVN_ERR(svn_utf_cstring_to_utf8(&utf8_timestr, time_buffer, pool));
1809
1810  *header = apr_psprintf(pool, "%s\t%s", path, utf8_timestr);
1811
1812  return SVN_NO_ERROR;
1813}
1814
1815static const svn_diff_output_fns_t svn_diff__file_output_unified_vtable =
1816{
1817  NULL, /* output_common */
1818  output_unified_diff_modified,
1819  NULL, /* output_diff_latest */
1820  NULL, /* output_diff_common */
1821  NULL  /* output_conflict */
1822};
1823
1824svn_error_t *
1825svn_diff_file_output_unified4(svn_stream_t *output_stream,
1826                              svn_diff_t *diff,
1827                              const char *original_path,
1828                              const char *modified_path,
1829                              const char *original_header,
1830                              const char *modified_header,
1831                              const char *header_encoding,
1832                              const char *relative_to_dir,
1833                              svn_boolean_t show_c_function,
1834                              int context_size,
1835                              svn_cancel_func_t cancel_func,
1836                              void *cancel_baton,
1837                              apr_pool_t *pool)
1838{
1839  if (svn_diff_contains_diffs(diff))
1840    {
1841      svn_diff__file_output_baton_t baton;
1842      int i;
1843
1844      memset(&baton, 0, sizeof(baton));
1845      baton.output_stream = output_stream;
1846      baton.pool = pool;
1847      baton.header_encoding = header_encoding;
1848      baton.path[0] = original_path;
1849      baton.path[1] = modified_path;
1850      baton.hunk = svn_stringbuf_create_empty(pool);
1851      baton.show_c_function = show_c_function;
1852      baton.extra_context = svn_stringbuf_create_empty(pool);
1853      baton.context_size = (context_size >= 0) ? context_size
1854                                              : SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1855
1856      if (show_c_function)
1857        {
1858          baton.extra_skip_match = apr_array_make(pool, 3, sizeof(char **));
1859
1860          APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "public:*";
1861          APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "private:*";
1862          APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "protected:*";
1863        }
1864
1865      SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.context_str, " ",
1866                                            header_encoding, pool));
1867      SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.delete_str, "-",
1868                                            header_encoding, pool));
1869      SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.insert_str, "+",
1870                                            header_encoding, pool));
1871
1872      if (relative_to_dir)
1873        {
1874          /* Possibly adjust the "original" and "modified" paths shown in
1875             the output (see issue #2723). */
1876          const char *child_path;
1877
1878          if (! original_header)
1879            {
1880              child_path = svn_dirent_is_child(relative_to_dir,
1881                                               original_path, pool);
1882              if (child_path)
1883                original_path = child_path;
1884              else
1885                return svn_error_createf(
1886                                   SVN_ERR_BAD_RELATIVE_PATH, NULL,
1887                                   _("Path '%s' must be inside "
1888                                     "the directory '%s'"),
1889                                   svn_dirent_local_style(original_path, pool),
1890                                   svn_dirent_local_style(relative_to_dir,
1891                                                          pool));
1892            }
1893
1894          if (! modified_header)
1895            {
1896              child_path = svn_dirent_is_child(relative_to_dir,
1897                                               modified_path, pool);
1898              if (child_path)
1899                modified_path = child_path;
1900              else
1901                return svn_error_createf(
1902                                   SVN_ERR_BAD_RELATIVE_PATH, NULL,
1903                                   _("Path '%s' must be inside "
1904                                     "the directory '%s'"),
1905                                   svn_dirent_local_style(modified_path, pool),
1906                                   svn_dirent_local_style(relative_to_dir,
1907                                                          pool));
1908            }
1909        }
1910
1911      for (i = 0; i < 2; i++)
1912        {
1913          SVN_ERR(svn_io_file_open(&baton.file[i], baton.path[i],
1914                                   APR_READ, APR_OS_DEFAULT, pool));
1915        }
1916
1917      if (original_header == NULL)
1918        {
1919          SVN_ERR(output_unified_default_hdr(&original_header, original_path,
1920                                             pool));
1921        }
1922
1923      if (modified_header == NULL)
1924        {
1925          SVN_ERR(output_unified_default_hdr(&modified_header, modified_path,
1926                                             pool));
1927        }
1928
1929      SVN_ERR(svn_diff__unidiff_write_header(output_stream, header_encoding,
1930                                             original_header, modified_header,
1931                                             pool));
1932
1933      SVN_ERR(svn_diff_output2(diff, &baton,
1934                               &svn_diff__file_output_unified_vtable,
1935                               cancel_func, cancel_baton));
1936      SVN_ERR(output_unified_flush_hunk(&baton));
1937
1938      for (i = 0; i < 2; i++)
1939        {
1940          SVN_ERR(svn_io_file_close(baton.file[i], pool));
1941        }
1942    }
1943
1944  return SVN_NO_ERROR;
1945}
1946
1947
1948/** Display diff3 **/
1949
1950/* A stream to remember *leading* context.  Note that this stream does
1951   *not* copy the data that it is remembering; it just saves
1952   *pointers! */
1953typedef struct context_saver_t {
1954  svn_stream_t *stream;
1955  int context_size;
1956  const char **data; /* const char *data[context_size] */
1957  apr_size_t *len;   /* apr_size_t len[context_size] */
1958  apr_size_t next_slot;
1959  apr_size_t total_written;
1960} context_saver_t;
1961
1962
1963static svn_error_t *
1964context_saver_stream_write(void *baton,
1965                           const char *data,
1966                           apr_size_t *len)
1967{
1968  context_saver_t *cs = baton;
1969
1970  if (cs->context_size > 0)
1971    {
1972      cs->data[cs->next_slot] = data;
1973      cs->len[cs->next_slot] = *len;
1974      cs->next_slot = (cs->next_slot + 1) % cs->context_size;
1975      cs->total_written++;
1976    }
1977  return SVN_NO_ERROR;
1978}
1979
1980typedef struct svn_diff3__file_output_baton_t
1981{
1982  svn_stream_t *output_stream;
1983
1984  const char *path[3];
1985
1986  apr_off_t   current_line[3];
1987
1988  char       *buffer[3];
1989  char       *endp[3];
1990  char       *curp[3];
1991
1992  /* The following four members are in the encoding used for the output. */
1993  const char *conflict_modified;
1994  const char *conflict_original;
1995  const char *conflict_separator;
1996  const char *conflict_latest;
1997
1998  const char *marker_eol;
1999
2000  svn_diff_conflict_display_style_t conflict_style;
2001  int context_size;
2002
2003  /* cancel support */
2004  svn_cancel_func_t cancel_func;
2005  void *cancel_baton;
2006
2007  /* The rest of the fields are for
2008     svn_diff_conflict_display_only_conflicts only.  Note that for
2009     these batons, OUTPUT_STREAM is either CONTEXT_SAVER->STREAM or
2010     (soon after a conflict) a "trailing context stream", never the
2011     actual output stream.*/
2012  /* The actual output stream. */
2013  svn_stream_t *real_output_stream;
2014  context_saver_t *context_saver;
2015  /* Used to allocate context_saver and trailing context streams, and
2016     for some printfs. */
2017  apr_pool_t *pool;
2018} svn_diff3__file_output_baton_t;
2019
2020static svn_error_t *
2021flush_context_saver(context_saver_t *cs,
2022                    svn_stream_t *output_stream)
2023{
2024  int i;
2025  for (i = 0; i < cs->context_size; i++)
2026    {
2027      apr_size_t slot = (i + cs->next_slot) % cs->context_size;
2028      if (cs->data[slot])
2029        {
2030          apr_size_t len = cs->len[slot];
2031          SVN_ERR(svn_stream_write(output_stream, cs->data[slot], &len));
2032        }
2033    }
2034  return SVN_NO_ERROR;
2035}
2036
2037static void
2038make_context_saver(svn_diff3__file_output_baton_t *fob)
2039{
2040  context_saver_t *cs;
2041
2042  assert(fob->context_size > 0); /* Or nothing to save */
2043
2044  svn_pool_clear(fob->pool);
2045  cs = apr_pcalloc(fob->pool, sizeof(*cs));
2046  cs->stream = svn_stream_empty(fob->pool);
2047  svn_stream_set_baton(cs->stream, cs);
2048  svn_stream_set_write(cs->stream, context_saver_stream_write);
2049  fob->context_saver = cs;
2050  fob->output_stream = cs->stream;
2051  cs->context_size = fob->context_size;
2052  cs->data = apr_pcalloc(fob->pool, sizeof(*cs->data) * cs->context_size);
2053  cs->len = apr_pcalloc(fob->pool, sizeof(*cs->len) * cs->context_size);
2054}
2055
2056
2057/* A stream which prints LINES_TO_PRINT (based on context size) lines to
2058   BATON->REAL_OUTPUT_STREAM, and then changes BATON->OUTPUT_STREAM to
2059   a context_saver; used for *trailing* context. */
2060
2061struct trailing_context_printer {
2062  apr_size_t lines_to_print;
2063  svn_diff3__file_output_baton_t *fob;
2064};
2065
2066
2067
2068static svn_error_t *
2069trailing_context_printer_write(void *baton,
2070                               const char *data,
2071                               apr_size_t *len)
2072{
2073  struct trailing_context_printer *tcp = baton;
2074  SVN_ERR_ASSERT(tcp->lines_to_print > 0);
2075  SVN_ERR(svn_stream_write(tcp->fob->real_output_stream, data, len));
2076  tcp->lines_to_print--;
2077  if (tcp->lines_to_print == 0)
2078    make_context_saver(tcp->fob);
2079  return SVN_NO_ERROR;
2080}
2081
2082
2083static void
2084make_trailing_context_printer(svn_diff3__file_output_baton_t *btn)
2085{
2086  struct trailing_context_printer *tcp;
2087  svn_stream_t *s;
2088
2089  svn_pool_clear(btn->pool);
2090
2091  tcp = apr_pcalloc(btn->pool, sizeof(*tcp));
2092  tcp->lines_to_print = btn->context_size;
2093  tcp->fob = btn;
2094  s = svn_stream_empty(btn->pool);
2095  svn_stream_set_baton(s, tcp);
2096  svn_stream_set_write(s, trailing_context_printer_write);
2097  btn->output_stream = s;
2098}
2099
2100
2101
/* What output_line() does with the line it consumes. */
typedef enum svn_diff3__file_output_type_e
{
  /* Advance past the line without writing it. */
  svn_diff3__file_output_skip,
  /* Write the line to the current output stream. */
  svn_diff3__file_output_normal
} svn_diff3__file_output_type_e;
2107
2108
2109static svn_error_t *
2110output_line(svn_diff3__file_output_baton_t *baton,
2111            svn_diff3__file_output_type_e type, int idx)
2112{
2113  char *curp;
2114  char *endp;
2115  char *eol;
2116  apr_size_t len;
2117
2118  curp = baton->curp[idx];
2119  endp = baton->endp[idx];
2120
2121  /* Lazily update the current line even if we're at EOF.
2122   */
2123  baton->current_line[idx]++;
2124
2125  if (curp == endp)
2126    return SVN_NO_ERROR;
2127
2128  eol = svn_eol__find_eol_start(curp, endp - curp);
2129  if (!eol)
2130    eol = endp;
2131  else
2132    {
2133      svn_boolean_t had_cr = (*eol == '\r');
2134      eol++;
2135      if (had_cr && eol != endp && *eol == '\n')
2136        eol++;
2137    }
2138
2139  if (type != svn_diff3__file_output_skip)
2140    {
2141      len = eol - curp;
2142      /* Note that the trailing context printer assumes that
2143         svn_stream_write is called exactly once per line. */
2144      SVN_ERR(svn_stream_write(baton->output_stream, curp, &len));
2145    }
2146
2147  baton->curp[idx] = eol;
2148
2149  return SVN_NO_ERROR;
2150}
2151
2152static svn_error_t *
2153output_marker_eol(svn_diff3__file_output_baton_t *btn)
2154{
2155  return svn_stream_puts(btn->output_stream, btn->marker_eol);
2156}
2157
2158static svn_error_t *
2159output_hunk(void *baton, int idx, apr_off_t target_line,
2160            apr_off_t target_length)
2161{
2162  svn_diff3__file_output_baton_t *output_baton = baton;
2163
2164  /* Skip lines until we are at the start of the changed range */
2165  while (output_baton->current_line[idx] < target_line)
2166    {
2167      SVN_ERR(output_line(output_baton, svn_diff3__file_output_skip, idx));
2168    }
2169
2170  target_line += target_length;
2171
2172  while (output_baton->current_line[idx] < target_line)
2173    {
2174      SVN_ERR(output_line(output_baton, svn_diff3__file_output_normal, idx));
2175    }
2176
2177  return SVN_NO_ERROR;
2178}
2179
2180static svn_error_t *
2181output_common(void *baton, apr_off_t original_start, apr_off_t original_length,
2182              apr_off_t modified_start, apr_off_t modified_length,
2183              apr_off_t latest_start, apr_off_t latest_length)
2184{
2185  return output_hunk(baton, 1, modified_start, modified_length);
2186}
2187
2188static svn_error_t *
2189output_diff_modified(void *baton,
2190                     apr_off_t original_start, apr_off_t original_length,
2191                     apr_off_t modified_start, apr_off_t modified_length,
2192                     apr_off_t latest_start, apr_off_t latest_length)
2193{
2194  return output_hunk(baton, 1, modified_start, modified_length);
2195}
2196
2197static svn_error_t *
2198output_diff_latest(void *baton,
2199                   apr_off_t original_start, apr_off_t original_length,
2200                   apr_off_t modified_start, apr_off_t modified_length,
2201                   apr_off_t latest_start, apr_off_t latest_length)
2202{
2203  return output_hunk(baton, 2, latest_start, latest_length);
2204}
2205
2206static svn_error_t *
2207output_conflict(void *baton,
2208                apr_off_t original_start, apr_off_t original_length,
2209                apr_off_t modified_start, apr_off_t modified_length,
2210                apr_off_t latest_start, apr_off_t latest_length,
2211                svn_diff_t *diff);
2212
2213static const svn_diff_output_fns_t svn_diff3__file_output_vtable =
2214{
2215  output_common,
2216  output_diff_modified,
2217  output_diff_latest,
2218  output_diff_modified, /* output_diff_common */
2219  output_conflict
2220};
2221
2222static svn_error_t *
2223output_conflict_with_context_marker(svn_diff3__file_output_baton_t *btn,
2224                                    const char *label,
2225                                    apr_off_t start,
2226                                    apr_off_t length)
2227{
2228  if (length == 1)
2229    SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2230                              "%s (%" APR_OFF_T_FMT ")",
2231                              label, start + 1));
2232  else
2233    SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2234                              "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")",
2235                              label, start + 1, length));
2236
2237  SVN_ERR(output_marker_eol(btn));
2238
2239  return SVN_NO_ERROR;
2240}
2241
2242static svn_error_t *
2243output_conflict_with_context(svn_diff3__file_output_baton_t *btn,
2244                             apr_off_t original_start,
2245                             apr_off_t original_length,
2246                             apr_off_t modified_start,
2247                             apr_off_t modified_length,
2248                             apr_off_t latest_start,
2249                             apr_off_t latest_length)
2250{
2251  /* Are we currently saving starting context (as opposed to printing
2252     trailing context)?  If so, flush it. */
2253  if (btn->output_stream == btn->context_saver->stream)
2254    {
2255      if (btn->context_saver->total_written > btn->context_size)
2256        SVN_ERR(svn_stream_puts(btn->real_output_stream, "@@\n"));
2257      SVN_ERR(flush_context_saver(btn->context_saver, btn->real_output_stream));
2258    }
2259
2260  /* Print to the real output stream. */
2261  btn->output_stream = btn->real_output_stream;
2262
2263  /* Output the conflict itself. */
2264  SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_modified,
2265                                              modified_start, modified_length));
2266  SVN_ERR(output_hunk(btn, 1/*modified*/, modified_start, modified_length));
2267
2268  SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_original,
2269                                              original_start, original_length));
2270  SVN_ERR(output_hunk(btn, 0/*original*/, original_start, original_length));
2271
2272  SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2273                            "%s%s", btn->conflict_separator, btn->marker_eol));
2274  SVN_ERR(output_hunk(btn, 2/*latest*/, latest_start, latest_length));
2275  SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_latest,
2276                                              latest_start, latest_length));
2277
2278  /* Go into print-trailing-context mode instead. */
2279  make_trailing_context_printer(btn);
2280
2281  return SVN_NO_ERROR;
2282}
2283
2284
2285static svn_error_t *
2286output_conflict(void *baton,
2287                apr_off_t original_start, apr_off_t original_length,
2288                apr_off_t modified_start, apr_off_t modified_length,
2289                apr_off_t latest_start, apr_off_t latest_length,
2290                svn_diff_t *diff)
2291{
2292  svn_diff3__file_output_baton_t *file_baton = baton;
2293
2294  svn_diff_conflict_display_style_t style = file_baton->conflict_style;
2295
2296  if (style == svn_diff_conflict_display_only_conflicts)
2297    return output_conflict_with_context(file_baton,
2298                                        original_start, original_length,
2299                                        modified_start, modified_length,
2300                                        latest_start, latest_length);
2301
2302  if (style == svn_diff_conflict_display_resolved_modified_latest)
2303    {
2304      if (diff)
2305        return svn_diff_output2(diff, baton,
2306                                &svn_diff3__file_output_vtable,
2307                                file_baton->cancel_func,
2308                                file_baton->cancel_baton);
2309      else
2310        style = svn_diff_conflict_display_modified_latest;
2311    }
2312
2313  if (style == svn_diff_conflict_display_modified_latest ||
2314      style == svn_diff_conflict_display_modified_original_latest)
2315    {
2316      SVN_ERR(svn_stream_puts(file_baton->output_stream,
2317                               file_baton->conflict_modified));
2318      SVN_ERR(output_marker_eol(file_baton));
2319
2320      SVN_ERR(output_hunk(baton, 1, modified_start, modified_length));
2321
2322      if (style == svn_diff_conflict_display_modified_original_latest)
2323        {
2324          SVN_ERR(svn_stream_puts(file_baton->output_stream,
2325                                   file_baton->conflict_original));
2326          SVN_ERR(output_marker_eol(file_baton));
2327          SVN_ERR(output_hunk(baton, 0, original_start, original_length));
2328        }
2329
2330      SVN_ERR(svn_stream_puts(file_baton->output_stream,
2331                              file_baton->conflict_separator));
2332      SVN_ERR(output_marker_eol(file_baton));
2333
2334      SVN_ERR(output_hunk(baton, 2, latest_start, latest_length));
2335
2336      SVN_ERR(svn_stream_puts(file_baton->output_stream,
2337                              file_baton->conflict_latest));
2338      SVN_ERR(output_marker_eol(file_baton));
2339    }
2340  else if (style == svn_diff_conflict_display_modified)
2341    SVN_ERR(output_hunk(baton, 1, modified_start, modified_length));
2342  else if (style == svn_diff_conflict_display_latest)
2343    SVN_ERR(output_hunk(baton, 2, latest_start, latest_length));
2344  else /* unknown style */
2345    SVN_ERR_MALFUNCTION();
2346
2347  return SVN_NO_ERROR;
2348}
2349
2350svn_error_t *
2351svn_diff_file_output_merge3(svn_stream_t *output_stream,
2352                            svn_diff_t *diff,
2353                            const char *original_path,
2354                            const char *modified_path,
2355                            const char *latest_path,
2356                            const char *conflict_original,
2357                            const char *conflict_modified,
2358                            const char *conflict_latest,
2359                            const char *conflict_separator,
2360                            svn_diff_conflict_display_style_t style,
2361                            svn_cancel_func_t cancel_func,
2362                            void *cancel_baton,
2363                            apr_pool_t *scratch_pool)
2364{
2365  svn_diff3__file_output_baton_t baton;
2366  apr_file_t *file[3];
2367  int idx;
2368#if APR_HAS_MMAP
2369  apr_mmap_t *mm[3] = { 0 };
2370#endif /* APR_HAS_MMAP */
2371  const char *eol;
2372  svn_boolean_t conflicts_only =
2373    (style == svn_diff_conflict_display_only_conflicts);
2374
2375  memset(&baton, 0, sizeof(baton));
2376  baton.context_size = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
2377  if (conflicts_only)
2378    {
2379      baton.pool = svn_pool_create(scratch_pool);
2380      make_context_saver(&baton);
2381      baton.real_output_stream = output_stream;
2382    }
2383  else
2384    baton.output_stream = output_stream;
2385  baton.path[0] = original_path;
2386  baton.path[1] = modified_path;
2387  baton.path[2] = latest_path;
2388  SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_modified,
2389                                    conflict_modified ? conflict_modified
2390                                    : apr_psprintf(scratch_pool, "<<<<<<< %s",
2391                                                   modified_path),
2392                                    scratch_pool));
2393  SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_original,
2394                                    conflict_original ? conflict_original
2395                                    : apr_psprintf(scratch_pool, "||||||| %s",
2396                                                   original_path),
2397                                    scratch_pool));
2398  SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_separator,
2399                                    conflict_separator ? conflict_separator
2400                                    : "=======", scratch_pool));
2401  SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_latest,
2402                                    conflict_latest ? conflict_latest
2403                                    : apr_psprintf(scratch_pool, ">>>>>>> %s",
2404                                                   latest_path),
2405                                    scratch_pool));
2406
2407  baton.conflict_style = style;
2408
2409  for (idx = 0; idx < 3; idx++)
2410    {
2411      apr_size_t size;
2412
2413      SVN_ERR(map_or_read_file(&file[idx],
2414                               MMAP_T_ARG(mm[idx])
2415                               &baton.buffer[idx], &size,
2416                               baton.path[idx], scratch_pool));
2417
2418      baton.curp[idx] = baton.buffer[idx];
2419      baton.endp[idx] = baton.buffer[idx];
2420
2421      if (baton.endp[idx])
2422        baton.endp[idx] += size;
2423    }
2424
2425  /* Check what eol marker we should use for conflict markers.
2426     We use the eol marker of the modified file and fall back on the
2427     platform's eol marker if that file doesn't contain any newlines. */
2428  eol = svn_eol__detect_eol(baton.buffer[1], baton.endp[1] - baton.buffer[1],
2429                            NULL);
2430  if (! eol)
2431    eol = APR_EOL_STR;
2432  baton.marker_eol = eol;
2433
2434  baton.cancel_func = cancel_func;
2435  baton.cancel_baton = cancel_baton;
2436
2437  SVN_ERR(svn_diff_output2(diff, &baton,
2438                          &svn_diff3__file_output_vtable,
2439                          cancel_func, cancel_baton));
2440
2441  for (idx = 0; idx < 3; idx++)
2442    {
2443#if APR_HAS_MMAP
2444      if (mm[idx])
2445        {
2446          apr_status_t rv = apr_mmap_delete(mm[idx]);
2447          if (rv != APR_SUCCESS)
2448            {
2449              return svn_error_wrap_apr(rv, _("Failed to delete mmap '%s'"),
2450                                        baton.path[idx]);
2451            }
2452        }
2453#endif /* APR_HAS_MMAP */
2454
2455      if (file[idx])
2456        {
2457          SVN_ERR(svn_io_file_close(file[idx], scratch_pool));
2458        }
2459    }
2460
2461  if (conflicts_only)
2462    svn_pool_destroy(baton.pool);
2463
2464  return SVN_NO_ERROR;
2465}
2466
2467