diff_file.c revision 362181
1/*
2 * diff_file.c :  routines for doing diffs on files
3 *
4 * ====================================================================
5 *    Licensed to the Apache Software Foundation (ASF) under one
6 *    or more contributor license agreements.  See the NOTICE file
7 *    distributed with this work for additional information
8 *    regarding copyright ownership.  The ASF licenses this file
9 *    to you under the Apache License, Version 2.0 (the
10 *    "License"); you may not use this file except in compliance
11 *    with the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 *    Unless required by applicable law or agreed to in writing,
16 *    software distributed under the License is distributed on an
17 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 *    KIND, either express or implied.  See the License for the
19 *    specific language governing permissions and limitations
20 *    under the License.
21 * ====================================================================
22 */
23
24
25#include <apr.h>
26#include <apr_pools.h>
27#include <apr_general.h>
28#include <apr_file_io.h>
29#include <apr_file_info.h>
30#include <apr_time.h>
31#include <apr_mmap.h>
32#include <apr_getopt.h>
33
34#include <assert.h>
35
36#include "svn_error.h"
37#include "svn_diff.h"
38#include "svn_types.h"
39#include "svn_string.h"
40#include "svn_subst.h"
41#include "svn_io.h"
42#include "svn_utf.h"
43#include "svn_pools.h"
44#include "diff.h"
45#include "svn_private_config.h"
46#include "svn_path.h"
47#include "svn_ctype.h"
48
49#include "private/svn_utf_private.h"
50#include "private/svn_eol_private.h"
51#include "private/svn_dep_compat.h"
52#include "private/svn_adler32.h"
53#include "private/svn_diff_private.h"
54
55/* A token, i.e. a line read from a file. */
56typedef struct svn_diff__file_token_t
57{
58  /* Next token in free list. */
59  struct svn_diff__file_token_t *next;
60  svn_diff_datasource_e datasource;
61  /* Offset in the datasource. */
62  apr_off_t offset;
63  /* Offset of the normalized token (may skip leading whitespace) */
64  apr_off_t norm_offset;
65  /* Total length - before normalization. */
66  apr_off_t raw_length;
67  /* Total length - after normalization. */
68  apr_off_t length;
69} svn_diff__file_token_t;
70
71
72typedef struct svn_diff__file_baton_t
73{
74  const svn_diff_file_options_t *options;
75
76  struct file_info {
77    const char *path;  /* path to this file, absolute or relative to CWD */
78
79    /* All the following fields are active while this datasource is open */
80    apr_file_t *file;  /* handle of this file */
81    apr_off_t size;    /* total raw size in bytes of this file */
82
83    /* The current chunk: CHUNK_SIZE bytes except for the last chunk. */
84    int chunk;     /* the current chunk number, zero-based */
85    char *buffer;  /* a buffer containing the current chunk */
86    char *curp;    /* current position in the current chunk */
87    char *endp;    /* next memory address after the current chunk */
88
89    svn_diff__normalize_state_t normalize_state;
90
91    /* Where the identical suffix starts in this datasource */
92    int suffix_start_chunk;
93    apr_off_t suffix_offset_in_chunk;
94  } files[4];
95
96  /* List of free tokens that may be reused. */
97  svn_diff__file_token_t *tokens;
98
99  apr_pool_t *pool;
100} svn_diff__file_baton_t;
101
102static int
103datasource_to_index(svn_diff_datasource_e datasource)
104{
105  switch (datasource)
106    {
107    case svn_diff_datasource_original:
108      return 0;
109
110    case svn_diff_datasource_modified:
111      return 1;
112
113    case svn_diff_datasource_latest:
114      return 2;
115
116    case svn_diff_datasource_ancestor:
117      return 3;
118    }
119
120  return -1;
121}
122
/* Files are read in chunks of 128k (1 << CHUNK_SHIFT bytes).  Nothing in
 * particular backs up this number.  If someone comes up with a number that
 * has some argumentation behind it, let's use that.
 */
/* If you change this number, update test_norm_offset(),
 * test_identical_suffix() and test_token_compare() in diff-diff3-test.c.
 */
#define CHUNK_SHIFT 17
#define CHUNK_SIZE (1 << CHUNK_SHIFT)

/* Convert between chunk numbers and byte offsets.
 *
 * chunk_to_offset() widens CHUNK to apr_off_t before shifting: chunk
 * numbers are kept in an int, and shifting an int left by CHUNK_SHIFT
 * overflows as soon as the resulting offset reaches 2 GiB (chunk 16384).
 */
#define chunk_to_offset(chunk) ((apr_off_t)(chunk) << CHUNK_SHIFT)
#define offset_to_chunk(offset) ((offset) >> CHUNK_SHIFT)
#define offset_in_chunk(offset) ((offset) & (CHUNK_SIZE - 1))
136
137
138/* Read a chunk from a FILE into BUFFER, starting from OFFSET, going for
139 * *LENGTH.  The actual bytes read are stored in *LENGTH on return.
140 */
141static APR_INLINE svn_error_t *
142read_chunk(apr_file_t *file,
143           char *buffer, apr_off_t length,
144           apr_off_t offset, apr_pool_t *scratch_pool)
145{
146  /* XXX: The final offset may not be the one we asked for.
147   * XXX: Check.
148   */
149  SVN_ERR(svn_io_file_seek(file, APR_SET, &offset, scratch_pool));
150  return svn_io_file_read_full2(file, buffer, (apr_size_t) length,
151                                NULL, NULL, scratch_pool);
152}
153
154
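/* Map or read the file at PATH.  *BUFFER will point to the file
 * contents, or be NULL if the file is empty.  If the file was mapped,
 * *FILE and *MM will contain the open file and its mmap context.  If the
 * contents were read into memory instead, the file is closed and *FILE is
 * set to NULL; an empty file is left open in *FILE.  *SIZE_P will contain
 * the file size.  Allocate from POOL.
 */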
160#if APR_HAS_MMAP
161#define MMAP_T_PARAM(NAME) apr_mmap_t **NAME,
162#define MMAP_T_ARG(NAME)   &(NAME),
163#else
164#define MMAP_T_PARAM(NAME)
165#define MMAP_T_ARG(NAME)
166#endif
167
168static svn_error_t *
169map_or_read_file(apr_file_t **file,
170                 MMAP_T_PARAM(mm)
171                 char **buffer, apr_size_t *size_p,
172                 const char *path, apr_pool_t *pool)
173{
174  apr_finfo_t finfo;
175  apr_status_t rv;
176  apr_size_t size;
177
178  *buffer = NULL;
179
180  SVN_ERR(svn_io_file_open(file, path, APR_READ, APR_OS_DEFAULT, pool));
181  SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, *file, pool));
182
183  if (finfo.size > APR_SIZE_MAX)
184    {
185      return svn_error_createf(APR_ENOMEM, NULL,
186                               _("File '%s' is too large to be read in "
187                                 "to memory"), path);
188    }
189
190  size = (apr_size_t) finfo.size;
191#if APR_HAS_MMAP
192  if (size > APR_MMAP_THRESHOLD)
193    {
194      rv = apr_mmap_create(mm, *file, 0, size, APR_MMAP_READ, pool);
195      if (rv == APR_SUCCESS)
196        {
197          *buffer = (*mm)->mm;
198        }
199      else
200        {
201          /* Clear *MM because output parameters are undefined on error. */
202          *mm = NULL;
203        }
204
205      /* On failure we just fall through and try reading the file into
206       * memory instead.
207       */
208    }
209#endif /* APR_HAS_MMAP */
210
211   if (*buffer == NULL && size > 0)
212    {
213      *buffer = apr_palloc(pool, size);
214
215      SVN_ERR(svn_io_file_read_full2(*file, *buffer, size, NULL, NULL, pool));
216
217      /* Since we have the entire contents of the file we can
218       * close it now.
219       */
220      SVN_ERR(svn_io_file_close(*file, pool));
221
222      *file = NULL;
223    }
224
225  *size_p = size;
226
227  return SVN_NO_ERROR;
228}
229
230
/* Advance curp by one byte for each of the FILES_LEN entries of ALL_FILES.
 * An entry whose curp is already at the last byte of its chunk (or beyond)
 * is handed to increment_chunk(), which either moves a file that points
 * before its beginning back to its first byte, loads the next chunk and
 * points curp at its start, or signals EOF by setting curp equal to endp.
 * An error from increment_chunk() makes the enclosing function return. */
#define INCREMENT_POINTERS(all_files, files_len, pool)                       \
  do {                                                                       \
    apr_size_t svn_macro__i = 0;                                             \
                                                                             \
    while (svn_macro__i < (files_len))                                       \
      {                                                                      \
        if ((all_files)[svn_macro__i].curp                                   \
            >= (all_files)[svn_macro__i].endp - 1)                           \
          SVN_ERR(increment_chunk(&(all_files)[svn_macro__i], (pool)));      \
        else                                                                 \
          (all_files)[svn_macro__i].curp++;                                  \
        svn_macro__i++;                                                      \
      }                                                                      \
  } while (0)
248
249
/* Move curp back by one byte for each of the FILES_LEN entries of
 * ALL_FILES.  An entry whose curp is already at the start of its buffer is
 * handed to decrement_chunk(), which loads the previous chunk and points
 * curp at its last byte, or sets chunk to -1 to signal BOF when there is
 * no previous chunk.  An error from decrement_chunk() makes the enclosing
 * function return. */
#define DECREMENT_POINTERS(all_files, files_len, pool)                       \
  do {                                                                       \
    apr_size_t svn_macro__i = 0;                                             \
                                                                             \
    while (svn_macro__i < (files_len))                                       \
      {                                                                      \
        if ((all_files)[svn_macro__i].curp                                   \
            <= (all_files)[svn_macro__i].buffer)                             \
          SVN_ERR(decrement_chunk(&(all_files)[svn_macro__i], (pool)));      \
        else                                                                 \
          (all_files)[svn_macro__i].curp--;                                  \
        svn_macro__i++;                                                      \
      }                                                                      \
  } while (0)
266
267
268static svn_error_t *
269increment_chunk(struct file_info *file, apr_pool_t *pool)
270{
271  apr_off_t length;
272  apr_off_t last_chunk = offset_to_chunk(file->size);
273
274  if (file->chunk == -1)
275    {
276      /* We are at BOF (Beginning Of File). Point to first chunk/byte again. */
277      file->chunk = 0;
278      file->curp = file->buffer;
279    }
280  else if (file->chunk == last_chunk)
281    {
282      /* We are at the last chunk. Indicate EOF by setting curp == endp. */
283      file->curp = file->endp;
284    }
285  else
286    {
287      /* There are still chunks left. Read next chunk and reset pointers. */
288      file->chunk++;
289      length = file->chunk == last_chunk ?
290        offset_in_chunk(file->size) : CHUNK_SIZE;
291      SVN_ERR(read_chunk(file->file, file->buffer,
292                         length, chunk_to_offset(file->chunk),
293                         pool));
294      file->endp = file->buffer + length;
295      file->curp = file->buffer;
296    }
297
298  return SVN_NO_ERROR;
299}
300
301
302static svn_error_t *
303decrement_chunk(struct file_info *file, apr_pool_t *pool)
304{
305  if (file->chunk == 0)
306    {
307      /* We are already at the first chunk. Indicate BOF (Beginning Of File)
308         by setting chunk = -1 and curp = endp - 1. Both conditions are
309         important. They help the increment step to catch the BOF situation
310         in an efficient way. */
311      file->chunk--;
312      file->curp = file->endp - 1;
313    }
314  else
315    {
316      /* Read previous chunk and reset pointers. */
317      file->chunk--;
318      SVN_ERR(read_chunk(file->file, file->buffer,
319                         CHUNK_SIZE, chunk_to_offset(file->chunk),
320                         pool));
321      file->endp = file->buffer + CHUNK_SIZE;
322      file->curp = file->endp - 1;
323    }
324
325  return SVN_NO_ERROR;
326}
327
328
329/* Check whether one of the FILEs has its pointers 'before' the beginning of
330 * the file (this can happen while scanning backwards). This is the case if
331 * one of them has chunk == -1. */
332static svn_boolean_t
333is_one_at_bof(struct file_info file[], apr_size_t file_len)
334{
335  apr_size_t i;
336
337  for (i = 0; i < file_len; i++)
338    if (file[i].chunk == -1)
339      return TRUE;
340
341  return FALSE;
342}
343
344/* Check whether one of the FILEs has its pointers at EOF (this is the case if
345 * one of them has curp == endp (this can only happen at the last chunk)) */
346static svn_boolean_t
347is_one_at_eof(struct file_info file[], apr_size_t file_len)
348{
349  apr_size_t i;
350
351  for (i = 0; i < file_len; i++)
352    if (file[i].curp == file[i].endp)
353      return TRUE;
354
355  return FALSE;
356}
357
/* Quickly determine whether the machine word CHUNK contains an eol
 * character ('\r' or '\n') in any of its bytes.
 * (Mainly copy-n-paste from eol.c#svn_eol__find_eol_start.)
 *
 * NOTE(review): the SVN__*_MASK constants are defined elsewhere;
 * presumably each replicates one byte value across the whole word --
 * confirm against private/svn_eol_private.h.
 */

#if SVN_UNALIGNED_ACCESS_IS_OK
static svn_boolean_t contains_eol(apr_uintptr_t chunk)
{
  apr_uintptr_t cr_bits = chunk ^ SVN__R_MASK;
  apr_uintptr_t lf_bits = chunk ^ SVN__N_MASK;

  /* Fold the low seven bits of every byte into that byte's top bit. */
  cr_bits |= (cr_bits & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;
  lf_bits |= (lf_bits & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;

  /* An eol is present unless every top bit is set in both test words. */
  return (cr_bits & lf_bits & SVN__BIT_7_SET) != SVN__BIT_7_SET;
}
#endif
374
375/* Find the prefix which is identical between all elements of the FILE array.
376 * Return the number of prefix lines in PREFIX_LINES.  REACHED_ONE_EOF will be
377 * set to TRUE if one of the FILEs reached its end while scanning prefix,
378 * i.e. at least one file consisted entirely of prefix.  Otherwise,
379 * REACHED_ONE_EOF is set to FALSE.
380 *
381 * After this function is finished, the buffers, chunks, curp's and endp's
382 * of the FILEs are set to point at the first byte after the prefix. */
383static svn_error_t *
384find_identical_prefix(svn_boolean_t *reached_one_eof, apr_off_t *prefix_lines,
385                      struct file_info file[], apr_size_t file_len,
386                      apr_pool_t *pool)
387{
388  svn_boolean_t had_cr = FALSE;
389  svn_boolean_t is_match;
390  apr_off_t lines = 0;
391  apr_size_t i;
392
393  *reached_one_eof = FALSE;
394
395  for (i = 1, is_match = TRUE; i < file_len; i++)
396    is_match = is_match && *file[0].curp == *file[i].curp;
397  while (is_match)
398    {
399#if SVN_UNALIGNED_ACCESS_IS_OK
400      apr_ssize_t max_delta, delta;
401#endif /* SVN_UNALIGNED_ACCESS_IS_OK */
402
403      /* ### TODO: see if we can take advantage of
404         diff options like ignore_eol_style or ignore_space. */
405      /* check for eol, and count */
406      if (*file[0].curp == '\r')
407        {
408          lines++;
409          had_cr = TRUE;
410        }
411      else if (*file[0].curp == '\n' && !had_cr)
412        {
413          lines++;
414        }
415      else
416        {
417          had_cr = FALSE;
418        }
419
420      INCREMENT_POINTERS(file, file_len, pool);
421
422#if SVN_UNALIGNED_ACCESS_IS_OK
423
424      /* Try to advance as far as possible with machine-word granularity.
425       * Determine how far we may advance with chunky ops without reaching
426       * endp for any of the files.
427       * Signedness is important here if curp gets close to endp.
428       */
429      max_delta = file[0].endp - file[0].curp - sizeof(apr_uintptr_t);
430      for (i = 1; i < file_len; i++)
431        {
432          delta = file[i].endp - file[i].curp - sizeof(apr_uintptr_t);
433          if (delta < max_delta)
434            max_delta = delta;
435        }
436
437      is_match = TRUE;
438      for (delta = 0; delta < max_delta; delta += sizeof(apr_uintptr_t))
439        {
440          apr_uintptr_t chunk = *(const apr_uintptr_t *)(file[0].curp + delta);
441          if (contains_eol(chunk))
442            break;
443
444          for (i = 1; i < file_len; i++)
445            if (chunk != *(const apr_uintptr_t *)(file[i].curp + delta))
446              {
447                is_match = FALSE;
448                break;
449              }
450
451          if (! is_match)
452            break;
453        }
454
455      if (delta /* > 0*/)
456        {
457          /* We either found a mismatch or an EOL at or shortly behind curp+delta
458           * or we cannot proceed with chunky ops without exceeding endp.
459           * In any way, everything up to curp + delta is equal and not an EOL.
460           */
461          for (i = 0; i < file_len; i++)
462            file[i].curp += delta;
463
464          /* Skipped data without EOL markers, so last char was not a CR. */
465          had_cr = FALSE;
466        }
467#endif
468
469      *reached_one_eof = is_one_at_eof(file, file_len);
470      if (*reached_one_eof)
471        break;
472      else
473        for (i = 1, is_match = TRUE; i < file_len; i++)
474          is_match = is_match && *file[0].curp == *file[i].curp;
475    }
476
477  if (had_cr)
478    {
479      /* Check if we ended in the middle of a \r\n for one file, but \r for
480         another. If so, back up one byte, so the next loop will back up
481         the entire line. Also decrement lines, since we counted one
482         too many for the \r. */
483      svn_boolean_t ended_at_nonmatching_newline = FALSE;
484      for (i = 0; i < file_len; i++)
485        if (file[i].curp < file[i].endp)
486          ended_at_nonmatching_newline = ended_at_nonmatching_newline
487                                         || *file[i].curp == '\n';
488      if (ended_at_nonmatching_newline)
489        {
490          lines--;
491          DECREMENT_POINTERS(file, file_len, pool);
492        }
493    }
494
495  /* Back up one byte, so we point at the last identical byte */
496  DECREMENT_POINTERS(file, file_len, pool);
497
498  /* Back up to the last eol sequence (\n, \r\n or \r) */
499  while (!is_one_at_bof(file, file_len) &&
500         *file[0].curp != '\n' && *file[0].curp != '\r')
501    DECREMENT_POINTERS(file, file_len, pool);
502
503  /* Slide one byte forward, to point past the eol sequence */
504  INCREMENT_POINTERS(file, file_len, pool);
505
506  *prefix_lines = lines;
507
508  return SVN_NO_ERROR;
509}
510
511
/* The number of identical suffix lines to keep with the middle section.
 * These lines are not eliminated as suffix, so the token parsing and lcs
 * steps can still pick them up.  This is mainly for backward compatibility
 * with the previous diff (and blame) output: if there are multiple diff
 * solutions, our lcs algorithm prefers taking common lines from the start,
 * rather than from the end.  Giving it back some suffix lines leaves it
 * some wiggle room to find the exact same diff as before.
 *
 * The number 50 is more or less arbitrary, based on some real-world tests
 * with big files (and then doubling the required number to be on the safe
 * side).  This has a negligible effect on the power of the optimization. */
/* If you change this number, update test_identical_suffix() in
 * diff-diff3-test.c. */
#if !defined(SUFFIX_LINES_TO_KEEP)
#define SUFFIX_LINES_TO_KEEP 50
#endif
527
528/* Find the suffix which is identical between all elements of the FILE array.
529 * Return the number of suffix lines in SUFFIX_LINES.
530 *
531 * Before this function is called the FILEs' pointers and chunks should be
532 * positioned right after the identical prefix (which is the case after
533 * find_identical_prefix), so we can determine where suffix scanning should
534 * ultimately stop. */
535static svn_error_t *
536find_identical_suffix(apr_off_t *suffix_lines, struct file_info file[],
537                      apr_size_t file_len, apr_pool_t *pool)
538{
539  struct file_info file_for_suffix[4] = { { 0 }  };
540  apr_off_t length[4];
541  apr_off_t suffix_min_chunk0;
542  apr_off_t suffix_min_offset0;
543  apr_off_t min_file_size;
544  int suffix_lines_to_keep = SUFFIX_LINES_TO_KEEP;
545  svn_boolean_t is_match;
546  apr_off_t lines = 0;
547  svn_boolean_t had_nl;
548  apr_size_t i;
549
550  /* Initialize file_for_suffix[].
551     Read last chunk, position curp at last byte. */
552  for (i = 0; i < file_len; i++)
553    {
554      file_for_suffix[i].path = file[i].path;
555      file_for_suffix[i].file = file[i].file;
556      file_for_suffix[i].size = file[i].size;
557      file_for_suffix[i].chunk =
558        (int) offset_to_chunk(file_for_suffix[i].size); /* last chunk */
559      length[i] = offset_in_chunk(file_for_suffix[i].size);
560      if (length[i] == 0)
561        {
562          /* last chunk is an empty chunk -> start at next-to-last chunk */
563          file_for_suffix[i].chunk = file_for_suffix[i].chunk - 1;
564          length[i] = CHUNK_SIZE;
565        }
566
567      if (file_for_suffix[i].chunk == file[i].chunk)
568        {
569          /* Prefix ended in last chunk, so we can reuse the prefix buffer */
570          file_for_suffix[i].buffer = file[i].buffer;
571        }
572      else
573        {
574          /* There is at least more than 1 chunk,
575             so allocate full chunk size buffer */
576          file_for_suffix[i].buffer = apr_palloc(pool, CHUNK_SIZE);
577          SVN_ERR(read_chunk(file_for_suffix[i].file,
578                             file_for_suffix[i].buffer, length[i],
579                             chunk_to_offset(file_for_suffix[i].chunk),
580                             pool));
581        }
582      file_for_suffix[i].endp = file_for_suffix[i].buffer + length[i];
583      file_for_suffix[i].curp = file_for_suffix[i].endp - 1;
584    }
585
586  /* Get the chunk and pointer offset (for file[0]) at which we should stop
587     scanning backward for the identical suffix, i.e. when we reach prefix. */
588  suffix_min_chunk0 = file[0].chunk;
589  suffix_min_offset0 = file[0].curp - file[0].buffer;
590
591  /* Compensate if other files are smaller than file[0] */
592  for (i = 1, min_file_size = file[0].size; i < file_len; i++)
593    if (file[i].size < min_file_size)
594      min_file_size = file[i].size;
595  if (file[0].size > min_file_size)
596    {
597      suffix_min_chunk0 += (file[0].size - min_file_size) / CHUNK_SIZE;
598      suffix_min_offset0 += (file[0].size - min_file_size) % CHUNK_SIZE;
599    }
600
601  /* Scan backwards until mismatch or until we reach the prefix. */
602  for (i = 1, is_match = TRUE; i < file_len; i++)
603    is_match = is_match
604               && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
605  if (is_match && *file_for_suffix[0].curp != '\r'
606               && *file_for_suffix[0].curp != '\n')
607    /* Count an extra line for the last line not ending in an eol. */
608    lines++;
609
610  had_nl = FALSE;
611  while (is_match)
612    {
613      svn_boolean_t reached_prefix;
614#if SVN_UNALIGNED_ACCESS_IS_OK
615      /* Initialize the minimum pointer positions. */
616      const char *min_curp[4];
617      svn_boolean_t can_read_word;
618#endif /* SVN_UNALIGNED_ACCESS_IS_OK */
619
620      /* ### TODO: see if we can take advantage of
621         diff options like ignore_eol_style or ignore_space. */
622      /* check for eol, and count */
623      if (*file_for_suffix[0].curp == '\n')
624        {
625          lines++;
626          had_nl = TRUE;
627        }
628      else if (*file_for_suffix[0].curp == '\r' && !had_nl)
629        {
630          lines++;
631        }
632      else
633        {
634          had_nl = FALSE;
635        }
636
637      DECREMENT_POINTERS(file_for_suffix, file_len, pool);
638
639#if SVN_UNALIGNED_ACCESS_IS_OK
640      for (i = 0; i < file_len; i++)
641        min_curp[i] = file_for_suffix[i].buffer;
642
643      /* If we are in the same chunk that contains the last part of the common
644         prefix, use the min_curp[0] pointer to make sure we don't get a
645         suffix that overlaps the already determined common prefix. */
646      if (file_for_suffix[0].chunk == suffix_min_chunk0)
647        min_curp[0] += suffix_min_offset0;
648
649      /* Scan quickly by reading with machine-word granularity. */
650      for (i = 0, can_read_word = TRUE; can_read_word && i < file_len; i++)
651        can_read_word = ((file_for_suffix[i].curp + 1 - sizeof(apr_uintptr_t))
652                         > min_curp[i]);
653
654      while (can_read_word)
655        {
656          apr_uintptr_t chunk;
657
658          /* For each file curp is positioned at the current byte, but we
659             want to examine the current byte and the ones before the current
660             location as one machine word. */
661
662          chunk = *(const apr_uintptr_t *)(file_for_suffix[0].curp + 1
663                                             - sizeof(apr_uintptr_t));
664          if (contains_eol(chunk))
665            break;
666
667          for (i = 1, is_match = TRUE; is_match && i < file_len; i++)
668            is_match = (chunk
669                           == *(const apr_uintptr_t *)
670                                    (file_for_suffix[i].curp + 1
671                                       - sizeof(apr_uintptr_t)));
672
673          if (! is_match)
674            break;
675
676          for (i = 0; i < file_len; i++)
677            {
678              file_for_suffix[i].curp -= sizeof(apr_uintptr_t);
679              can_read_word = can_read_word
680                              && (  (file_for_suffix[i].curp + 1
681                                       - sizeof(apr_uintptr_t))
682                                  > min_curp[i]);
683            }
684
685          /* We skipped some bytes, so there are no closing EOLs */
686          had_nl = FALSE;
687        }
688
689      /* The > min_curp[i] check leaves at least one final byte for checking
690         in the non block optimized case below. */
691#endif
692
693      reached_prefix = file_for_suffix[0].chunk == suffix_min_chunk0
694                       && (file_for_suffix[0].curp - file_for_suffix[0].buffer)
695                          == suffix_min_offset0;
696      if (reached_prefix || is_one_at_bof(file_for_suffix, file_len))
697        break;
698
699      is_match = TRUE;
700      for (i = 1; i < file_len; i++)
701        is_match = is_match
702                   && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
703    }
704
705  /* Slide one byte forward, to point at the first byte of identical suffix */
706  INCREMENT_POINTERS(file_for_suffix, file_len, pool);
707
708  /* Slide forward until we find an eol sequence to add the rest of the line
709     we're in. Then add SUFFIX_LINES_TO_KEEP more lines. Stop if at least
710     one file reaches its end. */
711  do
712    {
713      svn_boolean_t had_cr = FALSE;
714      while (!is_one_at_eof(file_for_suffix, file_len)
715             && *file_for_suffix[0].curp != '\n'
716             && *file_for_suffix[0].curp != '\r')
717        INCREMENT_POINTERS(file_for_suffix, file_len, pool);
718
719      /* Slide one or two more bytes, to point past the eol. */
720      if (!is_one_at_eof(file_for_suffix, file_len)
721          && *file_for_suffix[0].curp == '\r')
722        {
723          lines--;
724          had_cr = TRUE;
725          INCREMENT_POINTERS(file_for_suffix, file_len, pool);
726        }
727      if (!is_one_at_eof(file_for_suffix, file_len)
728          && *file_for_suffix[0].curp == '\n')
729        {
730          if (!had_cr)
731            lines--;
732          INCREMENT_POINTERS(file_for_suffix, file_len, pool);
733        }
734    }
735  while (!is_one_at_eof(file_for_suffix, file_len)
736         && suffix_lines_to_keep--);
737
738  if (is_one_at_eof(file_for_suffix, file_len))
739    lines = 0;
740
741  /* Save the final suffix information in the original file_info */
742  for (i = 0; i < file_len; i++)
743    {
744      file[i].suffix_start_chunk = file_for_suffix[i].chunk;
745      file[i].suffix_offset_in_chunk =
746        file_for_suffix[i].curp - file_for_suffix[i].buffer;
747    }
748
749  *suffix_lines = lines;
750
751  return SVN_NO_ERROR;
752}
753
754
755/* Let FILE stand for the array of file_info struct elements of BATON->files
756 * that are indexed by the elements of the DATASOURCE array.
757 * BATON's type is (svn_diff__file_baton_t *).
758 *
759 * For each file in the FILE array, open the file at FILE.path; initialize
760 * FILE.file, FILE.size, FILE.buffer, FILE.curp and FILE.endp; allocate a
761 * buffer and read the first chunk.  Then find the prefix and suffix lines
762 * which are identical between all the files.  Return the number of identical
763 * prefix lines in PREFIX_LINES, and the number of identical suffix lines in
764 * SUFFIX_LINES.
765 *
766 * Finding the identical prefix and suffix allows us to exclude those from the
767 * rest of the diff algorithm, which increases performance by reducing the
768 * problem space.
769 *
770 * Implements svn_diff_fns2_t::datasources_open. */
771static svn_error_t *
772datasources_open(void *baton,
773                 apr_off_t *prefix_lines,
774                 apr_off_t *suffix_lines,
775                 const svn_diff_datasource_e *datasources,
776                 apr_size_t datasources_len)
777{
778  svn_diff__file_baton_t *file_baton = baton;
779  struct file_info files[4];
780  apr_off_t length[4];
781#ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING
782  svn_boolean_t reached_one_eof;
783#endif
784  apr_size_t i;
785
786  /* Make sure prefix_lines and suffix_lines are set correctly, even if we
787   * exit early because one of the files is empty. */
788  *prefix_lines = 0;
789  *suffix_lines = 0;
790
791  /* Open datasources and read first chunk */
792  for (i = 0; i < datasources_len; i++)
793    {
794      svn_filesize_t filesize;
795      struct file_info *file
796          = &file_baton->files[datasource_to_index(datasources[i])];
797      SVN_ERR(svn_io_file_open(&file->file, file->path,
798                               APR_READ, APR_OS_DEFAULT, file_baton->pool));
799      SVN_ERR(svn_io_file_size_get(&filesize, file->file, file_baton->pool));
800      file->size = filesize;
801      length[i] = filesize > CHUNK_SIZE ? CHUNK_SIZE : filesize;
802      file->buffer = apr_palloc(file_baton->pool, (apr_size_t) length[i]);
803      SVN_ERR(read_chunk(file->file, file->buffer,
804                         length[i], 0, file_baton->pool));
805      file->endp = file->buffer + length[i];
806      file->curp = file->buffer;
807      /* Set suffix_start_chunk to a guard value, so if suffix scanning is
808       * skipped because one of the files is empty, or because of
809       * reached_one_eof, we can still easily check for the suffix during
810       * token reading (datasource_get_next_token). */
811      file->suffix_start_chunk = -1;
812
813      files[i] = *file;
814    }
815
816  for (i = 0; i < datasources_len; i++)
817    if (length[i] == 0)
818      /* There will not be any identical prefix/suffix, so we're done. */
819      return SVN_NO_ERROR;
820
821#ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING
822
823  SVN_ERR(find_identical_prefix(&reached_one_eof, prefix_lines,
824                                files, datasources_len, file_baton->pool));
825
826  if (!reached_one_eof)
827    /* No file consisted totally of identical prefix,
828     * so there may be some identical suffix.  */
829    SVN_ERR(find_identical_suffix(suffix_lines, files, datasources_len,
830                                  file_baton->pool));
831
832#endif
833
834  /* Copy local results back to baton. */
835  for (i = 0; i < datasources_len; i++)
836    file_baton->files[datasource_to_index(datasources[i])] = files[i];
837
838  return SVN_NO_ERROR;
839}
840
841
842/* Implements svn_diff_fns2_t::datasource_close */
843static svn_error_t *
844datasource_close(void *baton, svn_diff_datasource_e datasource)
845{
846  /* Do nothing.  The compare_token function needs previous datasources
847   * to stay available until all datasources are processed.
848   */
849
850  return SVN_NO_ERROR;
851}
852
853/* Implements svn_diff_fns2_t::datasource_get_next_token */
854static svn_error_t *
855datasource_get_next_token(apr_uint32_t *hash, void **token, void *baton,
856                          svn_diff_datasource_e datasource)
857{
858  svn_diff__file_baton_t *file_baton = baton;
859  svn_diff__file_token_t *file_token;
860  struct file_info *file = &file_baton->files[datasource_to_index(datasource)];
861  char *endp;
862  char *curp;
863  char *eol;
864  apr_off_t last_chunk;
865  apr_off_t length;
866  apr_uint32_t h = 0;
867  /* Did the last chunk end in a CR character? */
868  svn_boolean_t had_cr = FALSE;
869
870  *token = NULL;
871
872  curp = file->curp;
873  endp = file->endp;
874
875  last_chunk = offset_to_chunk(file->size);
876
877  /* Are we already at the end of a chunk? */
878  if (curp == endp)
879    {
880      /* Are we at EOF */
881      if (last_chunk == file->chunk)
882        return SVN_NO_ERROR; /* EOF */
883
884      /* Or right before an identical suffix in the next chunk? */
885      if (file->chunk + 1 == file->suffix_start_chunk
886          && file->suffix_offset_in_chunk == 0)
887        return SVN_NO_ERROR;
888    }
889
890  /* Stop when we encounter the identical suffix. If suffix scanning was not
891   * performed, suffix_start_chunk will be -1, so this condition will never
892   * be true. */
893  if (file->chunk == file->suffix_start_chunk
894      && (curp - file->buffer) == file->suffix_offset_in_chunk)
895    return SVN_NO_ERROR;
896
897  /* Allocate a new token, or fetch one from the "reusable tokens" list. */
898  file_token = file_baton->tokens;
899  if (file_token)
900    {
901      file_baton->tokens = file_token->next;
902    }
903  else
904    {
905      file_token = apr_palloc(file_baton->pool, sizeof(*file_token));
906    }
907
908  file_token->datasource = datasource;
909  file_token->offset = chunk_to_offset(file->chunk)
910                       + (curp - file->buffer);
911  file_token->norm_offset = file_token->offset;
912  file_token->raw_length = 0;
913  file_token->length = 0;
914
915  while (1)
916    {
917      eol = svn_eol__find_eol_start(curp, endp - curp);
918      if (eol)
919        {
920          had_cr = (*eol == '\r');
921          eol++;
922          /* If we have the whole eol sequence in the chunk... */
923          if (!(had_cr && eol == endp))
924            {
925              /* Also skip past the '\n' in an '\r\n' sequence. */
926              if (had_cr && *eol == '\n')
927                eol++;
928              break;
929            }
930        }
931
932      if (file->chunk == last_chunk)
933        {
934          eol = endp;
935          break;
936        }
937
938      length = endp - curp;
939      file_token->raw_length += length;
940      {
941        char *c = curp;
942
943        svn_diff__normalize_buffer(&c, &length,
944                                   &file->normalize_state,
945                                   curp, file_baton->options);
946        if (file_token->length == 0)
947          {
948            /* When we are reading the first part of the token, move the
949               normalized offset past leading ignored characters, if any. */
950            file_token->norm_offset += (c - curp);
951          }
952        file_token->length += length;
953        h = svn__adler32(h, c, length);
954      }
955
956      curp = endp = file->buffer;
957      file->chunk++;
958      length = file->chunk == last_chunk ?
959        offset_in_chunk(file->size) : CHUNK_SIZE;
960      endp += length;
961      file->endp = endp;
962
963      /* Issue #4283: Normally we should have checked for reaching the skipped
964         suffix here, but because we assume that a suffix always starts on a
965         line and token boundary we rely on catching the suffix earlier in this
966         function.
967
968         When changing things here, make sure the whitespace settings are
969         applied, or we might not reach the exact suffix boundary as token
970         boundary. */
971      SVN_ERR(read_chunk(file->file,
972                         curp, length,
973                         chunk_to_offset(file->chunk),
974                         file_baton->pool));
975
976      /* If the last chunk ended in a CR, we're done. */
977      if (had_cr)
978        {
979          eol = curp;
980          if (*curp == '\n')
981            ++eol;
982          break;
983        }
984    }
985
986  length = eol - curp;
987  file_token->raw_length += length;
988  file->curp = eol;
989
990  /* If the file length is exactly a multiple of CHUNK_SIZE, we will end up
991   * with a spurious empty token.  Avoid returning it.
992   * Note that we use the unnormalized length; we don't want a line containing
993   * only spaces (and no trailing newline) to appear like a non-existent
994   * line. */
995  if (file_token->raw_length > 0)
996    {
997      char *c = curp;
998      svn_diff__normalize_buffer(&c, &length,
999                                 &file->normalize_state,
1000                                 curp, file_baton->options);
1001      if (file_token->length == 0)
1002        {
1003          /* When we are reading the first part of the token, move the
1004             normalized offset past leading ignored characters, if any. */
1005          file_token->norm_offset += (c - curp);
1006        }
1007
1008      file_token->length += length;
1009
1010      *hash = svn__adler32(h, c, length);
1011      *token = file_token;
1012    }
1013
1014  return SVN_NO_ERROR;
1015}
1016
1017#define COMPARE_CHUNK_SIZE 4096
1018
1019/* Implements svn_diff_fns2_t::token_compare */
1020static svn_error_t *
1021token_compare(void *baton, void *token1, void *token2, int *compare)
1022{
1023  svn_diff__file_baton_t *file_baton = baton;
1024  svn_diff__file_token_t *file_token[2];
1025  char buffer[2][COMPARE_CHUNK_SIZE];
1026  char *bufp[2];
1027  apr_off_t offset[2];
1028  struct file_info *file[2];
1029  apr_off_t length[2];
1030  apr_off_t total_length;
1031  /* How much is left to read of each token from the file. */
1032  apr_off_t raw_length[2];
1033  int i;
1034  svn_diff__normalize_state_t state[2];
1035
1036  file_token[0] = token1;
1037  file_token[1] = token2;
1038  if (file_token[0]->length < file_token[1]->length)
1039    {
1040      *compare = -1;
1041      return SVN_NO_ERROR;
1042    }
1043
1044  if (file_token[0]->length > file_token[1]->length)
1045    {
1046      *compare = 1;
1047      return SVN_NO_ERROR;
1048    }
1049
1050  total_length = file_token[0]->length;
1051  if (total_length == 0)
1052    {
1053      *compare = 0;
1054      return SVN_NO_ERROR;
1055    }
1056
1057  for (i = 0; i < 2; ++i)
1058    {
1059      int idx = datasource_to_index(file_token[i]->datasource);
1060
1061      file[i] = &file_baton->files[idx];
1062      offset[i] = file_token[i]->norm_offset;
1063      state[i] = svn_diff__normalize_state_normal;
1064
1065      if (offset_to_chunk(offset[i]) == file[i]->chunk)
1066        {
1067          /* If the start of the token is in memory, the entire token is
1068           * in memory.
1069           */
1070          bufp[i] = file[i]->buffer;
1071          bufp[i] += offset_in_chunk(offset[i]);
1072
1073          length[i] = total_length;
1074          raw_length[i] = 0;
1075        }
1076      else
1077        {
1078          apr_off_t skipped;
1079
1080          length[i] = 0;
1081
1082          /* When we skipped the first part of the token via the whitespace
1083             normalization we must reduce the raw length of the token */
1084          skipped = (file_token[i]->norm_offset - file_token[i]->offset);
1085
1086          raw_length[i] = file_token[i]->raw_length - skipped;
1087        }
1088    }
1089
1090  do
1091    {
1092      apr_off_t len;
1093      for (i = 0; i < 2; i++)
1094        {
1095          if (length[i] == 0)
1096            {
1097              /* Error if raw_length is 0, that's an unexpected change
1098               * of the file that can happen when ingoring whitespace
1099               * and that can lead to an infinite loop. */
1100              if (raw_length[i] == 0)
1101                return svn_error_createf(SVN_ERR_DIFF_DATASOURCE_MODIFIED,
1102                                         NULL,
1103                                         _("The file '%s' changed unexpectedly"
1104                                           " during diff"),
1105                                         file[i]->path);
1106
1107              /* Read a chunk from disk into a buffer */
1108              bufp[i] = buffer[i];
1109              length[i] = raw_length[i] > COMPARE_CHUNK_SIZE ?
1110                COMPARE_CHUNK_SIZE : raw_length[i];
1111
1112              SVN_ERR(read_chunk(file[i]->file,
1113                                 bufp[i], length[i], offset[i],
1114                                 file_baton->pool));
1115              offset[i] += length[i];
1116              raw_length[i] -= length[i];
1117              /* bufp[i] gets reset to buffer[i] before reading each chunk,
1118                 so, overwriting it isn't a problem */
1119              svn_diff__normalize_buffer(&bufp[i], &length[i], &state[i],
1120                                         bufp[i], file_baton->options);
1121
1122              /* assert(length[i] == file_token[i]->length); */
1123            }
1124        }
1125
1126      len = length[0] > length[1] ? length[1] : length[0];
1127
1128      /* Compare two chunks (that could be entire tokens if they both reside
1129       * in memory).
1130       */
1131      *compare = memcmp(bufp[0], bufp[1], (size_t) len);
1132      if (*compare != 0)
1133        return SVN_NO_ERROR;
1134
1135      total_length -= len;
1136      length[0] -= len;
1137      length[1] -= len;
1138      bufp[0] += len;
1139      bufp[1] += len;
1140    }
1141  while(total_length > 0);
1142
1143  *compare = 0;
1144  return SVN_NO_ERROR;
1145}
1146
1147
1148/* Implements svn_diff_fns2_t::token_discard */
1149static void
1150token_discard(void *baton, void *token)
1151{
1152  svn_diff__file_baton_t *file_baton = baton;
1153  svn_diff__file_token_t *file_token = token;
1154
1155  /* Prepend FILE_TOKEN to FILE_BATON->TOKENS, for reuse. */
1156  file_token->next = file_baton->tokens;
1157  file_baton->tokens = file_token;
1158}
1159
1160
1161/* Implements svn_diff_fns2_t::token_discard_all */
1162static void
1163token_discard_all(void *baton)
1164{
1165  svn_diff__file_baton_t *file_baton = baton;
1166
1167  /* Discard all memory in use by the tokens, and close all open files. */
1168  svn_pool_clear(file_baton->pool);
1169}
1170
1171
1172static const svn_diff_fns2_t svn_diff__file_vtable =
1173{
1174  datasources_open,
1175  datasource_close,
1176  datasource_get_next_token,
1177  token_compare,
1178  token_discard,
1179  token_discard_all
1180};
1181
1182/* Id for the --ignore-eol-style option, which doesn't have a short name. */
1183#define SVN_DIFF__OPT_IGNORE_EOL_STYLE 256
1184
1185/* Options supported by svn_diff_file_options_parse(). */
1186static const apr_getopt_option_t diff_options[] =
1187{
1188  { "ignore-space-change", 'b', 0, NULL },
1189  { "ignore-all-space", 'w', 0, NULL },
1190  { "ignore-eol-style", SVN_DIFF__OPT_IGNORE_EOL_STYLE, 0, NULL },
1191  { "show-c-function", 'p', 0, NULL },
1192  /* ### For compatibility; we don't support the argument to -u, because
1193   * ### we don't have optional argument support. */
1194  { "unified", 'u', 0, NULL },
1195  { "context", 'U', 1, NULL },
1196  { NULL, 0, 0, NULL }
1197};
1198
1199svn_diff_file_options_t *
1200svn_diff_file_options_create(apr_pool_t *pool)
1201{
1202  svn_diff_file_options_t * opts = apr_pcalloc(pool, sizeof(*opts));
1203
1204  opts->context_size = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1205
1206  return opts;
1207}
1208
1209/* A baton for use with opt_parsing_error_func(). */
1210struct opt_parsing_error_baton_t
1211{
1212  svn_error_t *err;
1213  apr_pool_t *pool;
1214};
1215
1216/* Store an error message from apr_getopt_long().  Set BATON->err to a new
1217 * error with a message generated from FMT and the remaining arguments.
1218 * Implements apr_getopt_err_fn_t. */
1219static void
1220opt_parsing_error_func(void *baton,
1221                       const char *fmt, ...)
1222{
1223  struct opt_parsing_error_baton_t *b = baton;
1224  const char *message;
1225  va_list ap;
1226
1227  va_start(ap, fmt);
1228  message = apr_pvsprintf(b->pool, fmt, ap);
1229  va_end(ap);
1230
1231  /* Skip leading ": " (if present, which it always is in known cases). */
1232  if (strncmp(message, ": ", 2) == 0)
1233    message += 2;
1234
1235  b->err = svn_error_create(SVN_ERR_INVALID_DIFF_OPTION, NULL, message);
1236}
1237
1238svn_error_t *
1239svn_diff_file_options_parse(svn_diff_file_options_t *options,
1240                            const apr_array_header_t *args,
1241                            apr_pool_t *pool)
1242{
1243  apr_getopt_t *os;
1244  struct opt_parsing_error_baton_t opt_parsing_error_baton;
1245  apr_array_header_t *argv;
1246
1247  opt_parsing_error_baton.err = NULL;
1248  opt_parsing_error_baton.pool = pool;
1249
1250  /* Make room for each option (starting at index 1) plus trailing NULL. */
1251  argv = apr_array_make(pool, args->nelts + 2, sizeof(char*));
1252  APR_ARRAY_PUSH(argv, const char *) = "";
1253  apr_array_cat(argv, args);
1254  APR_ARRAY_PUSH(argv, const char *) = NULL;
1255
1256  apr_getopt_init(&os, pool,
1257                  argv->nelts - 1 /* Exclude trailing NULL */,
1258                  (const char *const *) argv->elts);
1259
1260  /* Capture any error message from apr_getopt_long().  This will typically
1261   * say which option is wrong, which we would not otherwise know. */
1262  os->errfn = opt_parsing_error_func;
1263  os->errarg = &opt_parsing_error_baton;
1264
1265  while (1)
1266    {
1267      const char *opt_arg;
1268      int opt_id;
1269      apr_status_t err = apr_getopt_long(os, diff_options, &opt_id, &opt_arg);
1270
1271      if (APR_STATUS_IS_EOF(err))
1272        break;
1273      if (err)
1274        /* Wrap apr_getopt_long()'s error message.  Its doc string implies
1275         * it always will produce one, but never mind if it doesn't.  Avoid
1276         * using the message associated with the return code ERR, because
1277         * it refers to the "command line" which may be misleading here. */
1278        return svn_error_create(SVN_ERR_INVALID_DIFF_OPTION,
1279                                opt_parsing_error_baton.err,
1280                                _("Error in options to internal diff"));
1281
1282      switch (opt_id)
1283        {
1284        case 'b':
1285          /* -w takes precedence over -b. */
1286          if (! options->ignore_space)
1287            options->ignore_space = svn_diff_file_ignore_space_change;
1288          break;
1289        case 'w':
1290          options->ignore_space = svn_diff_file_ignore_space_all;
1291          break;
1292        case SVN_DIFF__OPT_IGNORE_EOL_STYLE:
1293          options->ignore_eol_style = TRUE;
1294          break;
1295        case 'p':
1296          options->show_c_function = TRUE;
1297          break;
1298        case 'U':
1299          SVN_ERR(svn_cstring_atoi(&options->context_size, opt_arg));
1300          break;
1301        default:
1302          break;
1303        }
1304    }
1305
1306  /* Check for spurious arguments. */
1307  if (os->ind < os->argc)
1308    return svn_error_createf(SVN_ERR_INVALID_DIFF_OPTION, NULL,
1309                             _("Invalid argument '%s' in diff options"),
1310                             os->argv[os->ind]);
1311
1312  return SVN_NO_ERROR;
1313}
1314
1315svn_error_t *
1316svn_diff_file_diff_2(svn_diff_t **diff,
1317                     const char *original,
1318                     const char *modified,
1319                     const svn_diff_file_options_t *options,
1320                     apr_pool_t *pool)
1321{
1322  svn_diff__file_baton_t baton = { 0 };
1323
1324  baton.options = options;
1325  baton.files[0].path = original;
1326  baton.files[1].path = modified;
1327  baton.pool = svn_pool_create(pool);
1328
1329  SVN_ERR(svn_diff_diff_2(diff, &baton, &svn_diff__file_vtable, pool));
1330
1331  svn_pool_destroy(baton.pool);
1332  return SVN_NO_ERROR;
1333}
1334
1335svn_error_t *
1336svn_diff_file_diff3_2(svn_diff_t **diff,
1337                      const char *original,
1338                      const char *modified,
1339                      const char *latest,
1340                      const svn_diff_file_options_t *options,
1341                      apr_pool_t *pool)
1342{
1343  svn_diff__file_baton_t baton = { 0 };
1344
1345  baton.options = options;
1346  baton.files[0].path = original;
1347  baton.files[1].path = modified;
1348  baton.files[2].path = latest;
1349  baton.pool = svn_pool_create(pool);
1350
1351  SVN_ERR(svn_diff_diff3_2(diff, &baton, &svn_diff__file_vtable, pool));
1352
1353  svn_pool_destroy(baton.pool);
1354  return SVN_NO_ERROR;
1355}
1356
1357svn_error_t *
1358svn_diff_file_diff4_2(svn_diff_t **diff,
1359                      const char *original,
1360                      const char *modified,
1361                      const char *latest,
1362                      const char *ancestor,
1363                      const svn_diff_file_options_t *options,
1364                      apr_pool_t *pool)
1365{
1366  svn_diff__file_baton_t baton = { 0 };
1367
1368  baton.options = options;
1369  baton.files[0].path = original;
1370  baton.files[1].path = modified;
1371  baton.files[2].path = latest;
1372  baton.files[3].path = ancestor;
1373  baton.pool = svn_pool_create(pool);
1374
1375  SVN_ERR(svn_diff_diff4_2(diff, &baton, &svn_diff__file_vtable, pool));
1376
1377  svn_pool_destroy(baton.pool);
1378  return SVN_NO_ERROR;
1379}
1380
1381
1382/** Display unified context diffs **/
1383
1384/* Maximum length of the extra context to show when show_c_function is set.
1385 * GNU diff uses 40, let's be brave and use 50 instead. */
1386#define SVN_DIFF__EXTRA_CONTEXT_LENGTH 50
1387typedef struct svn_diff__file_output_baton_t
1388{
1389  svn_stream_t *output_stream;
1390  const char *header_encoding;
1391
1392  /* Cached markers, in header_encoding. */
1393  const char *context_str;
1394  const char *delete_str;
1395  const char *insert_str;
1396
1397  const char *path[2];
1398  apr_file_t *file[2];
1399
1400  apr_off_t   current_line[2];
1401
1402  char        buffer[2][4096];
1403  apr_size_t  length[2];
1404  char       *curp[2];
1405
1406  apr_off_t   hunk_start[2];
1407  apr_off_t   hunk_length[2];
1408  svn_stringbuf_t *hunk;
1409
1410  /* Should we emit C functions in the unified diff header */
1411  svn_boolean_t show_c_function;
1412  /* Extra strings to skip over if we match. */
1413  apr_array_header_t *extra_skip_match;
1414  /* "Context" to append to the @@ line when the show_c_function option
1415   * is set. */
1416  svn_stringbuf_t *extra_context;
1417  /* Extra context for the current hunk. */
1418  char hunk_extra_context[SVN_DIFF__EXTRA_CONTEXT_LENGTH + 1];
1419
1420  int context_size;
1421
1422  /* Cancel handler */
1423  svn_cancel_func_t cancel_func;
1424  void *cancel_baton;
1425
1426  apr_pool_t *pool;
1427} svn_diff__file_output_baton_t;
1428
/* What output_unified_line() does with the line it reads. */
typedef enum svn_diff__file_output_unified_type_e
{
  /* Read past the line without adding it to the hunk. */
  svn_diff__file_output_unified_skip,
  /* Add the line to the hunk as context (counts for both files). */
  svn_diff__file_output_unified_context,
  /* Add the line to the hunk as a deletion (counts for the original). */
  svn_diff__file_output_unified_delete,
  /* Add the line to the hunk as an insertion (counts for the modified). */
  svn_diff__file_output_unified_insert
} svn_diff__file_output_unified_type_e;
1436
1437
1438static svn_error_t *
1439output_unified_line(svn_diff__file_output_baton_t *baton,
1440                    svn_diff__file_output_unified_type_e type, int idx)
1441{
1442  char *curp;
1443  char *eol;
1444  apr_size_t length;
1445  svn_error_t *err;
1446  svn_boolean_t bytes_processed = FALSE;
1447  svn_boolean_t had_cr = FALSE;
1448  /* Are we collecting extra context? */
1449  svn_boolean_t collect_extra = FALSE;
1450
1451  length = baton->length[idx];
1452  curp = baton->curp[idx];
1453
1454  /* Lazily update the current line even if we're at EOF.
1455   * This way we fake output of context at EOF
1456   */
1457  baton->current_line[idx]++;
1458
1459  if (length == 0 && apr_file_eof(baton->file[idx]))
1460    {
1461      return SVN_NO_ERROR;
1462    }
1463
1464  do
1465    {
1466      if (length > 0)
1467        {
1468          if (!bytes_processed)
1469            {
1470              switch (type)
1471                {
1472                case svn_diff__file_output_unified_context:
1473                  svn_stringbuf_appendcstr(baton->hunk, baton->context_str);
1474                  baton->hunk_length[0]++;
1475                  baton->hunk_length[1]++;
1476                  break;
1477                case svn_diff__file_output_unified_delete:
1478                  svn_stringbuf_appendcstr(baton->hunk, baton->delete_str);
1479                  baton->hunk_length[0]++;
1480                  break;
1481                case svn_diff__file_output_unified_insert:
1482                  svn_stringbuf_appendcstr(baton->hunk, baton->insert_str);
1483                  baton->hunk_length[1]++;
1484                  break;
1485                default:
1486                  break;
1487                }
1488
1489              if (baton->show_c_function
1490                  && (type == svn_diff__file_output_unified_skip
1491                      || type == svn_diff__file_output_unified_context)
1492                  && (svn_ctype_isalpha(*curp) || *curp == '$' || *curp == '_')
1493                  && !svn_cstring_match_glob_list(curp,
1494                                                  baton->extra_skip_match))
1495                {
1496                  svn_stringbuf_setempty(baton->extra_context);
1497                  collect_extra = TRUE;
1498                }
1499            }
1500
1501          eol = svn_eol__find_eol_start(curp, length);
1502
1503          if (eol != NULL)
1504            {
1505              apr_size_t len;
1506
1507              had_cr = (*eol == '\r');
1508              eol++;
1509              len = (apr_size_t)(eol - curp);
1510
1511              if (! had_cr || len < length)
1512                {
1513                  if (had_cr && *eol == '\n')
1514                    {
1515                      ++eol;
1516                      ++len;
1517                    }
1518
1519                  length -= len;
1520
1521                  if (type != svn_diff__file_output_unified_skip)
1522                    {
1523                      svn_stringbuf_appendbytes(baton->hunk, curp, len);
1524                    }
1525                  if (collect_extra)
1526                    {
1527                      svn_stringbuf_appendbytes(baton->extra_context,
1528                                                curp, len);
1529                    }
1530
1531                  baton->curp[idx] = eol;
1532                  baton->length[idx] = length;
1533
1534                  err = SVN_NO_ERROR;
1535
1536                  break;
1537                }
1538            }
1539
1540          if (type != svn_diff__file_output_unified_skip)
1541            {
1542              svn_stringbuf_appendbytes(baton->hunk, curp, length);
1543            }
1544
1545          if (collect_extra)
1546            {
1547              svn_stringbuf_appendbytes(baton->extra_context, curp, length);
1548            }
1549
1550          bytes_processed = TRUE;
1551        }
1552
1553      curp = baton->buffer[idx];
1554      length = sizeof(baton->buffer[idx]);
1555
1556      err = svn_io_file_read(baton->file[idx], curp, &length, baton->pool);
1557
1558      /* If the last chunk ended with a CR, we look for an LF at the start
1559         of this chunk. */
1560      if (had_cr)
1561        {
1562          if (! err && length > 0 && *curp == '\n')
1563            {
1564              if (type != svn_diff__file_output_unified_skip)
1565                {
1566                  svn_stringbuf_appendbyte(baton->hunk, *curp);
1567                }
1568              /* We don't append the LF to extra_context, since it would
1569               * just be stripped anyway. */
1570              ++curp;
1571              --length;
1572            }
1573
1574          baton->curp[idx] = curp;
1575          baton->length[idx] = length;
1576
1577          break;
1578        }
1579    }
1580  while (! err);
1581
1582  if (err && ! APR_STATUS_IS_EOF(err->apr_err))
1583    return err;
1584
1585  if (err && APR_STATUS_IS_EOF(err->apr_err))
1586    {
1587      svn_error_clear(err);
1588      /* Special case if we reach the end of file AND the last line is in the
1589         changed range AND the file doesn't end with a newline */
1590      if (bytes_processed && (type != svn_diff__file_output_unified_skip)
1591          && ! had_cr)
1592        {
1593          SVN_ERR(svn_diff__unified_append_no_newline_msg(
1594                    baton->hunk, baton->header_encoding, baton->pool));
1595        }
1596
1597      baton->length[idx] = 0;
1598    }
1599
1600  return SVN_NO_ERROR;
1601}
1602
1603static APR_INLINE svn_error_t *
1604output_unified_diff_range(svn_diff__file_output_baton_t *output_baton,
1605                          int source,
1606                          svn_diff__file_output_unified_type_e type,
1607                          apr_off_t until,
1608                          svn_cancel_func_t cancel_func,
1609                          void *cancel_baton)
1610{
1611  while (output_baton->current_line[source] < until)
1612    {
1613      if (cancel_func)
1614        SVN_ERR(cancel_func(cancel_baton));
1615
1616      SVN_ERR(output_unified_line(output_baton, type, source));
1617    }
1618  return SVN_NO_ERROR;
1619}
1620
1621static svn_error_t *
1622output_unified_flush_hunk(svn_diff__file_output_baton_t *baton)
1623{
1624  apr_off_t target_line;
1625  apr_size_t hunk_len;
1626  apr_off_t old_start;
1627  apr_off_t new_start;
1628
1629  if (svn_stringbuf_isempty(baton->hunk))
1630    {
1631      /* Nothing to flush */
1632      return SVN_NO_ERROR;
1633    }
1634
1635  target_line = baton->hunk_start[0] + baton->hunk_length[0]
1636                + baton->context_size;
1637
1638  /* Add trailing context to the hunk */
1639  SVN_ERR(output_unified_diff_range(baton, 0 /* original */,
1640                                    svn_diff__file_output_unified_context,
1641                                    target_line,
1642                                    baton->cancel_func, baton->cancel_baton));
1643
1644  old_start = baton->hunk_start[0];
1645  new_start = baton->hunk_start[1];
1646
1647  /* If the file is non-empty, convert the line indexes from
1648     zero based to one based */
1649  if (baton->hunk_length[0])
1650    old_start++;
1651  if (baton->hunk_length[1])
1652    new_start++;
1653
1654  /* Write the hunk header */
1655  SVN_ERR(svn_diff__unified_write_hunk_header(
1656            baton->output_stream, baton->header_encoding, "@@",
1657            old_start, baton->hunk_length[0],
1658            new_start, baton->hunk_length[1],
1659            baton->hunk_extra_context,
1660            baton->pool));
1661
1662  /* Output the hunk content */
1663  hunk_len = baton->hunk->len;
1664  SVN_ERR(svn_stream_write(baton->output_stream, baton->hunk->data,
1665                           &hunk_len));
1666
1667  /* Prepare for the next hunk */
1668  baton->hunk_length[0] = 0;
1669  baton->hunk_length[1] = 0;
1670  baton->hunk_start[0] = 0;
1671  baton->hunk_start[1] = 0;
1672  svn_stringbuf_setempty(baton->hunk);
1673
1674  return SVN_NO_ERROR;
1675}
1676
1677static svn_error_t *
1678output_unified_diff_modified(void *baton,
1679  apr_off_t original_start, apr_off_t original_length,
1680  apr_off_t modified_start, apr_off_t modified_length,
1681  apr_off_t latest_start, apr_off_t latest_length)
1682{
1683  svn_diff__file_output_baton_t *output_baton = baton;
1684  apr_off_t context_prefix_length;
1685  apr_off_t prev_context_end;
1686  svn_boolean_t init_hunk = FALSE;
1687
1688  if (original_start > output_baton->context_size)
1689    context_prefix_length = output_baton->context_size;
1690  else
1691    context_prefix_length = original_start;
1692
1693  /* Calculate where the previous hunk will end if we would write it now
1694     (including the necessary context at the end) */
1695  if (output_baton->hunk_length[0] > 0 || output_baton->hunk_length[1] > 0)
1696    {
1697      prev_context_end = output_baton->hunk_start[0]
1698                         + output_baton->hunk_length[0]
1699                         + output_baton->context_size;
1700    }
1701  else
1702    {
1703      prev_context_end = -1;
1704
1705      if (output_baton->hunk_start[0] == 0
1706          && (original_length > 0 || modified_length > 0))
1707        init_hunk = TRUE;
1708    }
1709
1710  /* If the changed range is far enough from the previous range, flush the current
1711     hunk. */
1712  {
1713    apr_off_t new_hunk_start = (original_start - context_prefix_length);
1714
1715    if (output_baton->current_line[0] < new_hunk_start
1716          && prev_context_end <= new_hunk_start)
1717      {
1718        SVN_ERR(output_unified_flush_hunk(output_baton));
1719        init_hunk = TRUE;
1720      }
1721    else if (output_baton->hunk_length[0] > 0
1722             || output_baton->hunk_length[1] > 0)
1723      {
1724        /* We extend the current hunk */
1725
1726
1727        /* Original: Output the context preceding the changed range */
1728        SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1729                                          svn_diff__file_output_unified_context,
1730                                          original_start,
1731                                          output_baton->cancel_func,
1732                                          output_baton->cancel_baton));
1733      }
1734  }
1735
1736  /* Original: Skip lines until we are at the beginning of the context we want
1737     to display */
1738  SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1739                                    svn_diff__file_output_unified_skip,
1740                                    original_start - context_prefix_length,
1741                                    output_baton->cancel_func,
1742                                    output_baton->cancel_baton));
1743
1744  /* Note that the above skip stores data for the show_c_function support below */
1745
1746  if (init_hunk)
1747    {
1748      SVN_ERR_ASSERT(output_baton->hunk_length[0] == 0
1749                     && output_baton->hunk_length[1] == 0);
1750
1751      output_baton->hunk_start[0] = original_start - context_prefix_length;
1752      output_baton->hunk_start[1] = modified_start - context_prefix_length;
1753    }
1754
1755  if (init_hunk && output_baton->show_c_function)
1756    {
1757      apr_size_t p;
1758      const char *invalid_character;
1759
1760      /* Save the extra context for later use.
1761       * Note that the last byte of the hunk_extra_context array is never
1762       * touched after it is zero-initialized, so the array is always
1763       * 0-terminated. */
1764      strncpy(output_baton->hunk_extra_context,
1765              output_baton->extra_context->data,
1766              SVN_DIFF__EXTRA_CONTEXT_LENGTH);
1767      /* Trim whitespace at the end, most notably to get rid of any
1768       * newline characters. */
1769      p = strlen(output_baton->hunk_extra_context);
1770      while (p > 0
1771             && svn_ctype_isspace(output_baton->hunk_extra_context[p - 1]))
1772        {
1773          output_baton->hunk_extra_context[--p] = '\0';
1774        }
1775      invalid_character =
1776        svn_utf__last_valid(output_baton->hunk_extra_context,
1777                            SVN_DIFF__EXTRA_CONTEXT_LENGTH);
1778      for (p = invalid_character - output_baton->hunk_extra_context;
1779           p < SVN_DIFF__EXTRA_CONTEXT_LENGTH; p++)
1780        {
1781          output_baton->hunk_extra_context[p] = '\0';
1782        }
1783    }
1784
1785  /* Modified: Skip lines until we are at the start of the changed range */
1786  SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */,
1787                                    svn_diff__file_output_unified_skip,
1788                                    modified_start,
1789                                    output_baton->cancel_func,
1790                                    output_baton->cancel_baton));
1791
1792  /* Original: Output the context preceding the changed range */
1793  SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1794                                    svn_diff__file_output_unified_context,
1795                                    original_start,
1796                                    output_baton->cancel_func,
1797                                    output_baton->cancel_baton));
1798
1799  /* Both: Output the changed range */
1800  SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1801                                    svn_diff__file_output_unified_delete,
1802                                    original_start + original_length,
1803                                    output_baton->cancel_func,
1804                                    output_baton->cancel_baton));
1805  SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */,
1806                                    svn_diff__file_output_unified_insert,
1807                                    modified_start + modified_length,
1808                                    output_baton->cancel_func,
1809                                    output_baton->cancel_baton));
1810
1811  return SVN_NO_ERROR;
1812}
1813
1814/* Set *HEADER to a new string consisting of PATH, a tab, and PATH's mtime. */
1815static svn_error_t *
1816output_unified_default_hdr(const char **header, const char *path,
1817                           apr_pool_t *pool)
1818{
1819  apr_finfo_t file_info;
1820  apr_time_exp_t exploded_time;
1821  char time_buffer[64];
1822  apr_size_t time_len;
1823  const char *utf8_timestr;
1824
1825  SVN_ERR(svn_io_stat(&file_info, path, APR_FINFO_MTIME, pool));
1826  apr_time_exp_lt(&exploded_time, file_info.mtime);
1827
1828  apr_strftime(time_buffer, &time_len, sizeof(time_buffer) - 1,
1829  /* Order of date components can be different in different languages */
1830               _("%a %b %e %H:%M:%S %Y"), &exploded_time);
1831
1832  SVN_ERR(svn_utf_cstring_to_utf8(&utf8_timestr, time_buffer, pool));
1833
1834  *header = apr_psprintf(pool, "%s\t%s", path, utf8_timestr);
1835
1836  return SVN_NO_ERROR;
1837}
1838
/* Callback table that svn_diff_file_output_unified4() hands to
   svn_diff_output2().  Only the second slot is populated; every other
   callback is left NULL. */
1839static const svn_diff_output_fns_t svn_diff__file_output_unified_vtable =
1840{
1841  NULL, /* output_common */
1842  output_unified_diff_modified,
1843  NULL, /* output_diff_latest */
1844  NULL, /* output_diff_common */
1845  NULL  /* output_conflict */
1846};
1847
1848svn_error_t *
1849svn_diff_file_output_unified4(svn_stream_t *output_stream,
1850                              svn_diff_t *diff,
1851                              const char *original_path,
1852                              const char *modified_path,
1853                              const char *original_header,
1854                              const char *modified_header,
1855                              const char *header_encoding,
1856                              const char *relative_to_dir,
1857                              svn_boolean_t show_c_function,
1858                              int context_size,
1859                              svn_cancel_func_t cancel_func,
1860                              void *cancel_baton,
1861                              apr_pool_t *pool)
1862{
1863  if (svn_diff_contains_diffs(diff))
1864    {
1865      svn_diff__file_output_baton_t baton;
1866      int i;
1867
1868      memset(&baton, 0, sizeof(baton));
1869      baton.output_stream = output_stream;
1870      baton.cancel_func = cancel_func;
1871      baton.cancel_baton = cancel_baton;
1872      baton.pool = pool;
1873      baton.header_encoding = header_encoding;
1874      baton.path[0] = original_path;
1875      baton.path[1] = modified_path;
1876      baton.hunk = svn_stringbuf_create_empty(pool);
1877      baton.show_c_function = show_c_function;
1878      baton.extra_context = svn_stringbuf_create_empty(pool);
1879      baton.context_size = (context_size >= 0) ? context_size
1880                                              : SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1881
1882      if (show_c_function)
1883        {
1884          baton.extra_skip_match = apr_array_make(pool, 3, sizeof(char **));
1885
1886          APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "public:*";
1887          APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "private:*";
1888          APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "protected:*";
1889        }
1890
1891      SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.context_str, " ",
1892                                            header_encoding, pool));
1893      SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.delete_str, "-",
1894                                            header_encoding, pool));
1895      SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.insert_str, "+",
1896                                            header_encoding, pool));
1897
1898      if (relative_to_dir)
1899        {
1900          /* Possibly adjust the "original" and "modified" paths shown in
1901             the output (see issue #2723). */
1902          const char *child_path;
1903
1904          if (! original_header)
1905            {
1906              child_path = svn_dirent_is_child(relative_to_dir,
1907                                               original_path, pool);
1908              if (child_path)
1909                original_path = child_path;
1910              else
1911                return svn_error_createf(
1912                                   SVN_ERR_BAD_RELATIVE_PATH, NULL,
1913                                   _("Path '%s' must be inside "
1914                                     "the directory '%s'"),
1915                                   svn_dirent_local_style(original_path, pool),
1916                                   svn_dirent_local_style(relative_to_dir,
1917                                                          pool));
1918            }
1919
1920          if (! modified_header)
1921            {
1922              child_path = svn_dirent_is_child(relative_to_dir,
1923                                               modified_path, pool);
1924              if (child_path)
1925                modified_path = child_path;
1926              else
1927                return svn_error_createf(
1928                                   SVN_ERR_BAD_RELATIVE_PATH, NULL,
1929                                   _("Path '%s' must be inside "
1930                                     "the directory '%s'"),
1931                                   svn_dirent_local_style(modified_path, pool),
1932                                   svn_dirent_local_style(relative_to_dir,
1933                                                          pool));
1934            }
1935        }
1936
1937      for (i = 0; i < 2; i++)
1938        {
1939          SVN_ERR(svn_io_file_open(&baton.file[i], baton.path[i],
1940                                   APR_READ, APR_OS_DEFAULT, pool));
1941        }
1942
1943      if (original_header == NULL)
1944        {
1945          SVN_ERR(output_unified_default_hdr(&original_header, original_path,
1946                                             pool));
1947        }
1948
1949      if (modified_header == NULL)
1950        {
1951          SVN_ERR(output_unified_default_hdr(&modified_header, modified_path,
1952                                             pool));
1953        }
1954
1955      SVN_ERR(svn_diff__unidiff_write_header(output_stream, header_encoding,
1956                                             original_header, modified_header,
1957                                             pool));
1958
1959      SVN_ERR(svn_diff_output2(diff, &baton,
1960                               &svn_diff__file_output_unified_vtable,
1961                               cancel_func, cancel_baton));
1962      SVN_ERR(output_unified_flush_hunk(&baton));
1963
1964      for (i = 0; i < 2; i++)
1965        {
1966          SVN_ERR(svn_io_file_close(baton.file[i], pool));
1967        }
1968    }
1969
1970  return SVN_NO_ERROR;
1971}
1972
1973
1974/** Display diff3 **/
1975
1976/* A stream to remember *leading* context.  Note that this stream does
1977   *not* copy the data that it is remembering; it just saves
1978   *pointers! */
1979typedef struct context_saver_t {
1980  svn_stream_t *stream;      /* stream whose write handler records into
                                this saver */
1981  int context_size;          /* number of slots in DATA and LEN */
1982  const char **data; /* const char *data[context_size] */
1983  apr_size_t *len;   /* apr_size_t len[context_size] */
1984  apr_size_t next_slot;      /* slot the next write will fill; wraps
                                around at CONTEXT_SIZE */
1985  apr_ssize_t total_writes;  /* number of writes recorded so far */
1986} context_saver_t;
1987
1988
1989static svn_error_t *
1990context_saver_stream_write(void *baton,
1991                           const char *data,
1992                           apr_size_t *len)
1993{
1994  context_saver_t *cs = baton;
1995
1996  if (cs->context_size > 0)
1997    {
1998      cs->data[cs->next_slot] = data;
1999      cs->len[cs->next_slot] = *len;
2000      cs->next_slot = (cs->next_slot + 1) % cs->context_size;
2001      cs->total_writes++;
2002    }
2003  return SVN_NO_ERROR;
2004}
2005
/* Baton for the diff3 (merge) output callbacks.  The three-element arrays
   are indexed by file: 0 is the original, 1 the modified and 2 the latest
   file. */
2006typedef struct svn_diff3__file_output_baton_t
2007{
  /* Stream that output_line() and the marker writers write to. */
2008  svn_stream_t *output_stream;
2009
2010  const char *path[3];
2011
  /* Number of lines consumed so far from each file. */
2012  apr_off_t   current_line[3];
2013
  /* Start of each file's contents, the end of those contents, and the
     current read position within them. */
2014  char       *buffer[3];
2015  char       *endp[3];
2016  char       *curp[3];
2017
2018  /* The following four members are in the encoding used for the output. */
2019  const char *conflict_modified;
2020  const char *conflict_original;
2021  const char *conflict_separator;
2022  const char *conflict_latest;
2023
  /* EOL string written after each conflict marker. */
2024  const char *marker_eol;
2025
2026  svn_diff_conflict_display_style_t conflict_style;
  /* Number of context lines kept before and printed after a conflict. */
2027  int context_size;
2028
2029  /* cancel support */
2030  svn_cancel_func_t cancel_func;
2031  void *cancel_baton;
2032
2033  /* The rest of the fields are for
2034     svn_diff_conflict_display_only_conflicts only.  Note that for
2035     these batons, OUTPUT_STREAM is either CONTEXT_SAVER->STREAM or
2036     (soon after a conflict) a "trailing context stream", never the
2037     actual output stream.*/
2038  /* The actual output stream. */
2039  svn_stream_t *real_output_stream;
2040  context_saver_t *context_saver;
2041  /* Used to allocate context_saver and trailing context streams, and
2042     for some printfs. */
2043  apr_pool_t *pool;
2044} svn_diff3__file_output_baton_t;
2045
2046static svn_error_t *
2047flush_context_saver(context_saver_t *cs,
2048                    svn_stream_t *output_stream)
2049{
2050  int i;
2051  for (i = 0; i < cs->context_size; i++)
2052    {
2053      apr_size_t slot = (i + cs->next_slot) % cs->context_size;
2054      if (cs->data[slot])
2055        {
2056          apr_size_t len = cs->len[slot];
2057          SVN_ERR(svn_stream_write(output_stream, cs->data[slot], &len));
2058        }
2059    }
2060  return SVN_NO_ERROR;
2061}
2062
2063static void
2064make_context_saver(svn_diff3__file_output_baton_t *fob)
2065{
2066  context_saver_t *cs;
2067
2068  assert(fob->context_size > 0); /* Or nothing to save */
2069
2070  svn_pool_clear(fob->pool);
2071  cs = apr_pcalloc(fob->pool, sizeof(*cs));
2072  cs->stream = svn_stream_empty(fob->pool);
2073  svn_stream_set_baton(cs->stream, cs);
2074  svn_stream_set_write(cs->stream, context_saver_stream_write);
2075  fob->context_saver = cs;
2076  fob->output_stream = cs->stream;
2077  cs->context_size = fob->context_size;
2078  cs->data = apr_pcalloc(fob->pool, sizeof(*cs->data) * cs->context_size);
2079  cs->len = apr_pcalloc(fob->pool, sizeof(*cs->len) * cs->context_size);
2080}
2081
2082
2083/* A stream which prints LINES_TO_PRINT (based on context size) lines to
2084   BATON->REAL_OUTPUT_STREAM, and then changes BATON->OUTPUT_STREAM to
2085   a context_saver; used for *trailing* context. */
2086
2087struct trailing_context_printer {
2088  apr_size_t lines_to_print;            /* writes still to be forwarded
                                           before switching back to a
                                           context saver */
2089  svn_diff3__file_output_baton_t *fob;  /* baton whose streams are used
                                           and switched */
2090};
2091
2092
2093
2094static svn_error_t *
2095trailing_context_printer_write(void *baton,
2096                               const char *data,
2097                               apr_size_t *len)
2098{
2099  struct trailing_context_printer *tcp = baton;
2100  SVN_ERR_ASSERT(tcp->lines_to_print > 0);
2101  SVN_ERR(svn_stream_write(tcp->fob->real_output_stream, data, len));
2102  tcp->lines_to_print--;
2103  if (tcp->lines_to_print == 0)
2104    make_context_saver(tcp->fob);
2105  return SVN_NO_ERROR;
2106}
2107
2108
2109static void
2110make_trailing_context_printer(svn_diff3__file_output_baton_t *btn)
2111{
2112  struct trailing_context_printer *tcp;
2113  svn_stream_t *s;
2114
2115  svn_pool_clear(btn->pool);
2116
2117  tcp = apr_pcalloc(btn->pool, sizeof(*tcp));
2118  tcp->lines_to_print = btn->context_size;
2119  tcp->fob = btn;
2120  s = svn_stream_empty(btn->pool);
2121  svn_stream_set_baton(s, tcp);
2122  svn_stream_set_write(s, trailing_context_printer_write);
2123  btn->output_stream = s;
2124}
2125
2126
2127
/* Tells output_line() what to do with the line it advances over. */
2128typedef enum svn_diff3__file_output_type_e
2129{
2130  svn_diff3__file_output_skip,   /* advance without writing the line */
2131  svn_diff3__file_output_normal  /* write the line to the output stream */
2132} svn_diff3__file_output_type_e;
2133
2134
2135static svn_error_t *
2136output_line(svn_diff3__file_output_baton_t *baton,
2137            svn_diff3__file_output_type_e type, int idx)
2138{
2139  char *curp;
2140  char *endp;
2141  char *eol;
2142  apr_size_t len;
2143
2144  curp = baton->curp[idx];
2145  endp = baton->endp[idx];
2146
2147  /* Lazily update the current line even if we're at EOF.
2148   */
2149  baton->current_line[idx]++;
2150
2151  if (curp == endp)
2152    return SVN_NO_ERROR;
2153
2154  eol = svn_eol__find_eol_start(curp, endp - curp);
2155  if (!eol)
2156    eol = endp;
2157  else
2158    {
2159      svn_boolean_t had_cr = (*eol == '\r');
2160      eol++;
2161      if (had_cr && eol != endp && *eol == '\n')
2162        eol++;
2163    }
2164
2165  if (type != svn_diff3__file_output_skip)
2166    {
2167      len = eol - curp;
2168      /* Note that the trailing context printer assumes that
2169         svn_stream_write is called exactly once per line. */
2170      SVN_ERR(svn_stream_write(baton->output_stream, curp, &len));
2171    }
2172
2173  baton->curp[idx] = eol;
2174
2175  return SVN_NO_ERROR;
2176}
2177
2178static svn_error_t *
2179output_marker_eol(svn_diff3__file_output_baton_t *btn)
2180{
2181  return svn_stream_puts(btn->output_stream, btn->marker_eol);
2182}
2183
2184static svn_error_t *
2185output_hunk(void *baton, int idx, apr_off_t target_line,
2186            apr_off_t target_length)
2187{
2188  svn_diff3__file_output_baton_t *output_baton = baton;
2189
2190  /* Skip lines until we are at the start of the changed range */
2191  while (output_baton->current_line[idx] < target_line)
2192    {
2193      SVN_ERR(output_line(output_baton, svn_diff3__file_output_skip, idx));
2194    }
2195
2196  target_line += target_length;
2197
2198  while (output_baton->current_line[idx] < target_line)
2199    {
2200      SVN_ERR(output_line(output_baton, svn_diff3__file_output_normal, idx));
2201    }
2202
2203  return SVN_NO_ERROR;
2204}
2205
2206static svn_error_t *
2207output_common(void *baton, apr_off_t original_start, apr_off_t original_length,
2208              apr_off_t modified_start, apr_off_t modified_length,
2209              apr_off_t latest_start, apr_off_t latest_length)
2210{
2211  return output_hunk(baton, 1, modified_start, modified_length);
2212}
2213
2214static svn_error_t *
2215output_diff_modified(void *baton,
2216                     apr_off_t original_start, apr_off_t original_length,
2217                     apr_off_t modified_start, apr_off_t modified_length,
2218                     apr_off_t latest_start, apr_off_t latest_length)
2219{
2220  return output_hunk(baton, 1, modified_start, modified_length);
2221}
2222
2223static svn_error_t *
2224output_diff_latest(void *baton,
2225                   apr_off_t original_start, apr_off_t original_length,
2226                   apr_off_t modified_start, apr_off_t modified_length,
2227                   apr_off_t latest_start, apr_off_t latest_length)
2228{
2229  return output_hunk(baton, 2, latest_start, latest_length);
2230}
2231
/* Forward declaration: output_conflict() is defined further down because
   it refers to the callback table below, which in turn refers to it. */
2232static svn_error_t *
2233output_conflict(void *baton,
2234                apr_off_t original_start, apr_off_t original_length,
2235                apr_off_t modified_start, apr_off_t modified_length,
2236                apr_off_t latest_start, apr_off_t latest_length,
2237                svn_diff_t *diff);
2238
/* Callback table used for diff3 (merge) output.  The fourth slot reuses
   output_diff_modified. */
2239static const svn_diff_output_fns_t svn_diff3__file_output_vtable =
2240{
2241  output_common,
2242  output_diff_modified,
2243  output_diff_latest,
2244  output_diff_modified, /* output_diff_common */
2245  output_conflict
2246};
2247
2248static svn_error_t *
2249output_conflict_with_context_marker(svn_diff3__file_output_baton_t *btn,
2250                                    const char *label,
2251                                    apr_off_t start,
2252                                    apr_off_t length)
2253{
2254  if (length == 1)
2255    SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2256                              "%s (%" APR_OFF_T_FMT ")",
2257                              label, start + 1));
2258  else
2259    SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2260                              "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")",
2261                              label, start + 1, length));
2262
2263  SVN_ERR(output_marker_eol(btn));
2264
2265  return SVN_NO_ERROR;
2266}
2267
2268static svn_error_t *
2269output_conflict_with_context(svn_diff3__file_output_baton_t *btn,
2270                             apr_off_t original_start,
2271                             apr_off_t original_length,
2272                             apr_off_t modified_start,
2273                             apr_off_t modified_length,
2274                             apr_off_t latest_start,
2275                             apr_off_t latest_length)
2276{
2277  /* Are we currently saving starting context (as opposed to printing
2278     trailing context)?  If so, flush it. */
2279  if (btn->output_stream == btn->context_saver->stream)
2280    {
2281      if (btn->context_saver->total_writes > btn->context_size)
2282        SVN_ERR(svn_stream_puts(btn->real_output_stream, "@@\n"));
2283      SVN_ERR(flush_context_saver(btn->context_saver, btn->real_output_stream));
2284    }
2285
2286  /* Print to the real output stream. */
2287  btn->output_stream = btn->real_output_stream;
2288
2289  /* Output the conflict itself. */
2290  SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_modified,
2291                                              modified_start, modified_length));
2292  SVN_ERR(output_hunk(btn, 1/*modified*/, modified_start, modified_length));
2293
2294  SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_original,
2295                                              original_start, original_length));
2296  SVN_ERR(output_hunk(btn, 0/*original*/, original_start, original_length));
2297
2298  SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2299                            "%s%s", btn->conflict_separator, btn->marker_eol));
2300  SVN_ERR(output_hunk(btn, 2/*latest*/, latest_start, latest_length));
2301  SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_latest,
2302                                              latest_start, latest_length));
2303
2304  /* Go into print-trailing-context mode instead. */
2305  make_trailing_context_printer(btn);
2306
2307  return SVN_NO_ERROR;
2308}
2309
2310
2311static svn_error_t *
2312output_conflict(void *baton,
2313                apr_off_t original_start, apr_off_t original_length,
2314                apr_off_t modified_start, apr_off_t modified_length,
2315                apr_off_t latest_start, apr_off_t latest_length,
2316                svn_diff_t *diff)
2317{
2318  svn_diff3__file_output_baton_t *file_baton = baton;
2319
2320  svn_diff_conflict_display_style_t style = file_baton->conflict_style;
2321
2322  if (style == svn_diff_conflict_display_only_conflicts)
2323    return output_conflict_with_context(file_baton,
2324                                        original_start, original_length,
2325                                        modified_start, modified_length,
2326                                        latest_start, latest_length);
2327
2328  if (style == svn_diff_conflict_display_resolved_modified_latest)
2329    {
2330      if (diff)
2331        return svn_diff_output2(diff, baton,
2332                                &svn_diff3__file_output_vtable,
2333                                file_baton->cancel_func,
2334                                file_baton->cancel_baton);
2335      else
2336        style = svn_diff_conflict_display_modified_latest;
2337    }
2338
2339  if (style == svn_diff_conflict_display_modified_latest ||
2340      style == svn_diff_conflict_display_modified_original_latest)
2341    {
2342      SVN_ERR(svn_stream_puts(file_baton->output_stream,
2343                               file_baton->conflict_modified));
2344      SVN_ERR(output_marker_eol(file_baton));
2345
2346      SVN_ERR(output_hunk(baton, 1, modified_start, modified_length));
2347
2348      if (style == svn_diff_conflict_display_modified_original_latest)
2349        {
2350          SVN_ERR(svn_stream_puts(file_baton->output_stream,
2351                                   file_baton->conflict_original));
2352          SVN_ERR(output_marker_eol(file_baton));
2353          SVN_ERR(output_hunk(baton, 0, original_start, original_length));
2354        }
2355
2356      SVN_ERR(svn_stream_puts(file_baton->output_stream,
2357                              file_baton->conflict_separator));
2358      SVN_ERR(output_marker_eol(file_baton));
2359
2360      SVN_ERR(output_hunk(baton, 2, latest_start, latest_length));
2361
2362      SVN_ERR(svn_stream_puts(file_baton->output_stream,
2363                              file_baton->conflict_latest));
2364      SVN_ERR(output_marker_eol(file_baton));
2365    }
2366  else if (style == svn_diff_conflict_display_modified)
2367    SVN_ERR(output_hunk(baton, 1, modified_start, modified_length));
2368  else if (style == svn_diff_conflict_display_latest)
2369    SVN_ERR(output_hunk(baton, 2, latest_start, latest_length));
2370  else /* unknown style */
2371    SVN_ERR_MALFUNCTION();
2372
2373  return SVN_NO_ERROR;
2374}
2375
2376svn_error_t *
2377svn_diff_file_output_merge3(svn_stream_t *output_stream,
2378                            svn_diff_t *diff,
2379                            const char *original_path,
2380                            const char *modified_path,
2381                            const char *latest_path,
2382                            const char *conflict_original,
2383                            const char *conflict_modified,
2384                            const char *conflict_latest,
2385                            const char *conflict_separator,
2386                            svn_diff_conflict_display_style_t style,
2387                            svn_cancel_func_t cancel_func,
2388                            void *cancel_baton,
2389                            apr_pool_t *scratch_pool)
2390{
2391  svn_diff3__file_output_baton_t baton;
2392  apr_file_t *file[3];
2393  int idx;
2394#if APR_HAS_MMAP
2395  apr_mmap_t *mm[3] = { 0 };
2396#endif /* APR_HAS_MMAP */
2397  const char *eol;
2398  svn_boolean_t conflicts_only =
2399    (style == svn_diff_conflict_display_only_conflicts);
2400
2401  memset(&baton, 0, sizeof(baton));
2402  baton.context_size = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
2403  if (conflicts_only)
2404    {
2405      baton.pool = svn_pool_create(scratch_pool);
2406      make_context_saver(&baton);
2407      baton.real_output_stream = output_stream;
2408    }
2409  else
2410    baton.output_stream = output_stream;
2411  baton.path[0] = original_path;
2412  baton.path[1] = modified_path;
2413  baton.path[2] = latest_path;
2414  SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_modified,
2415                                    conflict_modified ? conflict_modified
2416                                    : apr_psprintf(scratch_pool, "<<<<<<< %s",
2417                                                   modified_path),
2418                                    scratch_pool));
2419  SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_original,
2420                                    conflict_original ? conflict_original
2421                                    : apr_psprintf(scratch_pool, "||||||| %s",
2422                                                   original_path),
2423                                    scratch_pool));
2424  SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_separator,
2425                                    conflict_separator ? conflict_separator
2426                                    : "=======", scratch_pool));
2427  SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_latest,
2428                                    conflict_latest ? conflict_latest
2429                                    : apr_psprintf(scratch_pool, ">>>>>>> %s",
2430                                                   latest_path),
2431                                    scratch_pool));
2432
2433  baton.conflict_style = style;
2434
2435  for (idx = 0; idx < 3; idx++)
2436    {
2437      apr_size_t size;
2438
2439      SVN_ERR(map_or_read_file(&file[idx],
2440                               MMAP_T_ARG(mm[idx])
2441                               &baton.buffer[idx], &size,
2442                               baton.path[idx], scratch_pool));
2443
2444      baton.curp[idx] = baton.buffer[idx];
2445      baton.endp[idx] = baton.buffer[idx];
2446
2447      if (baton.endp[idx])
2448        baton.endp[idx] += size;
2449    }
2450
2451  /* Check what eol marker we should use for conflict markers.
2452     We use the eol marker of the modified file and fall back on the
2453     platform's eol marker if that file doesn't contain any newlines. */
2454  eol = svn_eol__detect_eol(baton.buffer[1], baton.endp[1] - baton.buffer[1],
2455                            NULL);
2456  if (! eol)
2457    eol = APR_EOL_STR;
2458  baton.marker_eol = eol;
2459
2460  baton.cancel_func = cancel_func;
2461  baton.cancel_baton = cancel_baton;
2462
2463  SVN_ERR(svn_diff_output2(diff, &baton,
2464                          &svn_diff3__file_output_vtable,
2465                          cancel_func, cancel_baton));
2466
2467  for (idx = 0; idx < 3; idx++)
2468    {
2469#if APR_HAS_MMAP
2470      if (mm[idx])
2471        {
2472          apr_status_t rv = apr_mmap_delete(mm[idx]);
2473          if (rv != APR_SUCCESS)
2474            {
2475              return svn_error_wrap_apr(rv, _("Failed to delete mmap '%s'"),
2476                                        baton.path[idx]);
2477            }
2478        }
2479#endif /* APR_HAS_MMAP */
2480
2481      if (file[idx])
2482        {
2483          SVN_ERR(svn_io_file_close(file[idx], scratch_pool));
2484        }
2485    }
2486
2487  if (conflicts_only)
2488    svn_pool_destroy(baton.pool);
2489
2490  return SVN_NO_ERROR;
2491}
2492
2493