1251881Speter/*
2251881Speter * diff_file.c :  routines for doing diffs on files
3251881Speter *
4251881Speter * ====================================================================
5251881Speter *    Licensed to the Apache Software Foundation (ASF) under one
6251881Speter *    or more contributor license agreements.  See the NOTICE file
7251881Speter *    distributed with this work for additional information
8251881Speter *    regarding copyright ownership.  The ASF licenses this file
9251881Speter *    to you under the Apache License, Version 2.0 (the
10251881Speter *    "License"); you may not use this file except in compliance
11251881Speter *    with the License.  You may obtain a copy of the License at
12251881Speter *
13251881Speter *      http://www.apache.org/licenses/LICENSE-2.0
14251881Speter *
15251881Speter *    Unless required by applicable law or agreed to in writing,
16251881Speter *    software distributed under the License is distributed on an
17251881Speter *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18251881Speter *    KIND, either express or implied.  See the License for the
19251881Speter *    specific language governing permissions and limitations
20251881Speter *    under the License.
21251881Speter * ====================================================================
22251881Speter */
23251881Speter
24251881Speter
25251881Speter#include <apr.h>
26251881Speter#include <apr_pools.h>
27251881Speter#include <apr_general.h>
28251881Speter#include <apr_file_io.h>
29251881Speter#include <apr_file_info.h>
30251881Speter#include <apr_time.h>
31251881Speter#include <apr_mmap.h>
32251881Speter#include <apr_getopt.h>
33251881Speter
34289180Speter#include <assert.h>
35289180Speter
36251881Speter#include "svn_error.h"
37251881Speter#include "svn_diff.h"
38251881Speter#include "svn_types.h"
39251881Speter#include "svn_string.h"
40251881Speter#include "svn_subst.h"
41251881Speter#include "svn_io.h"
42251881Speter#include "svn_utf.h"
43251881Speter#include "svn_pools.h"
44251881Speter#include "diff.h"
45251881Speter#include "svn_private_config.h"
46251881Speter#include "svn_path.h"
47251881Speter#include "svn_ctype.h"
48251881Speter
49251881Speter#include "private/svn_utf_private.h"
50251881Speter#include "private/svn_eol_private.h"
51251881Speter#include "private/svn_dep_compat.h"
52251881Speter#include "private/svn_adler32.h"
53251881Speter#include "private/svn_diff_private.h"
54251881Speter
55251881Speter/* A token, i.e. a line read from a file. */
56251881Spetertypedef struct svn_diff__file_token_t
57251881Speter{
58251881Speter  /* Next token in free list. */
59251881Speter  struct svn_diff__file_token_t *next;
60251881Speter  svn_diff_datasource_e datasource;
61251881Speter  /* Offset in the datasource. */
62251881Speter  apr_off_t offset;
63251881Speter  /* Offset of the normalized token (may skip leading whitespace) */
64251881Speter  apr_off_t norm_offset;
65251881Speter  /* Total length - before normalization. */
66251881Speter  apr_off_t raw_length;
67251881Speter  /* Total length - after normalization. */
68251881Speter  apr_off_t length;
69251881Speter} svn_diff__file_token_t;
70251881Speter
71251881Speter
72251881Spetertypedef struct svn_diff__file_baton_t
73251881Speter{
74251881Speter  const svn_diff_file_options_t *options;
75251881Speter
76251881Speter  struct file_info {
77251881Speter    const char *path;  /* path to this file, absolute or relative to CWD */
78251881Speter
79251881Speter    /* All the following fields are active while this datasource is open */
80251881Speter    apr_file_t *file;  /* handle of this file */
81251881Speter    apr_off_t size;    /* total raw size in bytes of this file */
82251881Speter
83251881Speter    /* The current chunk: CHUNK_SIZE bytes except for the last chunk. */
84251881Speter    int chunk;     /* the current chunk number, zero-based */
85251881Speter    char *buffer;  /* a buffer containing the current chunk */
86251881Speter    char *curp;    /* current position in the current chunk */
87251881Speter    char *endp;    /* next memory address after the current chunk */
88251881Speter
89251881Speter    svn_diff__normalize_state_t normalize_state;
90251881Speter
91251881Speter    /* Where the identical suffix starts in this datasource */
92251881Speter    int suffix_start_chunk;
93251881Speter    apr_off_t suffix_offset_in_chunk;
94251881Speter  } files[4];
95251881Speter
96251881Speter  /* List of free tokens that may be reused. */
97251881Speter  svn_diff__file_token_t *tokens;
98251881Speter
99251881Speter  apr_pool_t *pool;
100251881Speter} svn_diff__file_baton_t;
101251881Speter
102251881Speterstatic int
103251881Speterdatasource_to_index(svn_diff_datasource_e datasource)
104251881Speter{
105251881Speter  switch (datasource)
106251881Speter    {
107251881Speter    case svn_diff_datasource_original:
108251881Speter      return 0;
109251881Speter
110251881Speter    case svn_diff_datasource_modified:
111251881Speter      return 1;
112251881Speter
113251881Speter    case svn_diff_datasource_latest:
114251881Speter      return 2;
115251881Speter
116251881Speter    case svn_diff_datasource_ancestor:
117251881Speter      return 3;
118251881Speter    }
119251881Speter
120251881Speter  return -1;
121251881Speter}
122251881Speter
/* Files are read in chunks of 128k (1 << CHUNK_SHIFT bytes).  Nothing
 * backs up this particular number.  If someone comes up with a number that
 * has some argumentation behind it, let's use that.
 */
/* If you change this number, update test_norm_offset(),
 * test_identical_suffix() and test_token_compare() in diff-diff3-test.c.
 */
#define CHUNK_SHIFT 17
#define CHUNK_SIZE (1 << CHUNK_SHIFT)

/* Byte offset at which chunk number CHUNK starts.
 *
 * The chunk number is widened to apr_off_t before it is shifted.  Chunk
 * numbers are kept in plain ints, and shifting an int left by CHUNK_SHIFT
 * overflows (undefined behavior) as soon as the resulting offset reaches
 * 2^31, i.e. for files of 2GB and more.
 */
#define chunk_to_offset(chunk) ((apr_off_t)(chunk) << CHUNK_SHIFT)

/* Number of the chunk that contains byte OFFSET. */
#define offset_to_chunk(offset) ((offset) >> CHUNK_SHIFT)

/* Position of byte OFFSET relative to the start of its chunk. */
#define offset_in_chunk(offset) ((offset) & (CHUNK_SIZE - 1))
136251881Speter
137251881Speter
138251881Speter/* Read a chunk from a FILE into BUFFER, starting from OFFSET, going for
139251881Speter * *LENGTH.  The actual bytes read are stored in *LENGTH on return.
140251881Speter */
141251881Speterstatic APR_INLINE svn_error_t *
142289180Speterread_chunk(apr_file_t *file,
143251881Speter           char *buffer, apr_off_t length,
144289180Speter           apr_off_t offset, apr_pool_t *scratch_pool)
145251881Speter{
146251881Speter  /* XXX: The final offset may not be the one we asked for.
147251881Speter   * XXX: Check.
148251881Speter   */
149289180Speter  SVN_ERR(svn_io_file_seek(file, APR_SET, &offset, scratch_pool));
150251881Speter  return svn_io_file_read_full2(file, buffer, (apr_size_t) length,
151289180Speter                                NULL, NULL, scratch_pool);
152251881Speter}
153251881Speter
154251881Speter
155251881Speter/* Map or read a file at PATH. *BUFFER will point to the file
156251881Speter * contents; if the file was mapped, *FILE and *MM will contain the
157251881Speter * mmap context; otherwise they will be NULL.  SIZE will contain the
158251881Speter * file size.  Allocate from POOL.
159251881Speter */
160251881Speter#if APR_HAS_MMAP
161251881Speter#define MMAP_T_PARAM(NAME) apr_mmap_t **NAME,
162251881Speter#define MMAP_T_ARG(NAME)   &(NAME),
163251881Speter#else
164251881Speter#define MMAP_T_PARAM(NAME)
165251881Speter#define MMAP_T_ARG(NAME)
166251881Speter#endif
167251881Speter
168251881Speterstatic svn_error_t *
169251881Spetermap_or_read_file(apr_file_t **file,
170251881Speter                 MMAP_T_PARAM(mm)
171257936Speter                 char **buffer, apr_size_t *size_p,
172251881Speter                 const char *path, apr_pool_t *pool)
173251881Speter{
174251881Speter  apr_finfo_t finfo;
175251881Speter  apr_status_t rv;
176257936Speter  apr_size_t size;
177251881Speter
178251881Speter  *buffer = NULL;
179251881Speter
180251881Speter  SVN_ERR(svn_io_file_open(file, path, APR_READ, APR_OS_DEFAULT, pool));
181251881Speter  SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, *file, pool));
182251881Speter
183257936Speter  if (finfo.size > APR_SIZE_MAX)
184257936Speter    {
185257936Speter      return svn_error_createf(APR_ENOMEM, NULL,
186257936Speter                               _("File '%s' is too large to be read in "
187257936Speter                                 "to memory"), path);
188257936Speter    }
189257936Speter
190257936Speter  size = (apr_size_t) finfo.size;
191251881Speter#if APR_HAS_MMAP
192257936Speter  if (size > APR_MMAP_THRESHOLD)
193251881Speter    {
194257936Speter      rv = apr_mmap_create(mm, *file, 0, size, APR_MMAP_READ, pool);
195251881Speter      if (rv == APR_SUCCESS)
196251881Speter        {
197251881Speter          *buffer = (*mm)->mm;
198251881Speter        }
199257936Speter      else
200257936Speter        {
201257936Speter          /* Clear *MM because output parameters are undefined on error. */
202257936Speter          *mm = NULL;
203257936Speter        }
204251881Speter
205251881Speter      /* On failure we just fall through and try reading the file into
206251881Speter       * memory instead.
207251881Speter       */
208251881Speter    }
209251881Speter#endif /* APR_HAS_MMAP */
210251881Speter
211257936Speter   if (*buffer == NULL && size > 0)
212251881Speter    {
213257936Speter      *buffer = apr_palloc(pool, size);
214251881Speter
215257936Speter      SVN_ERR(svn_io_file_read_full2(*file, *buffer, size, NULL, NULL, pool));
216251881Speter
217251881Speter      /* Since we have the entire contents of the file we can
218251881Speter       * close it now.
219251881Speter       */
220251881Speter      SVN_ERR(svn_io_file_close(*file, pool));
221251881Speter
222251881Speter      *file = NULL;
223251881Speter    }
224251881Speter
225257936Speter  *size_p = size;
226251881Speter
227251881Speter  return SVN_NO_ERROR;
228251881Speter}
229251881Speter
230251881Speter
/* Move the curp pointer of each of the first FILES_LEN entries of ALL_FILES
 * one byte forward.  As long as curp stays within the current chunk this is
 * a plain increment; otherwise the work is handed to increment_chunk(),
 * which
 *   - puts a file that points before its beginning back at its first byte,
 *   - reads the next chunk and points curp at its start, or
 *   - sets curp equal to endp to indicate EOF.
 * An error from increment_chunk() is returned from the enclosing function
 * via SVN_ERR. */
#define INCREMENT_POINTERS(all_files, files_len, pool)                       \
  do {                                                                       \
    apr_size_t svn_macro__idx;                                               \
                                                                             \
    for (svn_macro__idx = 0; svn_macro__idx < (files_len); ++svn_macro__idx) \
      {                                                                      \
        struct file_info *svn_macro__cur = &(all_files)[svn_macro__idx];     \
                                                                             \
        if (svn_macro__cur->curp < svn_macro__cur->endp - 1)                 \
          {                                                                  \
            svn_macro__cur->curp++;                                          \
          }                                                                  \
        else                                                                 \
          {                                                                  \
            SVN_ERR(increment_chunk(svn_macro__cur, (pool)));                \
          }                                                                  \
      }                                                                      \
  } while (0)
248251881Speter
249251881Speter
/* Move the curp pointer of each of the first FILES_LEN entries of ALL_FILES
 * one byte backward.  While curp is beyond the start of the buffer this is
 * a plain decrement; otherwise the work is handed to decrement_chunk(),
 * which reads the previous chunk and points curp at its last byte, or sets
 * chunk to -1 to indicate BOF once the beginning of the file is reached.
 * An error from decrement_chunk() is returned from the enclosing function
 * via SVN_ERR. */
#define DECREMENT_POINTERS(all_files, files_len, pool)                       \
  do {                                                                       \
    apr_size_t svn_macro__idx;                                               \
                                                                             \
    for (svn_macro__idx = 0; svn_macro__idx < (files_len); ++svn_macro__idx) \
      {                                                                      \
        struct file_info *svn_macro__cur = &(all_files)[svn_macro__idx];     \
                                                                             \
        if (svn_macro__cur->curp > svn_macro__cur->buffer)                   \
          {                                                                  \
            svn_macro__cur->curp--;                                          \
          }                                                                  \
        else                                                                 \
          {                                                                  \
            SVN_ERR(decrement_chunk(svn_macro__cur, (pool)));                \
          }                                                                  \
      }                                                                      \
  } while (0)
266251881Speter
267251881Speter
268251881Speterstatic svn_error_t *
269251881Speterincrement_chunk(struct file_info *file, apr_pool_t *pool)
270251881Speter{
271251881Speter  apr_off_t length;
272251881Speter  apr_off_t last_chunk = offset_to_chunk(file->size);
273251881Speter
274251881Speter  if (file->chunk == -1)
275251881Speter    {
276251881Speter      /* We are at BOF (Beginning Of File). Point to first chunk/byte again. */
277251881Speter      file->chunk = 0;
278251881Speter      file->curp = file->buffer;
279251881Speter    }
280251881Speter  else if (file->chunk == last_chunk)
281251881Speter    {
282251881Speter      /* We are at the last chunk. Indicate EOF by setting curp == endp. */
283251881Speter      file->curp = file->endp;
284251881Speter    }
285251881Speter  else
286251881Speter    {
287251881Speter      /* There are still chunks left. Read next chunk and reset pointers. */
288251881Speter      file->chunk++;
289251881Speter      length = file->chunk == last_chunk ?
290251881Speter        offset_in_chunk(file->size) : CHUNK_SIZE;
291289180Speter      SVN_ERR(read_chunk(file->file, file->buffer,
292251881Speter                         length, chunk_to_offset(file->chunk),
293251881Speter                         pool));
294251881Speter      file->endp = file->buffer + length;
295251881Speter      file->curp = file->buffer;
296251881Speter    }
297251881Speter
298251881Speter  return SVN_NO_ERROR;
299251881Speter}
300251881Speter
301251881Speter
302251881Speterstatic svn_error_t *
303251881Speterdecrement_chunk(struct file_info *file, apr_pool_t *pool)
304251881Speter{
305251881Speter  if (file->chunk == 0)
306251881Speter    {
307251881Speter      /* We are already at the first chunk. Indicate BOF (Beginning Of File)
308251881Speter         by setting chunk = -1 and curp = endp - 1. Both conditions are
309251881Speter         important. They help the increment step to catch the BOF situation
310251881Speter         in an efficient way. */
311251881Speter      file->chunk--;
312251881Speter      file->curp = file->endp - 1;
313251881Speter    }
314251881Speter  else
315251881Speter    {
316251881Speter      /* Read previous chunk and reset pointers. */
317251881Speter      file->chunk--;
318289180Speter      SVN_ERR(read_chunk(file->file, file->buffer,
319251881Speter                         CHUNK_SIZE, chunk_to_offset(file->chunk),
320251881Speter                         pool));
321251881Speter      file->endp = file->buffer + CHUNK_SIZE;
322251881Speter      file->curp = file->endp - 1;
323251881Speter    }
324251881Speter
325251881Speter  return SVN_NO_ERROR;
326251881Speter}
327251881Speter
328251881Speter
329251881Speter/* Check whether one of the FILEs has its pointers 'before' the beginning of
330251881Speter * the file (this can happen while scanning backwards). This is the case if
331251881Speter * one of them has chunk == -1. */
332251881Speterstatic svn_boolean_t
333251881Speteris_one_at_bof(struct file_info file[], apr_size_t file_len)
334251881Speter{
335251881Speter  apr_size_t i;
336251881Speter
337251881Speter  for (i = 0; i < file_len; i++)
338251881Speter    if (file[i].chunk == -1)
339251881Speter      return TRUE;
340251881Speter
341251881Speter  return FALSE;
342251881Speter}
343251881Speter
344251881Speter/* Check whether one of the FILEs has its pointers at EOF (this is the case if
345251881Speter * one of them has curp == endp (this can only happen at the last chunk)) */
346251881Speterstatic svn_boolean_t
347251881Speteris_one_at_eof(struct file_info file[], apr_size_t file_len)
348251881Speter{
349251881Speter  apr_size_t i;
350251881Speter
351251881Speter  for (i = 0; i < file_len; i++)
352251881Speter    if (file[i].curp == file[i].endp)
353251881Speter      return TRUE;
354251881Speter
355251881Speter  return FALSE;
356251881Speter}
357251881Speter
/* Quickly determine whether the machine word CHUNK holds an eol char.
 * (mainly copy-n-paste from eol.c#svn_eol__find_eol_start).
 *
 * NOTE(review): the SVN__* masks are defined outside this file.  The
 * comments below assume that SVN__R_MASK / SVN__N_MASK repeat '\r' / '\n'
 * in every byte, and that SVN__LOWER_7BITS_SET / SVN__BIT_7_SET select the
 * low seven bits / the top bit of every byte -- confirm in
 * private/svn_eol_private.h.
 */

#if SVN_UNALIGNED_ACCESS_IS_OK
static svn_boolean_t contains_eol(apr_uintptr_t chunk)
{
  /* After the XOR, a byte is zero exactly where CHUNK had the eol char. */
  apr_uintptr_t cr_probe = chunk ^ SVN__R_MASK;
  apr_uintptr_t lf_probe = chunk ^ SVN__N_MASK;

  /* Fold each byte's low seven bits into its top bit, so that the top bit
     ends up set for every byte that was not zero. */
  cr_probe |= (cr_probe & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;
  lf_probe |= (lf_probe & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;

  /* No eol char only if every top bit is set in both probes. */
  if ((cr_probe & lf_probe & SVN__BIT_7_SET) == SVN__BIT_7_SET)
    return FALSE;

  return TRUE;
}
#endif
374251881Speter
375251881Speter/* Find the prefix which is identical between all elements of the FILE array.
376251881Speter * Return the number of prefix lines in PREFIX_LINES.  REACHED_ONE_EOF will be
377251881Speter * set to TRUE if one of the FILEs reached its end while scanning prefix,
378251881Speter * i.e. at least one file consisted entirely of prefix.  Otherwise,
379251881Speter * REACHED_ONE_EOF is set to FALSE.
380251881Speter *
381251881Speter * After this function is finished, the buffers, chunks, curp's and endp's
382251881Speter * of the FILEs are set to point at the first byte after the prefix. */
383251881Speterstatic svn_error_t *
384251881Speterfind_identical_prefix(svn_boolean_t *reached_one_eof, apr_off_t *prefix_lines,
385251881Speter                      struct file_info file[], apr_size_t file_len,
386251881Speter                      apr_pool_t *pool)
387251881Speter{
388251881Speter  svn_boolean_t had_cr = FALSE;
389251881Speter  svn_boolean_t is_match;
390251881Speter  apr_off_t lines = 0;
391251881Speter  apr_size_t i;
392251881Speter
393251881Speter  *reached_one_eof = FALSE;
394251881Speter
395251881Speter  for (i = 1, is_match = TRUE; i < file_len; i++)
396251881Speter    is_match = is_match && *file[0].curp == *file[i].curp;
397251881Speter  while (is_match)
398251881Speter    {
399251881Speter#if SVN_UNALIGNED_ACCESS_IS_OK
400251881Speter      apr_ssize_t max_delta, delta;
401251881Speter#endif /* SVN_UNALIGNED_ACCESS_IS_OK */
402251881Speter
403251881Speter      /* ### TODO: see if we can take advantage of
404251881Speter         diff options like ignore_eol_style or ignore_space. */
405251881Speter      /* check for eol, and count */
406251881Speter      if (*file[0].curp == '\r')
407251881Speter        {
408251881Speter          lines++;
409251881Speter          had_cr = TRUE;
410251881Speter        }
411251881Speter      else if (*file[0].curp == '\n' && !had_cr)
412251881Speter        {
413251881Speter          lines++;
414251881Speter        }
415251881Speter      else
416251881Speter        {
417251881Speter          had_cr = FALSE;
418251881Speter        }
419251881Speter
420251881Speter      INCREMENT_POINTERS(file, file_len, pool);
421251881Speter
422251881Speter#if SVN_UNALIGNED_ACCESS_IS_OK
423251881Speter
424251881Speter      /* Try to advance as far as possible with machine-word granularity.
425251881Speter       * Determine how far we may advance with chunky ops without reaching
426251881Speter       * endp for any of the files.
427251881Speter       * Signedness is important here if curp gets close to endp.
428251881Speter       */
429251881Speter      max_delta = file[0].endp - file[0].curp - sizeof(apr_uintptr_t);
430251881Speter      for (i = 1; i < file_len; i++)
431251881Speter        {
432251881Speter          delta = file[i].endp - file[i].curp - sizeof(apr_uintptr_t);
433251881Speter          if (delta < max_delta)
434251881Speter            max_delta = delta;
435251881Speter        }
436251881Speter
437251881Speter      is_match = TRUE;
438251881Speter      for (delta = 0; delta < max_delta; delta += sizeof(apr_uintptr_t))
439251881Speter        {
440251881Speter          apr_uintptr_t chunk = *(const apr_uintptr_t *)(file[0].curp + delta);
441251881Speter          if (contains_eol(chunk))
442251881Speter            break;
443251881Speter
444251881Speter          for (i = 1; i < file_len; i++)
445251881Speter            if (chunk != *(const apr_uintptr_t *)(file[i].curp + delta))
446251881Speter              {
447251881Speter                is_match = FALSE;
448251881Speter                break;
449251881Speter              }
450251881Speter
451251881Speter          if (! is_match)
452251881Speter            break;
453251881Speter        }
454251881Speter
455251881Speter      if (delta /* > 0*/)
456251881Speter        {
457251881Speter          /* We either found a mismatch or an EOL at or shortly behind curp+delta
458251881Speter           * or we cannot proceed with chunky ops without exceeding endp.
459251881Speter           * In any way, everything up to curp + delta is equal and not an EOL.
460251881Speter           */
461251881Speter          for (i = 0; i < file_len; i++)
462251881Speter            file[i].curp += delta;
463251881Speter
464251881Speter          /* Skipped data without EOL markers, so last char was not a CR. */
465251881Speter          had_cr = FALSE;
466251881Speter        }
467251881Speter#endif
468251881Speter
469251881Speter      *reached_one_eof = is_one_at_eof(file, file_len);
470251881Speter      if (*reached_one_eof)
471251881Speter        break;
472251881Speter      else
473251881Speter        for (i = 1, is_match = TRUE; i < file_len; i++)
474251881Speter          is_match = is_match && *file[0].curp == *file[i].curp;
475251881Speter    }
476251881Speter
477251881Speter  if (had_cr)
478251881Speter    {
479251881Speter      /* Check if we ended in the middle of a \r\n for one file, but \r for
480251881Speter         another. If so, back up one byte, so the next loop will back up
481251881Speter         the entire line. Also decrement lines, since we counted one
482251881Speter         too many for the \r. */
483251881Speter      svn_boolean_t ended_at_nonmatching_newline = FALSE;
484251881Speter      for (i = 0; i < file_len; i++)
485251881Speter        if (file[i].curp < file[i].endp)
486251881Speter          ended_at_nonmatching_newline = ended_at_nonmatching_newline
487251881Speter                                         || *file[i].curp == '\n';
488251881Speter      if (ended_at_nonmatching_newline)
489251881Speter        {
490251881Speter          lines--;
491251881Speter          DECREMENT_POINTERS(file, file_len, pool);
492251881Speter        }
493251881Speter    }
494251881Speter
495251881Speter  /* Back up one byte, so we point at the last identical byte */
496251881Speter  DECREMENT_POINTERS(file, file_len, pool);
497251881Speter
498251881Speter  /* Back up to the last eol sequence (\n, \r\n or \r) */
499251881Speter  while (!is_one_at_bof(file, file_len) &&
500251881Speter         *file[0].curp != '\n' && *file[0].curp != '\r')
501251881Speter    DECREMENT_POINTERS(file, file_len, pool);
502251881Speter
503251881Speter  /* Slide one byte forward, to point past the eol sequence */
504251881Speter  INCREMENT_POINTERS(file, file_len, pool);
505251881Speter
506251881Speter  *prefix_lines = lines;
507251881Speter
508251881Speter  return SVN_NO_ERROR;
509251881Speter}
510251881Speter
511251881Speter
/* How many lines of the identical suffix are kept with the middle section.
 * These lines are not eliminated as suffix, so the token parsing and lcs
 * steps can still pick them up.
 *
 * This is mainly for backward compatibility with the previous diff (and
 * blame) output: if there are multiple diff solutions, our lcs algorithm
 * prefers taking common lines from the start rather than from the end.
 * Giving it back some suffix lines leaves it some wiggle room to find the
 * exact same diff as before.
 *
 * The number 50 is more or less arbitrary.  It is based on some real-world
 * tests with big files, after which the required number was doubled to be
 * on the safe side.  This has a negligible effect on the power of the
 * optimization. */
/* If you change this number, update test_identical_suffix() in
 * diff-diff3-test.c */
#if !defined(SUFFIX_LINES_TO_KEEP)
#define SUFFIX_LINES_TO_KEEP 50
#endif
527251881Speter
528251881Speter/* Find the suffix which is identical between all elements of the FILE array.
529251881Speter * Return the number of suffix lines in SUFFIX_LINES.
530251881Speter *
531251881Speter * Before this function is called the FILEs' pointers and chunks should be
532251881Speter * positioned right after the identical prefix (which is the case after
533251881Speter * find_identical_prefix), so we can determine where suffix scanning should
534251881Speter * ultimately stop. */
535251881Speterstatic svn_error_t *
536251881Speterfind_identical_suffix(apr_off_t *suffix_lines, struct file_info file[],
537251881Speter                      apr_size_t file_len, apr_pool_t *pool)
538251881Speter{
539251881Speter  struct file_info file_for_suffix[4] = { { 0 }  };
540251881Speter  apr_off_t length[4];
541251881Speter  apr_off_t suffix_min_chunk0;
542251881Speter  apr_off_t suffix_min_offset0;
543251881Speter  apr_off_t min_file_size;
544251881Speter  int suffix_lines_to_keep = SUFFIX_LINES_TO_KEEP;
545251881Speter  svn_boolean_t is_match;
546251881Speter  apr_off_t lines = 0;
547251881Speter  svn_boolean_t had_nl;
548251881Speter  apr_size_t i;
549251881Speter
550251881Speter  /* Initialize file_for_suffix[].
551251881Speter     Read last chunk, position curp at last byte. */
552251881Speter  for (i = 0; i < file_len; i++)
553251881Speter    {
554251881Speter      file_for_suffix[i].path = file[i].path;
555251881Speter      file_for_suffix[i].file = file[i].file;
556251881Speter      file_for_suffix[i].size = file[i].size;
557251881Speter      file_for_suffix[i].chunk =
558251881Speter        (int) offset_to_chunk(file_for_suffix[i].size); /* last chunk */
559251881Speter      length[i] = offset_in_chunk(file_for_suffix[i].size);
560251881Speter      if (length[i] == 0)
561251881Speter        {
562251881Speter          /* last chunk is an empty chunk -> start at next-to-last chunk */
563251881Speter          file_for_suffix[i].chunk = file_for_suffix[i].chunk - 1;
564251881Speter          length[i] = CHUNK_SIZE;
565251881Speter        }
566251881Speter
567251881Speter      if (file_for_suffix[i].chunk == file[i].chunk)
568251881Speter        {
569251881Speter          /* Prefix ended in last chunk, so we can reuse the prefix buffer */
570251881Speter          file_for_suffix[i].buffer = file[i].buffer;
571251881Speter        }
572251881Speter      else
573251881Speter        {
574251881Speter          /* There is at least more than 1 chunk,
575251881Speter             so allocate full chunk size buffer */
576251881Speter          file_for_suffix[i].buffer = apr_palloc(pool, CHUNK_SIZE);
577289180Speter          SVN_ERR(read_chunk(file_for_suffix[i].file,
578251881Speter                             file_for_suffix[i].buffer, length[i],
579251881Speter                             chunk_to_offset(file_for_suffix[i].chunk),
580251881Speter                             pool));
581251881Speter        }
582251881Speter      file_for_suffix[i].endp = file_for_suffix[i].buffer + length[i];
583251881Speter      file_for_suffix[i].curp = file_for_suffix[i].endp - 1;
584251881Speter    }
585251881Speter
586251881Speter  /* Get the chunk and pointer offset (for file[0]) at which we should stop
587251881Speter     scanning backward for the identical suffix, i.e. when we reach prefix. */
588251881Speter  suffix_min_chunk0 = file[0].chunk;
589251881Speter  suffix_min_offset0 = file[0].curp - file[0].buffer;
590251881Speter
591251881Speter  /* Compensate if other files are smaller than file[0] */
592251881Speter  for (i = 1, min_file_size = file[0].size; i < file_len; i++)
593251881Speter    if (file[i].size < min_file_size)
594251881Speter      min_file_size = file[i].size;
595251881Speter  if (file[0].size > min_file_size)
596251881Speter    {
597251881Speter      suffix_min_chunk0 += (file[0].size - min_file_size) / CHUNK_SIZE;
598251881Speter      suffix_min_offset0 += (file[0].size - min_file_size) % CHUNK_SIZE;
599251881Speter    }
600251881Speter
601251881Speter  /* Scan backwards until mismatch or until we reach the prefix. */
602251881Speter  for (i = 1, is_match = TRUE; i < file_len; i++)
603251881Speter    is_match = is_match
604251881Speter               && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
605251881Speter  if (is_match && *file_for_suffix[0].curp != '\r'
606251881Speter               && *file_for_suffix[0].curp != '\n')
607251881Speter    /* Count an extra line for the last line not ending in an eol. */
608251881Speter    lines++;
609251881Speter
610251881Speter  had_nl = FALSE;
611251881Speter  while (is_match)
612251881Speter    {
613251881Speter      svn_boolean_t reached_prefix;
614251881Speter#if SVN_UNALIGNED_ACCESS_IS_OK
615251881Speter      /* Initialize the minimum pointer positions. */
616251881Speter      const char *min_curp[4];
617251881Speter      svn_boolean_t can_read_word;
618251881Speter#endif /* SVN_UNALIGNED_ACCESS_IS_OK */
619251881Speter
620251881Speter      /* ### TODO: see if we can take advantage of
621251881Speter         diff options like ignore_eol_style or ignore_space. */
622251881Speter      /* check for eol, and count */
623251881Speter      if (*file_for_suffix[0].curp == '\n')
624251881Speter        {
625251881Speter          lines++;
626251881Speter          had_nl = TRUE;
627251881Speter        }
628251881Speter      else if (*file_for_suffix[0].curp == '\r' && !had_nl)
629251881Speter        {
630251881Speter          lines++;
631251881Speter        }
632251881Speter      else
633251881Speter        {
634251881Speter          had_nl = FALSE;
635251881Speter        }
636251881Speter
637251881Speter      DECREMENT_POINTERS(file_for_suffix, file_len, pool);
638251881Speter
639251881Speter#if SVN_UNALIGNED_ACCESS_IS_OK
640251881Speter      for (i = 0; i < file_len; i++)
641251881Speter        min_curp[i] = file_for_suffix[i].buffer;
642251881Speter
643251881Speter      /* If we are in the same chunk that contains the last part of the common
644251881Speter         prefix, use the min_curp[0] pointer to make sure we don't get a
645251881Speter         suffix that overlaps the already determined common prefix. */
646251881Speter      if (file_for_suffix[0].chunk == suffix_min_chunk0)
647251881Speter        min_curp[0] += suffix_min_offset0;
648251881Speter
649251881Speter      /* Scan quickly by reading with machine-word granularity. */
650289180Speter      for (i = 0, can_read_word = TRUE; can_read_word && i < file_len; i++)
651289180Speter        can_read_word = ((file_for_suffix[i].curp + 1 - sizeof(apr_uintptr_t))
652289180Speter                         > min_curp[i]);
653289180Speter
654251881Speter      while (can_read_word)
655251881Speter        {
656251881Speter          apr_uintptr_t chunk;
657251881Speter
658251881Speter          /* For each file curp is positioned at the current byte, but we
659251881Speter             want to examine the current byte and the ones before the current
660251881Speter             location as one machine word. */
661251881Speter
662251881Speter          chunk = *(const apr_uintptr_t *)(file_for_suffix[0].curp + 1
663251881Speter                                             - sizeof(apr_uintptr_t));
664251881Speter          if (contains_eol(chunk))
665251881Speter            break;
666251881Speter
667289180Speter          for (i = 1, is_match = TRUE; is_match && i < file_len; i++)
668289180Speter            is_match = (chunk
669251881Speter                           == *(const apr_uintptr_t *)
670251881Speter                                    (file_for_suffix[i].curp + 1
671251881Speter                                       - sizeof(apr_uintptr_t)));
672251881Speter
673251881Speter          if (! is_match)
674251881Speter            break;
675251881Speter
676251881Speter          for (i = 0; i < file_len; i++)
677251881Speter            {
678251881Speter              file_for_suffix[i].curp -= sizeof(apr_uintptr_t);
679251881Speter              can_read_word = can_read_word
680251881Speter                              && (  (file_for_suffix[i].curp + 1
681251881Speter                                       - sizeof(apr_uintptr_t))
682251881Speter                                  > min_curp[i]);
683251881Speter            }
684251881Speter
685251881Speter          /* We skipped some bytes, so there are no closing EOLs */
686251881Speter          had_nl = FALSE;
687251881Speter        }
688251881Speter
689251881Speter      /* The > min_curp[i] check leaves at least one final byte for checking
690251881Speter         in the non block optimized case below. */
691251881Speter#endif
692251881Speter
693251881Speter      reached_prefix = file_for_suffix[0].chunk == suffix_min_chunk0
694251881Speter                       && (file_for_suffix[0].curp - file_for_suffix[0].buffer)
695251881Speter                          == suffix_min_offset0;
696251881Speter      if (reached_prefix || is_one_at_bof(file_for_suffix, file_len))
697251881Speter        break;
698251881Speter
699251881Speter      is_match = TRUE;
700251881Speter      for (i = 1; i < file_len; i++)
701251881Speter        is_match = is_match
702251881Speter                   && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
703251881Speter    }
704251881Speter
705251881Speter  /* Slide one byte forward, to point at the first byte of identical suffix */
706251881Speter  INCREMENT_POINTERS(file_for_suffix, file_len, pool);
707251881Speter
708251881Speter  /* Slide forward until we find an eol sequence to add the rest of the line
709251881Speter     we're in. Then add SUFFIX_LINES_TO_KEEP more lines. Stop if at least
710251881Speter     one file reaches its end. */
711251881Speter  do
712251881Speter    {
713289180Speter      svn_boolean_t had_cr = FALSE;
714251881Speter      while (!is_one_at_eof(file_for_suffix, file_len)
715251881Speter             && *file_for_suffix[0].curp != '\n'
716251881Speter             && *file_for_suffix[0].curp != '\r')
717251881Speter        INCREMENT_POINTERS(file_for_suffix, file_len, pool);
718251881Speter
719251881Speter      /* Slide one or two more bytes, to point past the eol. */
720251881Speter      if (!is_one_at_eof(file_for_suffix, file_len)
721251881Speter          && *file_for_suffix[0].curp == '\r')
722251881Speter        {
723251881Speter          lines--;
724251881Speter          had_cr = TRUE;
725251881Speter          INCREMENT_POINTERS(file_for_suffix, file_len, pool);
726251881Speter        }
727251881Speter      if (!is_one_at_eof(file_for_suffix, file_len)
728251881Speter          && *file_for_suffix[0].curp == '\n')
729251881Speter        {
730251881Speter          if (!had_cr)
731251881Speter            lines--;
732251881Speter          INCREMENT_POINTERS(file_for_suffix, file_len, pool);
733251881Speter        }
734251881Speter    }
735251881Speter  while (!is_one_at_eof(file_for_suffix, file_len)
736251881Speter         && suffix_lines_to_keep--);
737251881Speter
738251881Speter  if (is_one_at_eof(file_for_suffix, file_len))
739251881Speter    lines = 0;
740251881Speter
741251881Speter  /* Save the final suffix information in the original file_info */
742251881Speter  for (i = 0; i < file_len; i++)
743251881Speter    {
744251881Speter      file[i].suffix_start_chunk = file_for_suffix[i].chunk;
745251881Speter      file[i].suffix_offset_in_chunk =
746251881Speter        file_for_suffix[i].curp - file_for_suffix[i].buffer;
747251881Speter    }
748251881Speter
749251881Speter  *suffix_lines = lines;
750251881Speter
751251881Speter  return SVN_NO_ERROR;
752251881Speter}
753251881Speter
754251881Speter
755251881Speter/* Let FILE stand for the array of file_info struct elements of BATON->files
756251881Speter * that are indexed by the elements of the DATASOURCE array.
757251881Speter * BATON's type is (svn_diff__file_baton_t *).
758251881Speter *
759251881Speter * For each file in the FILE array, open the file at FILE.path; initialize
760251881Speter * FILE.file, FILE.size, FILE.buffer, FILE.curp and FILE.endp; allocate a
761251881Speter * buffer and read the first chunk.  Then find the prefix and suffix lines
762251881Speter * which are identical between all the files.  Return the number of identical
763251881Speter * prefix lines in PREFIX_LINES, and the number of identical suffix lines in
764251881Speter * SUFFIX_LINES.
765251881Speter *
766251881Speter * Finding the identical prefix and suffix allows us to exclude those from the
767251881Speter * rest of the diff algorithm, which increases performance by reducing the
768251881Speter * problem space.
769251881Speter *
770251881Speter * Implements svn_diff_fns2_t::datasources_open. */
771251881Speterstatic svn_error_t *
772251881Speterdatasources_open(void *baton,
773251881Speter                 apr_off_t *prefix_lines,
774251881Speter                 apr_off_t *suffix_lines,
775251881Speter                 const svn_diff_datasource_e *datasources,
776251881Speter                 apr_size_t datasources_len)
777251881Speter{
778251881Speter  svn_diff__file_baton_t *file_baton = baton;
779251881Speter  struct file_info files[4];
780251881Speter  apr_finfo_t finfo[4];
781251881Speter  apr_off_t length[4];
782251881Speter#ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING
783251881Speter  svn_boolean_t reached_one_eof;
784251881Speter#endif
785251881Speter  apr_size_t i;
786251881Speter
787251881Speter  /* Make sure prefix_lines and suffix_lines are set correctly, even if we
788251881Speter   * exit early because one of the files is empty. */
789251881Speter  *prefix_lines = 0;
790251881Speter  *suffix_lines = 0;
791251881Speter
792251881Speter  /* Open datasources and read first chunk */
793251881Speter  for (i = 0; i < datasources_len; i++)
794251881Speter    {
795251881Speter      struct file_info *file
796251881Speter          = &file_baton->files[datasource_to_index(datasources[i])];
797251881Speter      SVN_ERR(svn_io_file_open(&file->file, file->path,
798251881Speter                               APR_READ, APR_OS_DEFAULT, file_baton->pool));
799251881Speter      SVN_ERR(svn_io_file_info_get(&finfo[i], APR_FINFO_SIZE,
800251881Speter                                   file->file, file_baton->pool));
801251881Speter      file->size = finfo[i].size;
802251881Speter      length[i] = finfo[i].size > CHUNK_SIZE ? CHUNK_SIZE : finfo[i].size;
803251881Speter      file->buffer = apr_palloc(file_baton->pool, (apr_size_t) length[i]);
804289180Speter      SVN_ERR(read_chunk(file->file, file->buffer,
805251881Speter                         length[i], 0, file_baton->pool));
806251881Speter      file->endp = file->buffer + length[i];
807251881Speter      file->curp = file->buffer;
808251881Speter      /* Set suffix_start_chunk to a guard value, so if suffix scanning is
809251881Speter       * skipped because one of the files is empty, or because of
810251881Speter       * reached_one_eof, we can still easily check for the suffix during
811251881Speter       * token reading (datasource_get_next_token). */
812251881Speter      file->suffix_start_chunk = -1;
813251881Speter
814251881Speter      files[i] = *file;
815251881Speter    }
816251881Speter
817251881Speter  for (i = 0; i < datasources_len; i++)
818251881Speter    if (length[i] == 0)
819251881Speter      /* There will not be any identical prefix/suffix, so we're done. */
820251881Speter      return SVN_NO_ERROR;
821251881Speter
822251881Speter#ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING
823251881Speter
824251881Speter  SVN_ERR(find_identical_prefix(&reached_one_eof, prefix_lines,
825251881Speter                                files, datasources_len, file_baton->pool));
826251881Speter
827251881Speter  if (!reached_one_eof)
828251881Speter    /* No file consisted totally of identical prefix,
829251881Speter     * so there may be some identical suffix.  */
830251881Speter    SVN_ERR(find_identical_suffix(suffix_lines, files, datasources_len,
831251881Speter                                  file_baton->pool));
832251881Speter
833251881Speter#endif
834251881Speter
835251881Speter  /* Copy local results back to baton. */
836251881Speter  for (i = 0; i < datasources_len; i++)
837251881Speter    file_baton->files[datasource_to_index(datasources[i])] = files[i];
838251881Speter
839251881Speter  return SVN_NO_ERROR;
840251881Speter}
841251881Speter
842251881Speter
843251881Speter/* Implements svn_diff_fns2_t::datasource_close */
844251881Speterstatic svn_error_t *
845251881Speterdatasource_close(void *baton, svn_diff_datasource_e datasource)
846251881Speter{
847251881Speter  /* Do nothing.  The compare_token function needs previous datasources
848251881Speter   * to stay available until all datasources are processed.
849251881Speter   */
850251881Speter
851251881Speter  return SVN_NO_ERROR;
852251881Speter}
853251881Speter
854251881Speter/* Implements svn_diff_fns2_t::datasource_get_next_token */
855251881Speterstatic svn_error_t *
856251881Speterdatasource_get_next_token(apr_uint32_t *hash, void **token, void *baton,
857251881Speter                          svn_diff_datasource_e datasource)
858251881Speter{
859251881Speter  svn_diff__file_baton_t *file_baton = baton;
860251881Speter  svn_diff__file_token_t *file_token;
861251881Speter  struct file_info *file = &file_baton->files[datasource_to_index(datasource)];
862251881Speter  char *endp;
863251881Speter  char *curp;
864251881Speter  char *eol;
865251881Speter  apr_off_t last_chunk;
866251881Speter  apr_off_t length;
867251881Speter  apr_uint32_t h = 0;
868251881Speter  /* Did the last chunk end in a CR character? */
869251881Speter  svn_boolean_t had_cr = FALSE;
870251881Speter
871251881Speter  *token = NULL;
872251881Speter
873251881Speter  curp = file->curp;
874251881Speter  endp = file->endp;
875251881Speter
876251881Speter  last_chunk = offset_to_chunk(file->size);
877251881Speter
878251881Speter  /* Are we already at the end of a chunk? */
879251881Speter  if (curp == endp)
880251881Speter    {
881251881Speter      /* Are we at EOF */
882251881Speter      if (last_chunk == file->chunk)
883251881Speter        return SVN_NO_ERROR; /* EOF */
884251881Speter
885251881Speter      /* Or right before an identical suffix in the next chunk? */
886251881Speter      if (file->chunk + 1 == file->suffix_start_chunk
887251881Speter          && file->suffix_offset_in_chunk == 0)
888251881Speter        return SVN_NO_ERROR;
889251881Speter    }
890251881Speter
891251881Speter  /* Stop when we encounter the identical suffix. If suffix scanning was not
892251881Speter   * performed, suffix_start_chunk will be -1, so this condition will never
893251881Speter   * be true. */
894251881Speter  if (file->chunk == file->suffix_start_chunk
895251881Speter      && (curp - file->buffer) == file->suffix_offset_in_chunk)
896251881Speter    return SVN_NO_ERROR;
897251881Speter
898251881Speter  /* Allocate a new token, or fetch one from the "reusable tokens" list. */
899251881Speter  file_token = file_baton->tokens;
900251881Speter  if (file_token)
901251881Speter    {
902251881Speter      file_baton->tokens = file_token->next;
903251881Speter    }
904251881Speter  else
905251881Speter    {
906251881Speter      file_token = apr_palloc(file_baton->pool, sizeof(*file_token));
907251881Speter    }
908251881Speter
909251881Speter  file_token->datasource = datasource;
910251881Speter  file_token->offset = chunk_to_offset(file->chunk)
911251881Speter                       + (curp - file->buffer);
912251881Speter  file_token->norm_offset = file_token->offset;
913251881Speter  file_token->raw_length = 0;
914251881Speter  file_token->length = 0;
915251881Speter
916251881Speter  while (1)
917251881Speter    {
918251881Speter      eol = svn_eol__find_eol_start(curp, endp - curp);
919251881Speter      if (eol)
920251881Speter        {
921251881Speter          had_cr = (*eol == '\r');
922251881Speter          eol++;
923251881Speter          /* If we have the whole eol sequence in the chunk... */
924251881Speter          if (!(had_cr && eol == endp))
925251881Speter            {
926251881Speter              /* Also skip past the '\n' in an '\r\n' sequence. */
927251881Speter              if (had_cr && *eol == '\n')
928251881Speter                eol++;
929251881Speter              break;
930251881Speter            }
931251881Speter        }
932251881Speter
933251881Speter      if (file->chunk == last_chunk)
934251881Speter        {
935251881Speter          eol = endp;
936251881Speter          break;
937251881Speter        }
938251881Speter
939251881Speter      length = endp - curp;
940251881Speter      file_token->raw_length += length;
941251881Speter      {
942251881Speter        char *c = curp;
943251881Speter
944251881Speter        svn_diff__normalize_buffer(&c, &length,
945251881Speter                                   &file->normalize_state,
946251881Speter                                   curp, file_baton->options);
947251881Speter        if (file_token->length == 0)
948251881Speter          {
949251881Speter            /* When we are reading the first part of the token, move the
950251881Speter               normalized offset past leading ignored characters, if any. */
951251881Speter            file_token->norm_offset += (c - curp);
952251881Speter          }
953251881Speter        file_token->length += length;
954251881Speter        h = svn__adler32(h, c, length);
955251881Speter      }
956251881Speter
957251881Speter      curp = endp = file->buffer;
958251881Speter      file->chunk++;
959251881Speter      length = file->chunk == last_chunk ?
960251881Speter        offset_in_chunk(file->size) : CHUNK_SIZE;
961251881Speter      endp += length;
962251881Speter      file->endp = endp;
963251881Speter
964251881Speter      /* Issue #4283: Normally we should have checked for reaching the skipped
965251881Speter         suffix here, but because we assume that a suffix always starts on a
966251881Speter         line and token boundary we rely on catching the suffix earlier in this
967251881Speter         function.
968251881Speter
969251881Speter         When changing things here, make sure the whitespace settings are
970289180Speter         applied, or we might not reach the exact suffix boundary as token
971251881Speter         boundary. */
972289180Speter      SVN_ERR(read_chunk(file->file,
973251881Speter                         curp, length,
974251881Speter                         chunk_to_offset(file->chunk),
975251881Speter                         file_baton->pool));
976251881Speter
977251881Speter      /* If the last chunk ended in a CR, we're done. */
978251881Speter      if (had_cr)
979251881Speter        {
980251881Speter          eol = curp;
981251881Speter          if (*curp == '\n')
982251881Speter            ++eol;
983251881Speter          break;
984251881Speter        }
985251881Speter    }
986251881Speter
987251881Speter  length = eol - curp;
988251881Speter  file_token->raw_length += length;
989251881Speter  file->curp = eol;
990251881Speter
991251881Speter  /* If the file length is exactly a multiple of CHUNK_SIZE, we will end up
992251881Speter   * with a spurious empty token.  Avoid returning it.
993251881Speter   * Note that we use the unnormalized length; we don't want a line containing
994251881Speter   * only spaces (and no trailing newline) to appear like a non-existent
995251881Speter   * line. */
996251881Speter  if (file_token->raw_length > 0)
997251881Speter    {
998251881Speter      char *c = curp;
999251881Speter      svn_diff__normalize_buffer(&c, &length,
1000251881Speter                                 &file->normalize_state,
1001251881Speter                                 curp, file_baton->options);
1002251881Speter      if (file_token->length == 0)
1003251881Speter        {
1004251881Speter          /* When we are reading the first part of the token, move the
1005251881Speter             normalized offset past leading ignored characters, if any. */
1006251881Speter          file_token->norm_offset += (c - curp);
1007251881Speter        }
1008251881Speter
1009251881Speter      file_token->length += length;
1010251881Speter
1011251881Speter      *hash = svn__adler32(h, c, length);
1012251881Speter      *token = file_token;
1013251881Speter    }
1014251881Speter
1015251881Speter  return SVN_NO_ERROR;
1016251881Speter}
1017251881Speter
1018251881Speter#define COMPARE_CHUNK_SIZE 4096
1019251881Speter
1020251881Speter/* Implements svn_diff_fns2_t::token_compare */
1021251881Speterstatic svn_error_t *
1022251881Spetertoken_compare(void *baton, void *token1, void *token2, int *compare)
1023251881Speter{
1024251881Speter  svn_diff__file_baton_t *file_baton = baton;
1025251881Speter  svn_diff__file_token_t *file_token[2];
1026251881Speter  char buffer[2][COMPARE_CHUNK_SIZE];
1027251881Speter  char *bufp[2];
1028251881Speter  apr_off_t offset[2];
1029251881Speter  struct file_info *file[2];
1030251881Speter  apr_off_t length[2];
1031251881Speter  apr_off_t total_length;
1032251881Speter  /* How much is left to read of each token from the file. */
1033251881Speter  apr_off_t raw_length[2];
1034251881Speter  int i;
1035251881Speter  svn_diff__normalize_state_t state[2];
1036251881Speter
1037251881Speter  file_token[0] = token1;
1038251881Speter  file_token[1] = token2;
1039251881Speter  if (file_token[0]->length < file_token[1]->length)
1040251881Speter    {
1041251881Speter      *compare = -1;
1042251881Speter      return SVN_NO_ERROR;
1043251881Speter    }
1044251881Speter
1045251881Speter  if (file_token[0]->length > file_token[1]->length)
1046251881Speter    {
1047251881Speter      *compare = 1;
1048251881Speter      return SVN_NO_ERROR;
1049251881Speter    }
1050251881Speter
1051251881Speter  total_length = file_token[0]->length;
1052251881Speter  if (total_length == 0)
1053251881Speter    {
1054251881Speter      *compare = 0;
1055251881Speter      return SVN_NO_ERROR;
1056251881Speter    }
1057251881Speter
1058251881Speter  for (i = 0; i < 2; ++i)
1059251881Speter    {
1060251881Speter      int idx = datasource_to_index(file_token[i]->datasource);
1061251881Speter
1062251881Speter      file[i] = &file_baton->files[idx];
1063251881Speter      offset[i] = file_token[i]->norm_offset;
1064251881Speter      state[i] = svn_diff__normalize_state_normal;
1065251881Speter
1066251881Speter      if (offset_to_chunk(offset[i]) == file[i]->chunk)
1067251881Speter        {
1068251881Speter          /* If the start of the token is in memory, the entire token is
1069251881Speter           * in memory.
1070251881Speter           */
1071251881Speter          bufp[i] = file[i]->buffer;
1072251881Speter          bufp[i] += offset_in_chunk(offset[i]);
1073251881Speter
1074251881Speter          length[i] = total_length;
1075251881Speter          raw_length[i] = 0;
1076251881Speter        }
1077251881Speter      else
1078251881Speter        {
1079251881Speter          apr_off_t skipped;
1080251881Speter
1081251881Speter          length[i] = 0;
1082251881Speter
1083251881Speter          /* When we skipped the first part of the token via the whitespace
1084251881Speter             normalization we must reduce the raw length of the token */
1085251881Speter          skipped = (file_token[i]->norm_offset - file_token[i]->offset);
1086251881Speter
1087251881Speter          raw_length[i] = file_token[i]->raw_length - skipped;
1088251881Speter        }
1089251881Speter    }
1090251881Speter
1091251881Speter  do
1092251881Speter    {
1093251881Speter      apr_off_t len;
1094251881Speter      for (i = 0; i < 2; i++)
1095251881Speter        {
1096251881Speter          if (length[i] == 0)
1097251881Speter            {
1098251881Speter              /* Error if raw_length is 0, that's an unexpected change
1099251881Speter               * of the file that can happen when ingoring whitespace
1100251881Speter               * and that can lead to an infinite loop. */
1101251881Speter              if (raw_length[i] == 0)
1102251881Speter                return svn_error_createf(SVN_ERR_DIFF_DATASOURCE_MODIFIED,
1103251881Speter                                         NULL,
1104251881Speter                                         _("The file '%s' changed unexpectedly"
1105251881Speter                                           " during diff"),
1106251881Speter                                         file[i]->path);
1107251881Speter
1108251881Speter              /* Read a chunk from disk into a buffer */
1109251881Speter              bufp[i] = buffer[i];
1110251881Speter              length[i] = raw_length[i] > COMPARE_CHUNK_SIZE ?
1111251881Speter                COMPARE_CHUNK_SIZE : raw_length[i];
1112251881Speter
1113251881Speter              SVN_ERR(read_chunk(file[i]->file,
1114251881Speter                                 bufp[i], length[i], offset[i],
1115251881Speter                                 file_baton->pool));
1116251881Speter              offset[i] += length[i];
1117251881Speter              raw_length[i] -= length[i];
1118251881Speter              /* bufp[i] gets reset to buffer[i] before reading each chunk,
1119251881Speter                 so, overwriting it isn't a problem */
1120251881Speter              svn_diff__normalize_buffer(&bufp[i], &length[i], &state[i],
1121251881Speter                                         bufp[i], file_baton->options);
1122251881Speter
1123251881Speter              /* assert(length[i] == file_token[i]->length); */
1124251881Speter            }
1125251881Speter        }
1126251881Speter
1127251881Speter      len = length[0] > length[1] ? length[1] : length[0];
1128251881Speter
1129251881Speter      /* Compare two chunks (that could be entire tokens if they both reside
1130251881Speter       * in memory).
1131251881Speter       */
1132251881Speter      *compare = memcmp(bufp[0], bufp[1], (size_t) len);
1133251881Speter      if (*compare != 0)
1134251881Speter        return SVN_NO_ERROR;
1135251881Speter
1136251881Speter      total_length -= len;
1137251881Speter      length[0] -= len;
1138251881Speter      length[1] -= len;
1139251881Speter      bufp[0] += len;
1140251881Speter      bufp[1] += len;
1141251881Speter    }
1142251881Speter  while(total_length > 0);
1143251881Speter
1144251881Speter  *compare = 0;
1145251881Speter  return SVN_NO_ERROR;
1146251881Speter}
1147251881Speter
1148251881Speter
1149251881Speter/* Implements svn_diff_fns2_t::token_discard */
1150251881Speterstatic void
1151251881Spetertoken_discard(void *baton, void *token)
1152251881Speter{
1153251881Speter  svn_diff__file_baton_t *file_baton = baton;
1154251881Speter  svn_diff__file_token_t *file_token = token;
1155251881Speter
1156251881Speter  /* Prepend FILE_TOKEN to FILE_BATON->TOKENS, for reuse. */
1157251881Speter  file_token->next = file_baton->tokens;
1158251881Speter  file_baton->tokens = file_token;
1159251881Speter}
1160251881Speter
1161251881Speter
1162251881Speter/* Implements svn_diff_fns2_t::token_discard_all */
1163251881Speterstatic void
1164251881Spetertoken_discard_all(void *baton)
1165251881Speter{
1166251881Speter  svn_diff__file_baton_t *file_baton = baton;
1167251881Speter
1168251881Speter  /* Discard all memory in use by the tokens, and close all open files. */
1169251881Speter  svn_pool_clear(file_baton->pool);
1170251881Speter}
1171251881Speter
1172251881Speter
/* Callback table through which the generic diff functions
 * (svn_diff_diff_2() and friends, see the svn_diff_file_diff*_2() wrappers
 * in this file) access files on disk.  The initializers are positional;
 * the header comment of each function names the svn_diff_fns2_t member it
 * implements. */
1173251881Speterstatic const svn_diff_fns2_t svn_diff__file_vtable =
1174251881Speter{
1175251881Speter  datasources_open,
1176251881Speter  datasource_close,
1177251881Speter  datasource_get_next_token,
1178251881Speter  token_compare,
1179251881Speter  token_discard,
1180251881Speter  token_discard_all
1181251881Speter};
1182251881Speter
1183251881Speter/* Id for the --ignore-eol-style option, which doesn't have a short name. */
1184251881Speter#define SVN_DIFF__OPT_IGNORE_EOL_STYLE 256
1185251881Speter
1186251881Speter/* Options supported by svn_diff_file_options_parse(). */
/* Each entry lists, in order: the long option name, the option id (the
 * short option character, or SVN_DIFF__OPT_IGNORE_EOL_STYLE for the option
 * without a short name), a flag that is non-zero if the option takes an
 * argument, and a description string (not used here, hence NULL).  Only
 * "context" (-U) takes an argument.  The all-zero entry terminates the
 * table. */
1187251881Speterstatic const apr_getopt_option_t diff_options[] =
1188251881Speter{
1189251881Speter  { "ignore-space-change", 'b', 0, NULL },
1190251881Speter  { "ignore-all-space", 'w', 0, NULL },
1191251881Speter  { "ignore-eol-style", SVN_DIFF__OPT_IGNORE_EOL_STYLE, 0, NULL },
1192251881Speter  { "show-c-function", 'p', 0, NULL },
1193251881Speter  /* ### For compatibility; we don't support the argument to -u, because
1194251881Speter   * ### we don't have optional argument support. */
1195251881Speter  { "unified", 'u', 0, NULL },
1196289180Speter  { "context", 'U', 1, NULL },
1197251881Speter  { NULL, 0, 0, NULL }
1198251881Speter};
1199251881Speter
1200251881Spetersvn_diff_file_options_t *
1201251881Spetersvn_diff_file_options_create(apr_pool_t *pool)
1202251881Speter{
1203289180Speter  svn_diff_file_options_t * opts = apr_pcalloc(pool, sizeof(*opts));
1204289180Speter
1205289180Speter  opts->context_size = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1206289180Speter
1207289180Speter  return opts;
1208251881Speter}
1209251881Speter
1210251881Speter/* A baton for use with opt_parsing_error_func(). */
1211251881Speterstruct opt_parsing_error_baton_t
1212251881Speter{
  /* The error created by opt_parsing_error_func(); the user of the baton
   * (svn_diff_file_options_parse()) initializes it to NULL. */
1213251881Speter  svn_error_t *err;
  /* Pool in which opt_parsing_error_func() formats the error message. */
1214251881Speter  apr_pool_t *pool;
1215251881Speter};
1216251881Speter
1217251881Speter/* Store an error message from apr_getopt_long().  Set BATON->err to a new
1218251881Speter * error with a message generated from FMT and the remaining arguments.
1219251881Speter * Implements apr_getopt_err_fn_t. */
1220251881Speterstatic void
1221251881Speteropt_parsing_error_func(void *baton,
1222251881Speter                       const char *fmt, ...)
1223251881Speter{
1224251881Speter  struct opt_parsing_error_baton_t *b = baton;
1225251881Speter  const char *message;
1226251881Speter  va_list ap;
1227251881Speter
1228251881Speter  va_start(ap, fmt);
1229251881Speter  message = apr_pvsprintf(b->pool, fmt, ap);
1230251881Speter  va_end(ap);
1231251881Speter
1232251881Speter  /* Skip leading ": " (if present, which it always is in known cases). */
1233251881Speter  if (strncmp(message, ": ", 2) == 0)
1234251881Speter    message += 2;
1235251881Speter
1236251881Speter  b->err = svn_error_create(SVN_ERR_INVALID_DIFF_OPTION, NULL, message);
1237251881Speter}
1238251881Speter
1239251881Spetersvn_error_t *
1240251881Spetersvn_diff_file_options_parse(svn_diff_file_options_t *options,
1241251881Speter                            const apr_array_header_t *args,
1242251881Speter                            apr_pool_t *pool)
1243251881Speter{
1244251881Speter  apr_getopt_t *os;
1245251881Speter  struct opt_parsing_error_baton_t opt_parsing_error_baton;
1246251881Speter  /* Make room for each option (starting at index 1) plus trailing NULL. */
1247251881Speter  const char **argv = apr_palloc(pool, sizeof(char*) * (args->nelts + 2));
1248251881Speter
1249251881Speter  opt_parsing_error_baton.err = NULL;
1250251881Speter  opt_parsing_error_baton.pool = pool;
1251251881Speter
1252251881Speter  argv[0] = "";
1253289180Speter  memcpy(argv + 1, args->elts, sizeof(char*) * args->nelts);
1254251881Speter  argv[args->nelts + 1] = NULL;
1255251881Speter
1256251881Speter  apr_getopt_init(&os, pool, args->nelts + 1, argv);
1257251881Speter
1258251881Speter  /* Capture any error message from apr_getopt_long().  This will typically
1259251881Speter   * say which option is wrong, which we would not otherwise know. */
1260251881Speter  os->errfn = opt_parsing_error_func;
1261251881Speter  os->errarg = &opt_parsing_error_baton;
1262251881Speter
1263251881Speter  while (1)
1264251881Speter    {
1265251881Speter      const char *opt_arg;
1266251881Speter      int opt_id;
1267251881Speter      apr_status_t err = apr_getopt_long(os, diff_options, &opt_id, &opt_arg);
1268251881Speter
1269251881Speter      if (APR_STATUS_IS_EOF(err))
1270251881Speter        break;
1271251881Speter      if (err)
1272251881Speter        /* Wrap apr_getopt_long()'s error message.  Its doc string implies
1273251881Speter         * it always will produce one, but never mind if it doesn't.  Avoid
1274251881Speter         * using the message associated with the return code ERR, because
1275251881Speter         * it refers to the "command line" which may be misleading here. */
1276251881Speter        return svn_error_create(SVN_ERR_INVALID_DIFF_OPTION,
1277251881Speter                                opt_parsing_error_baton.err,
1278251881Speter                                _("Error in options to internal diff"));
1279251881Speter
1280251881Speter      switch (opt_id)
1281251881Speter        {
1282251881Speter        case 'b':
1283251881Speter          /* -w takes precedence over -b. */
1284251881Speter          if (! options->ignore_space)
1285251881Speter            options->ignore_space = svn_diff_file_ignore_space_change;
1286251881Speter          break;
1287251881Speter        case 'w':
1288251881Speter          options->ignore_space = svn_diff_file_ignore_space_all;
1289251881Speter          break;
1290251881Speter        case SVN_DIFF__OPT_IGNORE_EOL_STYLE:
1291251881Speter          options->ignore_eol_style = TRUE;
1292251881Speter          break;
1293251881Speter        case 'p':
1294251881Speter          options->show_c_function = TRUE;
1295251881Speter          break;
1296289180Speter        case 'U':
1297289180Speter          SVN_ERR(svn_cstring_atoi(&options->context_size, opt_arg));
1298289180Speter          break;
1299251881Speter        default:
1300251881Speter          break;
1301251881Speter        }
1302251881Speter    }
1303251881Speter
1304251881Speter  /* Check for spurious arguments. */
1305251881Speter  if (os->ind < os->argc)
1306251881Speter    return svn_error_createf(SVN_ERR_INVALID_DIFF_OPTION, NULL,
1307251881Speter                             _("Invalid argument '%s' in diff options"),
1308251881Speter                             os->argv[os->ind]);
1309251881Speter
1310251881Speter  return SVN_NO_ERROR;
1311251881Speter}
1312251881Speter
1313251881Spetersvn_error_t *
1314251881Spetersvn_diff_file_diff_2(svn_diff_t **diff,
1315251881Speter                     const char *original,
1316251881Speter                     const char *modified,
1317251881Speter                     const svn_diff_file_options_t *options,
1318251881Speter                     apr_pool_t *pool)
1319251881Speter{
1320251881Speter  svn_diff__file_baton_t baton = { 0 };
1321251881Speter
1322251881Speter  baton.options = options;
1323251881Speter  baton.files[0].path = original;
1324251881Speter  baton.files[1].path = modified;
1325251881Speter  baton.pool = svn_pool_create(pool);
1326251881Speter
1327251881Speter  SVN_ERR(svn_diff_diff_2(diff, &baton, &svn_diff__file_vtable, pool));
1328251881Speter
1329251881Speter  svn_pool_destroy(baton.pool);
1330251881Speter  return SVN_NO_ERROR;
1331251881Speter}
1332251881Speter
1333251881Spetersvn_error_t *
1334251881Spetersvn_diff_file_diff3_2(svn_diff_t **diff,
1335251881Speter                      const char *original,
1336251881Speter                      const char *modified,
1337251881Speter                      const char *latest,
1338251881Speter                      const svn_diff_file_options_t *options,
1339251881Speter                      apr_pool_t *pool)
1340251881Speter{
1341251881Speter  svn_diff__file_baton_t baton = { 0 };
1342251881Speter
1343251881Speter  baton.options = options;
1344251881Speter  baton.files[0].path = original;
1345251881Speter  baton.files[1].path = modified;
1346251881Speter  baton.files[2].path = latest;
1347251881Speter  baton.pool = svn_pool_create(pool);
1348251881Speter
1349251881Speter  SVN_ERR(svn_diff_diff3_2(diff, &baton, &svn_diff__file_vtable, pool));
1350251881Speter
1351251881Speter  svn_pool_destroy(baton.pool);
1352251881Speter  return SVN_NO_ERROR;
1353251881Speter}
1354251881Speter
1355251881Spetersvn_error_t *
1356251881Spetersvn_diff_file_diff4_2(svn_diff_t **diff,
1357251881Speter                      const char *original,
1358251881Speter                      const char *modified,
1359251881Speter                      const char *latest,
1360251881Speter                      const char *ancestor,
1361251881Speter                      const svn_diff_file_options_t *options,
1362251881Speter                      apr_pool_t *pool)
1363251881Speter{
1364251881Speter  svn_diff__file_baton_t baton = { 0 };
1365251881Speter
1366251881Speter  baton.options = options;
1367251881Speter  baton.files[0].path = original;
1368251881Speter  baton.files[1].path = modified;
1369251881Speter  baton.files[2].path = latest;
1370251881Speter  baton.files[3].path = ancestor;
1371251881Speter  baton.pool = svn_pool_create(pool);
1372251881Speter
1373251881Speter  SVN_ERR(svn_diff_diff4_2(diff, &baton, &svn_diff__file_vtable, pool));
1374251881Speter
1375251881Speter  svn_pool_destroy(baton.pool);
1376251881Speter  return SVN_NO_ERROR;
1377251881Speter}
1378251881Speter
1379251881Speter
1380251881Speter/** Display unified context diffs **/
1381251881Speter
/* Maximum length of the extra context to show when show_c_function is set.
 * GNU diff uses 40, let's be brave and use 50 instead. */
#define SVN_DIFF__EXTRA_CONTEXT_LENGTH 50

/* State shared by the unified-diff output callbacks.  Index 0 of the
 * two-element arrays refers to the original file, index 1 to the modified
 * file. */
typedef struct svn_diff__file_output_baton_t
{
  /* Stream that receives each hunk header and hunk body when the pending
   * hunk is flushed. */
  svn_stream_t *output_stream;
  /* Encoding passed to the header writers; the cached markers below are
   * converted to it as well. */
  const char *header_encoding;

  /* Cached markers, in header_encoding. */
  const char *context_str;
  const char *delete_str;
  const char *insert_str;

  /* Paths of the two files and the handles they are read through. */
  const char *path[2];
  apr_file_t *file[2];

  /* Number of lines consumed so far from each file.  It is bumped for
   * every requested line, even once the file is at EOF. */
  apr_off_t   current_line[2];

  /* Per-file read buffer, the number of bytes in it that are still
   * unconsumed, and the position of the next unconsumed byte. */
  char        buffer[2][4096];
  apr_size_t  length[2];
  char       *curp[2];

  /* Pending hunk: zero-based start line and line count for each file, plus
   * the text accumulated so far.  The start lines are converted to
   * one-based values when the hunk header is written. */
  apr_off_t   hunk_start[2];
  apr_off_t   hunk_length[2];
  svn_stringbuf_t *hunk;

  /* Should we emit C functions in the unified diff header */
  svn_boolean_t show_c_function;
  /* Extra strings to skip over if we match. */
  apr_array_header_t *extra_skip_match;
  /* "Context" to append to the @@ line when the show_c_function option
   * is set. */
  svn_stringbuf_t *extra_context;
  /* Extra context for the current hunk. */
  char hunk_extra_context[SVN_DIFF__EXTRA_CONTEXT_LENGTH + 1];

  /* Number of context lines emitted before and after each change. */
  int context_size;

  /* Pool used for reads and for the helper calls made while producing
   * output. */
  apr_pool_t *pool;
} svn_diff__file_output_baton_t;
1422251881Speter
/* How output_unified_line() treats the line it consumes. */
typedef enum svn_diff__file_output_unified_type_e
{
  /* Consume the line without adding it to the hunk. */
  svn_diff__file_output_unified_skip,
  /* Add the line with the context marker; counts for both files. */
  svn_diff__file_output_unified_context,
  /* Add the line with the delete marker; counts for the original file. */
  svn_diff__file_output_unified_delete,
  /* Add the line with the insert marker; counts for the modified file. */
  svn_diff__file_output_unified_insert
} svn_diff__file_output_unified_type_e;
1430251881Speter
1431251881Speter
/* Consume one line from file IDX of BATON (0 = original, 1 = modified).
 *
 * Unless TYPE is svn_diff__file_output_unified_skip, the line is appended
 * to BATON->hunk, preceded by the marker that belongs to TYPE, and the
 * matching hunk_length counters are incremented.  A line ends at LF, CR or
 * CRLF and may span several refills of the fixed-size read buffer.
 *
 * BATON->current_line[IDX] is incremented on every call, including calls
 * made once the file is exhausted.
 *
 * When show_c_function is set, a skipped or context line that starts with
 * a letter, '$' or '_' and does not match extra_skip_match replaces the
 * contents of BATON->extra_context.
 *
 * If EOF is hit in the middle of a non-skipped line that does not end in
 * CR, the "no newline at end of file" message is appended to the hunk.
 *
 * Returns any read error other than EOF. */
static svn_error_t *
output_unified_line(svn_diff__file_output_baton_t *baton,
                    svn_diff__file_output_unified_type_e type, int idx)
{
  char *curp;
  char *eol;
  apr_size_t length;
  svn_error_t *err;
  /* Set once a partial line has been copied from an exhausted buffer. */
  svn_boolean_t bytes_processed = FALSE;
  /* Set when the most recent EOL found started with CR. */
  svn_boolean_t had_cr = FALSE;
  /* Are we collecting extra context? */
  svn_boolean_t collect_extra = FALSE;

  length = baton->length[idx];
  curp = baton->curp[idx];

  /* Lazily update the current line even if we're at EOF.
   * This way we fake output of context at EOF
   */
  baton->current_line[idx]++;

  /* Nothing buffered and nothing left in the file: no line to emit. */
  if (length == 0 && apr_file_eof(baton->file[idx]))
    {
      return SVN_NO_ERROR;
    }

  do
    {
      if (length > 0)
        {
          /* First bytes of this line: emit the marker and update the
             per-file hunk line counts. */
          if (!bytes_processed)
            {
              switch (type)
                {
                case svn_diff__file_output_unified_context:
                  svn_stringbuf_appendcstr(baton->hunk, baton->context_str);
                  baton->hunk_length[0]++;
                  baton->hunk_length[1]++;
                  break;
                case svn_diff__file_output_unified_delete:
                  svn_stringbuf_appendcstr(baton->hunk, baton->delete_str);
                  baton->hunk_length[0]++;
                  break;
                case svn_diff__file_output_unified_insert:
                  svn_stringbuf_appendcstr(baton->hunk, baton->insert_str);
                  baton->hunk_length[1]++;
                  break;
                default:
                  break;
                }

              /* NOTE(review): CURP points into the read buffer, which this
                 function never NUL-terminates.  Confirm that
                 svn_cstring_match_glob_list() cannot read past the bytes
                 that were actually read. */
              if (baton->show_c_function
                  && (type == svn_diff__file_output_unified_skip
                      || type == svn_diff__file_output_unified_context)
                  && (svn_ctype_isalpha(*curp) || *curp == '$' || *curp == '_')
                  && !svn_cstring_match_glob_list(curp,
                                                  baton->extra_skip_match))
                {
                  svn_stringbuf_setempty(baton->extra_context);
                  collect_extra = TRUE;
                }
            }

          eol = svn_eol__find_eol_start(curp, length);

          if (eol != NULL)
            {
              apr_size_t len;

              had_cr = (*eol == '\r');
              eol++;
              len = (apr_size_t)(eol - curp);

              /* A CR in the very last buffered byte cannot be resolved
                 here: the LF of a CRLF pair might be the first byte of the
                 next read.  That case falls through to the refill below. */
              if (! had_cr || len < length)
                {
                  if (had_cr && *eol == '\n')
                    {
                      ++eol;
                      ++len;
                    }

                  length -= len;

                  if (type != svn_diff__file_output_unified_skip)
                    {
                      svn_stringbuf_appendbytes(baton->hunk, curp, len);
                    }
                  if (collect_extra)
                    {
                      svn_stringbuf_appendbytes(baton->extra_context,
                                                curp, len);
                    }

                  /* Remember where the next line starts. */
                  baton->curp[idx] = eol;
                  baton->length[idx] = length;

                  err = SVN_NO_ERROR;

                  break;
                }
            }

          /* No complete line in the buffer: copy what is there and go on
             to refill. */
          if (type != svn_diff__file_output_unified_skip)
            {
              svn_stringbuf_appendbytes(baton->hunk, curp, length);
            }

          if (collect_extra)
            {
              svn_stringbuf_appendbytes(baton->extra_context, curp, length);
            }

          bytes_processed = TRUE;
        }

      /* Refill the buffer from the file. */
      curp = baton->buffer[idx];
      length = sizeof(baton->buffer[idx]);

      err = svn_io_file_read(baton->file[idx], curp, &length, baton->pool);

      /* If the last chunk ended with a CR, we look for an LF at the start
         of this chunk. */
      if (had_cr)
        {
          if (! err && length > 0 && *curp == '\n')
            {
              if (type != svn_diff__file_output_unified_skip)
                {
                  svn_stringbuf_appendbyte(baton->hunk, *curp);
                }
              /* We don't append the LF to extra_context, since it would
               * just be stripped anyway. */
              ++curp;
              --length;
            }

          baton->curp[idx] = curp;
          baton->length[idx] = length;

          break;
        }
    }
  while (! err);

  /* Anything other than EOF is a genuine read failure. */
  if (err && ! APR_STATUS_IS_EOF(err->apr_err))
    return err;

  if (err && APR_STATUS_IS_EOF(err->apr_err))
    {
      svn_error_clear(err);
      /* Special case if we reach the end of file AND the last line is in the
         changed range AND the file doesn't end with a newline */
      if (bytes_processed && (type != svn_diff__file_output_unified_skip)
          && ! had_cr)
        {
          SVN_ERR(svn_diff__unified_append_no_newline_msg(
                    baton->hunk, baton->header_encoding, baton->pool));
        }

      baton->length[idx] = 0;
    }

  return SVN_NO_ERROR;
}
1596251881Speter
1597251881Speterstatic APR_INLINE svn_error_t *
1598251881Speteroutput_unified_diff_range(svn_diff__file_output_baton_t *output_baton,
1599251881Speter                          int source,
1600251881Speter                          svn_diff__file_output_unified_type_e type,
1601251881Speter                          apr_off_t until)
1602251881Speter{
1603251881Speter  while (output_baton->current_line[source] < until)
1604251881Speter    {
1605251881Speter      SVN_ERR(output_unified_line(output_baton, type, source));
1606251881Speter    }
1607251881Speter  return SVN_NO_ERROR;
1608251881Speter}
1609251881Speter
1610251881Speterstatic svn_error_t *
1611251881Speteroutput_unified_flush_hunk(svn_diff__file_output_baton_t *baton)
1612251881Speter{
1613251881Speter  apr_off_t target_line;
1614251881Speter  apr_size_t hunk_len;
1615251881Speter  apr_off_t old_start;
1616251881Speter  apr_off_t new_start;
1617251881Speter
1618251881Speter  if (svn_stringbuf_isempty(baton->hunk))
1619251881Speter    {
1620251881Speter      /* Nothing to flush */
1621251881Speter      return SVN_NO_ERROR;
1622251881Speter    }
1623251881Speter
1624251881Speter  target_line = baton->hunk_start[0] + baton->hunk_length[0]
1625289180Speter                + baton->context_size;
1626251881Speter
1627251881Speter  /* Add trailing context to the hunk */
1628251881Speter  SVN_ERR(output_unified_diff_range(baton, 0 /* original */,
1629251881Speter                                    svn_diff__file_output_unified_context,
1630251881Speter                                    target_line));
1631251881Speter
1632251881Speter  old_start = baton->hunk_start[0];
1633251881Speter  new_start = baton->hunk_start[1];
1634251881Speter
1635251881Speter  /* If the file is non-empty, convert the line indexes from
1636251881Speter     zero based to one based */
1637251881Speter  if (baton->hunk_length[0])
1638251881Speter    old_start++;
1639251881Speter  if (baton->hunk_length[1])
1640251881Speter    new_start++;
1641251881Speter
1642251881Speter  /* Write the hunk header */
1643251881Speter  SVN_ERR(svn_diff__unified_write_hunk_header(
1644251881Speter            baton->output_stream, baton->header_encoding, "@@",
1645251881Speter            old_start, baton->hunk_length[0],
1646251881Speter            new_start, baton->hunk_length[1],
1647251881Speter            baton->hunk_extra_context,
1648251881Speter            baton->pool));
1649251881Speter
1650251881Speter  /* Output the hunk content */
1651251881Speter  hunk_len = baton->hunk->len;
1652251881Speter  SVN_ERR(svn_stream_write(baton->output_stream, baton->hunk->data,
1653251881Speter                           &hunk_len));
1654251881Speter
1655251881Speter  /* Prepare for the next hunk */
1656251881Speter  baton->hunk_length[0] = 0;
1657251881Speter  baton->hunk_length[1] = 0;
1658251881Speter  baton->hunk_start[0] = 0;
1659251881Speter  baton->hunk_start[1] = 0;
1660251881Speter  svn_stringbuf_setempty(baton->hunk);
1661251881Speter
1662251881Speter  return SVN_NO_ERROR;
1663251881Speter}
1664251881Speter
1665251881Speterstatic svn_error_t *
1666251881Speteroutput_unified_diff_modified(void *baton,
1667251881Speter  apr_off_t original_start, apr_off_t original_length,
1668251881Speter  apr_off_t modified_start, apr_off_t modified_length,
1669251881Speter  apr_off_t latest_start, apr_off_t latest_length)
1670251881Speter{
1671251881Speter  svn_diff__file_output_baton_t *output_baton = baton;
1672251881Speter  apr_off_t context_prefix_length;
1673251881Speter  apr_off_t prev_context_end;
1674251881Speter  svn_boolean_t init_hunk = FALSE;
1675251881Speter
1676289180Speter  if (original_start > output_baton->context_size)
1677289180Speter    context_prefix_length = output_baton->context_size;
1678251881Speter  else
1679251881Speter    context_prefix_length = original_start;
1680251881Speter
1681251881Speter  /* Calculate where the previous hunk will end if we would write it now
1682251881Speter     (including the necessary context at the end) */
1683251881Speter  if (output_baton->hunk_length[0] > 0 || output_baton->hunk_length[1] > 0)
1684251881Speter    {
1685251881Speter      prev_context_end = output_baton->hunk_start[0]
1686251881Speter                         + output_baton->hunk_length[0]
1687289180Speter                         + output_baton->context_size;
1688251881Speter    }
1689251881Speter  else
1690251881Speter    {
1691251881Speter      prev_context_end = -1;
1692251881Speter
1693251881Speter      if (output_baton->hunk_start[0] == 0
1694251881Speter          && (original_length > 0 || modified_length > 0))
1695251881Speter        init_hunk = TRUE;
1696251881Speter    }
1697251881Speter
1698251881Speter  /* If the changed range is far enough from the previous range, flush the current
1699251881Speter     hunk. */
1700251881Speter  {
1701251881Speter    apr_off_t new_hunk_start = (original_start - context_prefix_length);
1702251881Speter
1703251881Speter    if (output_baton->current_line[0] < new_hunk_start
1704251881Speter          && prev_context_end <= new_hunk_start)
1705251881Speter      {
1706251881Speter        SVN_ERR(output_unified_flush_hunk(output_baton));
1707251881Speter        init_hunk = TRUE;
1708251881Speter      }
1709251881Speter    else if (output_baton->hunk_length[0] > 0
1710251881Speter             || output_baton->hunk_length[1] > 0)
1711251881Speter      {
1712251881Speter        /* We extend the current hunk */
1713251881Speter
1714251881Speter
1715251881Speter        /* Original: Output the context preceding the changed range */
1716251881Speter        SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1717251881Speter                                          svn_diff__file_output_unified_context,
1718251881Speter                                          original_start));
1719251881Speter      }
1720251881Speter  }
1721251881Speter
1722251881Speter  /* Original: Skip lines until we are at the beginning of the context we want
1723251881Speter     to display */
1724251881Speter  SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1725251881Speter                                    svn_diff__file_output_unified_skip,
1726251881Speter                                    original_start - context_prefix_length));
1727251881Speter
1728251881Speter  /* Note that the above skip stores data for the show_c_function support below */
1729251881Speter
1730251881Speter  if (init_hunk)
1731251881Speter    {
1732251881Speter      SVN_ERR_ASSERT(output_baton->hunk_length[0] == 0
1733251881Speter                     && output_baton->hunk_length[1] == 0);
1734251881Speter
1735251881Speter      output_baton->hunk_start[0] = original_start - context_prefix_length;
1736251881Speter      output_baton->hunk_start[1] = modified_start - context_prefix_length;
1737251881Speter    }
1738251881Speter
1739251881Speter  if (init_hunk && output_baton->show_c_function)
1740251881Speter    {
1741251881Speter      apr_size_t p;
1742251881Speter      const char *invalid_character;
1743251881Speter
1744251881Speter      /* Save the extra context for later use.
1745251881Speter       * Note that the last byte of the hunk_extra_context array is never
1746251881Speter       * touched after it is zero-initialized, so the array is always
1747251881Speter       * 0-terminated. */
1748251881Speter      strncpy(output_baton->hunk_extra_context,
1749251881Speter              output_baton->extra_context->data,
1750251881Speter              SVN_DIFF__EXTRA_CONTEXT_LENGTH);
1751251881Speter      /* Trim whitespace at the end, most notably to get rid of any
1752251881Speter       * newline characters. */
1753251881Speter      p = strlen(output_baton->hunk_extra_context);
1754251881Speter      while (p > 0
1755251881Speter             && svn_ctype_isspace(output_baton->hunk_extra_context[p - 1]))
1756251881Speter        {
1757251881Speter          output_baton->hunk_extra_context[--p] = '\0';
1758251881Speter        }
1759251881Speter      invalid_character =
1760251881Speter        svn_utf__last_valid(output_baton->hunk_extra_context,
1761251881Speter                            SVN_DIFF__EXTRA_CONTEXT_LENGTH);
1762251881Speter      for (p = invalid_character - output_baton->hunk_extra_context;
1763251881Speter           p < SVN_DIFF__EXTRA_CONTEXT_LENGTH; p++)
1764251881Speter        {
1765251881Speter          output_baton->hunk_extra_context[p] = '\0';
1766251881Speter        }
1767251881Speter    }
1768251881Speter
1769251881Speter  /* Modified: Skip lines until we are at the start of the changed range */
1770251881Speter  SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */,
1771251881Speter                                    svn_diff__file_output_unified_skip,
1772251881Speter                                    modified_start));
1773251881Speter
1774251881Speter  /* Original: Output the context preceding the changed range */
1775251881Speter  SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1776251881Speter                                    svn_diff__file_output_unified_context,
1777251881Speter                                    original_start));
1778251881Speter
1779251881Speter  /* Both: Output the changed range */
1780251881Speter  SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1781251881Speter                                    svn_diff__file_output_unified_delete,
1782251881Speter                                    original_start + original_length));
1783251881Speter  SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */,
1784251881Speter                                    svn_diff__file_output_unified_insert,
1785251881Speter                                    modified_start + modified_length));
1786251881Speter
1787251881Speter  return SVN_NO_ERROR;
1788251881Speter}
1789251881Speter
1790251881Speter/* Set *HEADER to a new string consisting of PATH, a tab, and PATH's mtime. */
1791251881Speterstatic svn_error_t *
1792251881Speteroutput_unified_default_hdr(const char **header, const char *path,
1793251881Speter                           apr_pool_t *pool)
1794251881Speter{
1795251881Speter  apr_finfo_t file_info;
1796251881Speter  apr_time_exp_t exploded_time;
1797251881Speter  char time_buffer[64];
1798251881Speter  apr_size_t time_len;
1799251881Speter  const char *utf8_timestr;
1800251881Speter
1801251881Speter  SVN_ERR(svn_io_stat(&file_info, path, APR_FINFO_MTIME, pool));
1802251881Speter  apr_time_exp_lt(&exploded_time, file_info.mtime);
1803251881Speter
1804251881Speter  apr_strftime(time_buffer, &time_len, sizeof(time_buffer) - 1,
1805251881Speter  /* Order of date components can be different in different languages */
1806251881Speter               _("%a %b %e %H:%M:%S %Y"), &exploded_time);
1807251881Speter
1808251881Speter  SVN_ERR(svn_utf_cstring_to_utf8(&utf8_timestr, time_buffer, pool));
1809251881Speter
1810251881Speter  *header = apr_psprintf(pool, "%s\t%s", path, utf8_timestr);
1811251881Speter
1812251881Speter  return SVN_NO_ERROR;
1813251881Speter}
1814251881Speter
/* Output callbacks for unified diffs: only the "modified" callback is
 * provided; every other slot is left NULL. */
static const svn_diff_output_fns_t svn_diff__file_output_unified_vtable =
{
  NULL, /* output_common */
  output_unified_diff_modified,
  NULL, /* output_diff_latest */
  NULL, /* output_diff_common */
  NULL  /* output_conflict */
};
1823251881Speter
1824251881Spetersvn_error_t *
1825289180Spetersvn_diff_file_output_unified4(svn_stream_t *output_stream,
1826251881Speter                              svn_diff_t *diff,
1827251881Speter                              const char *original_path,
1828251881Speter                              const char *modified_path,
1829251881Speter                              const char *original_header,
1830251881Speter                              const char *modified_header,
1831251881Speter                              const char *header_encoding,
1832251881Speter                              const char *relative_to_dir,
1833251881Speter                              svn_boolean_t show_c_function,
1834289180Speter                              int context_size,
1835289180Speter                              svn_cancel_func_t cancel_func,
1836289180Speter                              void *cancel_baton,
1837251881Speter                              apr_pool_t *pool)
1838251881Speter{
1839251881Speter  if (svn_diff_contains_diffs(diff))
1840251881Speter    {
1841251881Speter      svn_diff__file_output_baton_t baton;
1842251881Speter      int i;
1843251881Speter
1844251881Speter      memset(&baton, 0, sizeof(baton));
1845251881Speter      baton.output_stream = output_stream;
1846251881Speter      baton.pool = pool;
1847251881Speter      baton.header_encoding = header_encoding;
1848251881Speter      baton.path[0] = original_path;
1849251881Speter      baton.path[1] = modified_path;
1850251881Speter      baton.hunk = svn_stringbuf_create_empty(pool);
1851251881Speter      baton.show_c_function = show_c_function;
1852251881Speter      baton.extra_context = svn_stringbuf_create_empty(pool);
1853289180Speter      baton.context_size = (context_size >= 0) ? context_size
1854289180Speter                                              : SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1855251881Speter
1856251881Speter      if (show_c_function)
1857251881Speter        {
1858251881Speter          baton.extra_skip_match = apr_array_make(pool, 3, sizeof(char **));
1859251881Speter
1860251881Speter          APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "public:*";
1861251881Speter          APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "private:*";
1862251881Speter          APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "protected:*";
1863251881Speter        }
1864251881Speter
1865251881Speter      SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.context_str, " ",
1866251881Speter                                            header_encoding, pool));
1867251881Speter      SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.delete_str, "-",
1868251881Speter                                            header_encoding, pool));
1869251881Speter      SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.insert_str, "+",
1870251881Speter                                            header_encoding, pool));
1871251881Speter
1872251881Speter      if (relative_to_dir)
1873251881Speter        {
1874251881Speter          /* Possibly adjust the "original" and "modified" paths shown in
1875251881Speter             the output (see issue #2723). */
1876251881Speter          const char *child_path;
1877251881Speter
1878251881Speter          if (! original_header)
1879251881Speter            {
1880251881Speter              child_path = svn_dirent_is_child(relative_to_dir,
1881251881Speter                                               original_path, pool);
1882251881Speter              if (child_path)
1883251881Speter                original_path = child_path;
1884251881Speter              else
1885251881Speter                return svn_error_createf(
1886251881Speter                                   SVN_ERR_BAD_RELATIVE_PATH, NULL,
1887251881Speter                                   _("Path '%s' must be inside "
1888251881Speter                                     "the directory '%s'"),
1889251881Speter                                   svn_dirent_local_style(original_path, pool),
1890251881Speter                                   svn_dirent_local_style(relative_to_dir,
1891251881Speter                                                          pool));
1892251881Speter            }
1893251881Speter
1894251881Speter          if (! modified_header)
1895251881Speter            {
1896251881Speter              child_path = svn_dirent_is_child(relative_to_dir,
1897251881Speter                                               modified_path, pool);
1898251881Speter              if (child_path)
1899251881Speter                modified_path = child_path;
1900251881Speter              else
1901251881Speter                return svn_error_createf(
1902251881Speter                                   SVN_ERR_BAD_RELATIVE_PATH, NULL,
1903251881Speter                                   _("Path '%s' must be inside "
1904251881Speter                                     "the directory '%s'"),
1905251881Speter                                   svn_dirent_local_style(modified_path, pool),
1906251881Speter                                   svn_dirent_local_style(relative_to_dir,
1907251881Speter                                                          pool));
1908251881Speter            }
1909251881Speter        }
1910251881Speter
1911251881Speter      for (i = 0; i < 2; i++)
1912251881Speter        {
1913251881Speter          SVN_ERR(svn_io_file_open(&baton.file[i], baton.path[i],
1914251881Speter                                   APR_READ, APR_OS_DEFAULT, pool));
1915251881Speter        }
1916251881Speter
1917251881Speter      if (original_header == NULL)
1918251881Speter        {
1919251881Speter          SVN_ERR(output_unified_default_hdr(&original_header, original_path,
1920251881Speter                                             pool));
1921251881Speter        }
1922251881Speter
1923251881Speter      if (modified_header == NULL)
1924251881Speter        {
1925251881Speter          SVN_ERR(output_unified_default_hdr(&modified_header, modified_path,
1926251881Speter                                             pool));
1927251881Speter        }
1928251881Speter
1929251881Speter      SVN_ERR(svn_diff__unidiff_write_header(output_stream, header_encoding,
1930251881Speter                                             original_header, modified_header,
1931251881Speter                                             pool));
1932251881Speter
1933289180Speter      SVN_ERR(svn_diff_output2(diff, &baton,
1934289180Speter                               &svn_diff__file_output_unified_vtable,
1935289180Speter                               cancel_func, cancel_baton));
1936251881Speter      SVN_ERR(output_unified_flush_hunk(&baton));
1937251881Speter
1938251881Speter      for (i = 0; i < 2; i++)
1939251881Speter        {
1940251881Speter          SVN_ERR(svn_io_file_close(baton.file[i], pool));
1941251881Speter        }
1942251881Speter    }
1943251881Speter
1944251881Speter  return SVN_NO_ERROR;
1945251881Speter}
1946251881Speter
1947251881Speter
1948251881Speter/** Display diff3 **/
1949251881Speter
/* A stream to remember *leading* context.  Note that this stream does
   *not* copy the data that it is remembering; it just saves
   *pointers! */
typedef struct context_saver_t {
  /* The stream whose writes are recorded here. */
  svn_stream_t *stream;
  /* Number of slots in DATA and LEN; nothing is recorded when this is
     not positive. */
  int context_size;
  const char **data; /* const char *data[context_size] */
  apr_size_t *len;   /* apr_size_t len[context_size] */
  /* Slot the next write goes into; the slots are reused round-robin. */
  apr_size_t next_slot;
  /* Number of writes recorded so far. */
  apr_size_t total_written;
} context_saver_t;
1961251881Speter
1962251881Speter
1963251881Speterstatic svn_error_t *
1964251881Spetercontext_saver_stream_write(void *baton,
1965251881Speter                           const char *data,
1966251881Speter                           apr_size_t *len)
1967251881Speter{
1968251881Speter  context_saver_t *cs = baton;
1969289180Speter
1970289180Speter  if (cs->context_size > 0)
1971289180Speter    {
1972289180Speter      cs->data[cs->next_slot] = data;
1973289180Speter      cs->len[cs->next_slot] = *len;
1974289180Speter      cs->next_slot = (cs->next_slot + 1) % cs->context_size;
1975289180Speter      cs->total_written++;
1976289180Speter    }
1977251881Speter  return SVN_NO_ERROR;
1978251881Speter}
1979251881Speter
/* State for the diff3 (merge) output callbacks.
 * NOTE(review): the three-element arrays are presumably indexed by
 * original / modified / latest; confirm against the code that fills
 * them. */
typedef struct svn_diff3__file_output_baton_t
{
  /* Stream the callbacks write to; see the note on the fields near the end
     of this struct for the only-conflicts display style. */
  svn_stream_t *output_stream;

  const char *path[3];

  apr_off_t   current_line[3];

  /* NOTE(review): looks like start, end and current read position of each
     file's contents; confirm against the code that sets them. */
  char       *buffer[3];
  char       *endp[3];
  char       *curp[3];

  /* The following four members are in the encoding used for the output. */
  const char *conflict_modified;
  const char *conflict_original;
  const char *conflict_separator;
  const char *conflict_latest;

  const char *marker_eol;

  svn_diff_conflict_display_style_t conflict_style;
  int context_size;

  /* cancel support */
  svn_cancel_func_t cancel_func;
  void *cancel_baton;

  /* The rest of the fields are for
     svn_diff_conflict_display_only_conflicts only.  Note that for
     these batons, OUTPUT_STREAM is either CONTEXT_SAVER->STREAM or
     (soon after a conflict) a "trailing context stream", never the
     actual output stream.*/
  /* The actual output stream. */
  svn_stream_t *real_output_stream;
  context_saver_t *context_saver;
  /* Used to allocate context_saver and trailing context streams, and
     for some printfs. */
  apr_pool_t *pool;
} svn_diff3__file_output_baton_t;
2019251881Speter
2020251881Speterstatic svn_error_t *
2021251881Speterflush_context_saver(context_saver_t *cs,
2022251881Speter                    svn_stream_t *output_stream)
2023251881Speter{
2024251881Speter  int i;
2025289180Speter  for (i = 0; i < cs->context_size; i++)
2026251881Speter    {
2027289180Speter      apr_size_t slot = (i + cs->next_slot) % cs->context_size;
2028251881Speter      if (cs->data[slot])
2029251881Speter        {
2030251881Speter          apr_size_t len = cs->len[slot];
2031251881Speter          SVN_ERR(svn_stream_write(output_stream, cs->data[slot], &len));
2032251881Speter        }
2033251881Speter    }
2034251881Speter  return SVN_NO_ERROR;
2035251881Speter}
2036251881Speter
2037251881Speterstatic void
2038251881Spetermake_context_saver(svn_diff3__file_output_baton_t *fob)
2039251881Speter{
2040251881Speter  context_saver_t *cs;
2041251881Speter
2042289180Speter  assert(fob->context_size > 0); /* Or nothing to save */
2043289180Speter
2044251881Speter  svn_pool_clear(fob->pool);
2045251881Speter  cs = apr_pcalloc(fob->pool, sizeof(*cs));
2046251881Speter  cs->stream = svn_stream_empty(fob->pool);
2047251881Speter  svn_stream_set_baton(cs->stream, cs);
2048251881Speter  svn_stream_set_write(cs->stream, context_saver_stream_write);
2049251881Speter  fob->context_saver = cs;
2050251881Speter  fob->output_stream = cs->stream;
2051289180Speter  cs->context_size = fob->context_size;
2052289180Speter  cs->data = apr_pcalloc(fob->pool, sizeof(*cs->data) * cs->context_size);
2053289180Speter  cs->len = apr_pcalloc(fob->pool, sizeof(*cs->len) * cs->context_size);
2054251881Speter}
2055251881Speter
2056251881Speter
2057289180Speter/* A stream which prints LINES_TO_PRINT (based on context size) lines to
2058251881Speter   BATON->REAL_OUTPUT_STREAM, and then changes BATON->OUTPUT_STREAM to
2059251881Speter   a context_saver; used for *trailing* context. */
2060251881Speter
2061251881Speterstruct trailing_context_printer {
2062251881Speter  apr_size_t lines_to_print;
2063251881Speter  svn_diff3__file_output_baton_t *fob;
2064251881Speter};
2065251881Speter
2066251881Speter
2067251881Speter
2068251881Speterstatic svn_error_t *
2069251881Spetertrailing_context_printer_write(void *baton,
2070251881Speter                               const char *data,
2071251881Speter                               apr_size_t *len)
2072251881Speter{
2073251881Speter  struct trailing_context_printer *tcp = baton;
2074251881Speter  SVN_ERR_ASSERT(tcp->lines_to_print > 0);
2075251881Speter  SVN_ERR(svn_stream_write(tcp->fob->real_output_stream, data, len));
2076251881Speter  tcp->lines_to_print--;
2077251881Speter  if (tcp->lines_to_print == 0)
2078251881Speter    make_context_saver(tcp->fob);
2079251881Speter  return SVN_NO_ERROR;
2080251881Speter}
2081251881Speter
2082251881Speter
2083251881Speterstatic void
2084251881Spetermake_trailing_context_printer(svn_diff3__file_output_baton_t *btn)
2085251881Speter{
2086251881Speter  struct trailing_context_printer *tcp;
2087251881Speter  svn_stream_t *s;
2088251881Speter
2089251881Speter  svn_pool_clear(btn->pool);
2090251881Speter
2091251881Speter  tcp = apr_pcalloc(btn->pool, sizeof(*tcp));
2092289180Speter  tcp->lines_to_print = btn->context_size;
2093251881Speter  tcp->fob = btn;
2094251881Speter  s = svn_stream_empty(btn->pool);
2095251881Speter  svn_stream_set_baton(s, tcp);
2096251881Speter  svn_stream_set_write(s, trailing_context_printer_write);
2097251881Speter  btn->output_stream = s;
2098251881Speter}
2099251881Speter
2100251881Speter
2101251881Speter
/* Tells output_line() what to do with the line it consumes. */
typedef enum svn_diff3__file_output_type_e
{
  /* Advance past the line without writing it. */
  svn_diff3__file_output_skip,

  /* Write the line to the output stream. */
  svn_diff3__file_output_normal
} svn_diff3__file_output_type_e;
2107251881Speter
2108251881Speter
2109251881Speterstatic svn_error_t *
2110251881Speteroutput_line(svn_diff3__file_output_baton_t *baton,
2111251881Speter            svn_diff3__file_output_type_e type, int idx)
2112251881Speter{
2113251881Speter  char *curp;
2114251881Speter  char *endp;
2115251881Speter  char *eol;
2116251881Speter  apr_size_t len;
2117251881Speter
2118251881Speter  curp = baton->curp[idx];
2119251881Speter  endp = baton->endp[idx];
2120251881Speter
2121251881Speter  /* Lazily update the current line even if we're at EOF.
2122251881Speter   */
2123251881Speter  baton->current_line[idx]++;
2124251881Speter
2125251881Speter  if (curp == endp)
2126251881Speter    return SVN_NO_ERROR;
2127251881Speter
2128251881Speter  eol = svn_eol__find_eol_start(curp, endp - curp);
2129251881Speter  if (!eol)
2130251881Speter    eol = endp;
2131251881Speter  else
2132251881Speter    {
2133251881Speter      svn_boolean_t had_cr = (*eol == '\r');
2134251881Speter      eol++;
2135251881Speter      if (had_cr && eol != endp && *eol == '\n')
2136251881Speter        eol++;
2137251881Speter    }
2138251881Speter
2139251881Speter  if (type != svn_diff3__file_output_skip)
2140251881Speter    {
2141251881Speter      len = eol - curp;
2142251881Speter      /* Note that the trailing context printer assumes that
2143251881Speter         svn_stream_write is called exactly once per line. */
2144251881Speter      SVN_ERR(svn_stream_write(baton->output_stream, curp, &len));
2145251881Speter    }
2146251881Speter
2147251881Speter  baton->curp[idx] = eol;
2148251881Speter
2149251881Speter  return SVN_NO_ERROR;
2150251881Speter}
2151251881Speter
2152251881Speterstatic svn_error_t *
2153251881Speteroutput_marker_eol(svn_diff3__file_output_baton_t *btn)
2154251881Speter{
2155251881Speter  return svn_stream_puts(btn->output_stream, btn->marker_eol);
2156251881Speter}
2157251881Speter
2158251881Speterstatic svn_error_t *
2159251881Speteroutput_hunk(void *baton, int idx, apr_off_t target_line,
2160251881Speter            apr_off_t target_length)
2161251881Speter{
2162251881Speter  svn_diff3__file_output_baton_t *output_baton = baton;
2163251881Speter
2164251881Speter  /* Skip lines until we are at the start of the changed range */
2165251881Speter  while (output_baton->current_line[idx] < target_line)
2166251881Speter    {
2167251881Speter      SVN_ERR(output_line(output_baton, svn_diff3__file_output_skip, idx));
2168251881Speter    }
2169251881Speter
2170251881Speter  target_line += target_length;
2171251881Speter
2172251881Speter  while (output_baton->current_line[idx] < target_line)
2173251881Speter    {
2174251881Speter      SVN_ERR(output_line(output_baton, svn_diff3__file_output_normal, idx));
2175251881Speter    }
2176251881Speter
2177251881Speter  return SVN_NO_ERROR;
2178251881Speter}
2179251881Speter
2180251881Speterstatic svn_error_t *
2181251881Speteroutput_common(void *baton, apr_off_t original_start, apr_off_t original_length,
2182251881Speter              apr_off_t modified_start, apr_off_t modified_length,
2183251881Speter              apr_off_t latest_start, apr_off_t latest_length)
2184251881Speter{
2185251881Speter  return output_hunk(baton, 1, modified_start, modified_length);
2186251881Speter}
2187251881Speter
2188251881Speterstatic svn_error_t *
2189251881Speteroutput_diff_modified(void *baton,
2190251881Speter                     apr_off_t original_start, apr_off_t original_length,
2191251881Speter                     apr_off_t modified_start, apr_off_t modified_length,
2192251881Speter                     apr_off_t latest_start, apr_off_t latest_length)
2193251881Speter{
2194251881Speter  return output_hunk(baton, 1, modified_start, modified_length);
2195251881Speter}
2196251881Speter
2197251881Speterstatic svn_error_t *
2198251881Speteroutput_diff_latest(void *baton,
2199251881Speter                   apr_off_t original_start, apr_off_t original_length,
2200251881Speter                   apr_off_t modified_start, apr_off_t modified_length,
2201251881Speter                   apr_off_t latest_start, apr_off_t latest_length)
2202251881Speter{
2203251881Speter  return output_hunk(baton, 2, latest_start, latest_length);
2204251881Speter}
2205251881Speter
2206251881Speterstatic svn_error_t *
2207251881Speteroutput_conflict(void *baton,
2208251881Speter                apr_off_t original_start, apr_off_t original_length,
2209251881Speter                apr_off_t modified_start, apr_off_t modified_length,
2210251881Speter                apr_off_t latest_start, apr_off_t latest_length,
2211251881Speter                svn_diff_t *diff);
2212251881Speter
2213251881Speterstatic const svn_diff_output_fns_t svn_diff3__file_output_vtable =
2214251881Speter{
2215251881Speter  output_common,
2216251881Speter  output_diff_modified,
2217251881Speter  output_diff_latest,
2218251881Speter  output_diff_modified, /* output_diff_common */
2219251881Speter  output_conflict
2220251881Speter};
2221251881Speter
2222289180Speterstatic svn_error_t *
2223289180Speteroutput_conflict_with_context_marker(svn_diff3__file_output_baton_t *btn,
2224289180Speter                                    const char *label,
2225289180Speter                                    apr_off_t start,
2226289180Speter                                    apr_off_t length)
2227289180Speter{
2228289180Speter  if (length == 1)
2229289180Speter    SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2230289180Speter                              "%s (%" APR_OFF_T_FMT ")",
2231289180Speter                              label, start + 1));
2232289180Speter  else
2233289180Speter    SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2234289180Speter                              "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")",
2235289180Speter                              label, start + 1, length));
2236251881Speter
2237289180Speter  SVN_ERR(output_marker_eol(btn));
2238251881Speter
2239289180Speter  return SVN_NO_ERROR;
2240289180Speter}
2241289180Speter
2242251881Speterstatic svn_error_t *
2243251881Speteroutput_conflict_with_context(svn_diff3__file_output_baton_t *btn,
2244251881Speter                             apr_off_t original_start,
2245251881Speter                             apr_off_t original_length,
2246251881Speter                             apr_off_t modified_start,
2247251881Speter                             apr_off_t modified_length,
2248251881Speter                             apr_off_t latest_start,
2249251881Speter                             apr_off_t latest_length)
2250251881Speter{
2251251881Speter  /* Are we currently saving starting context (as opposed to printing
2252251881Speter     trailing context)?  If so, flush it. */
2253251881Speter  if (btn->output_stream == btn->context_saver->stream)
2254251881Speter    {
2255289180Speter      if (btn->context_saver->total_written > btn->context_size)
2256251881Speter        SVN_ERR(svn_stream_puts(btn->real_output_stream, "@@\n"));
2257251881Speter      SVN_ERR(flush_context_saver(btn->context_saver, btn->real_output_stream));
2258251881Speter    }
2259251881Speter
2260251881Speter  /* Print to the real output stream. */
2261251881Speter  btn->output_stream = btn->real_output_stream;
2262251881Speter
2263251881Speter  /* Output the conflict itself. */
2264289180Speter  SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_modified,
2265289180Speter                                              modified_start, modified_length));
2266251881Speter  SVN_ERR(output_hunk(btn, 1/*modified*/, modified_start, modified_length));
2267251881Speter
2268289180Speter  SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_original,
2269289180Speter                                              original_start, original_length));
2270251881Speter  SVN_ERR(output_hunk(btn, 0/*original*/, original_start, original_length));
2271251881Speter
2272251881Speter  SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2273251881Speter                            "%s%s", btn->conflict_separator, btn->marker_eol));
2274251881Speter  SVN_ERR(output_hunk(btn, 2/*latest*/, latest_start, latest_length));
2275289180Speter  SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_latest,
2276289180Speter                                              latest_start, latest_length));
2277251881Speter
2278251881Speter  /* Go into print-trailing-context mode instead. */
2279251881Speter  make_trailing_context_printer(btn);
2280251881Speter
2281251881Speter  return SVN_NO_ERROR;
2282251881Speter}
2283251881Speter
2284251881Speter
2285251881Speterstatic svn_error_t *
2286251881Speteroutput_conflict(void *baton,
2287251881Speter                apr_off_t original_start, apr_off_t original_length,
2288251881Speter                apr_off_t modified_start, apr_off_t modified_length,
2289251881Speter                apr_off_t latest_start, apr_off_t latest_length,
2290251881Speter                svn_diff_t *diff)
2291251881Speter{
2292251881Speter  svn_diff3__file_output_baton_t *file_baton = baton;
2293251881Speter
2294251881Speter  svn_diff_conflict_display_style_t style = file_baton->conflict_style;
2295251881Speter
2296251881Speter  if (style == svn_diff_conflict_display_only_conflicts)
2297251881Speter    return output_conflict_with_context(file_baton,
2298251881Speter                                        original_start, original_length,
2299251881Speter                                        modified_start, modified_length,
2300251881Speter                                        latest_start, latest_length);
2301251881Speter
2302251881Speter  if (style == svn_diff_conflict_display_resolved_modified_latest)
2303251881Speter    {
2304251881Speter      if (diff)
2305289180Speter        return svn_diff_output2(diff, baton,
2306289180Speter                                &svn_diff3__file_output_vtable,
2307289180Speter                                file_baton->cancel_func,
2308289180Speter                                file_baton->cancel_baton);
2309251881Speter      else
2310251881Speter        style = svn_diff_conflict_display_modified_latest;
2311251881Speter    }
2312251881Speter
2313251881Speter  if (style == svn_diff_conflict_display_modified_latest ||
2314251881Speter      style == svn_diff_conflict_display_modified_original_latest)
2315251881Speter    {
2316251881Speter      SVN_ERR(svn_stream_puts(file_baton->output_stream,
2317251881Speter                               file_baton->conflict_modified));
2318251881Speter      SVN_ERR(output_marker_eol(file_baton));
2319251881Speter
2320251881Speter      SVN_ERR(output_hunk(baton, 1, modified_start, modified_length));
2321251881Speter
2322251881Speter      if (style == svn_diff_conflict_display_modified_original_latest)
2323251881Speter        {
2324251881Speter          SVN_ERR(svn_stream_puts(file_baton->output_stream,
2325251881Speter                                   file_baton->conflict_original));
2326251881Speter          SVN_ERR(output_marker_eol(file_baton));
2327251881Speter          SVN_ERR(output_hunk(baton, 0, original_start, original_length));
2328251881Speter        }
2329251881Speter
2330251881Speter      SVN_ERR(svn_stream_puts(file_baton->output_stream,
2331251881Speter                              file_baton->conflict_separator));
2332251881Speter      SVN_ERR(output_marker_eol(file_baton));
2333251881Speter
2334251881Speter      SVN_ERR(output_hunk(baton, 2, latest_start, latest_length));
2335251881Speter
2336251881Speter      SVN_ERR(svn_stream_puts(file_baton->output_stream,
2337251881Speter                              file_baton->conflict_latest));
2338251881Speter      SVN_ERR(output_marker_eol(file_baton));
2339251881Speter    }
2340251881Speter  else if (style == svn_diff_conflict_display_modified)
2341251881Speter    SVN_ERR(output_hunk(baton, 1, modified_start, modified_length));
2342251881Speter  else if (style == svn_diff_conflict_display_latest)
2343251881Speter    SVN_ERR(output_hunk(baton, 2, latest_start, latest_length));
2344251881Speter  else /* unknown style */
2345251881Speter    SVN_ERR_MALFUNCTION();
2346251881Speter
2347251881Speter  return SVN_NO_ERROR;
2348251881Speter}
2349251881Speter
2350251881Spetersvn_error_t *
2351289180Spetersvn_diff_file_output_merge3(svn_stream_t *output_stream,
2352251881Speter                            svn_diff_t *diff,
2353251881Speter                            const char *original_path,
2354251881Speter                            const char *modified_path,
2355251881Speter                            const char *latest_path,
2356251881Speter                            const char *conflict_original,
2357251881Speter                            const char *conflict_modified,
2358251881Speter                            const char *conflict_latest,
2359251881Speter                            const char *conflict_separator,
2360251881Speter                            svn_diff_conflict_display_style_t style,
2361289180Speter                            svn_cancel_func_t cancel_func,
2362289180Speter                            void *cancel_baton,
2363289180Speter                            apr_pool_t *scratch_pool)
2364251881Speter{
2365251881Speter  svn_diff3__file_output_baton_t baton;
2366251881Speter  apr_file_t *file[3];
2367251881Speter  int idx;
2368251881Speter#if APR_HAS_MMAP
2369251881Speter  apr_mmap_t *mm[3] = { 0 };
2370251881Speter#endif /* APR_HAS_MMAP */
2371251881Speter  const char *eol;
2372251881Speter  svn_boolean_t conflicts_only =
2373251881Speter    (style == svn_diff_conflict_display_only_conflicts);
2374251881Speter
2375251881Speter  memset(&baton, 0, sizeof(baton));
2376289180Speter  baton.context_size = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
2377251881Speter  if (conflicts_only)
2378251881Speter    {
2379289180Speter      baton.pool = svn_pool_create(scratch_pool);
2380251881Speter      make_context_saver(&baton);
2381251881Speter      baton.real_output_stream = output_stream;
2382251881Speter    }
2383251881Speter  else
2384251881Speter    baton.output_stream = output_stream;
2385251881Speter  baton.path[0] = original_path;
2386251881Speter  baton.path[1] = modified_path;
2387251881Speter  baton.path[2] = latest_path;
2388251881Speter  SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_modified,
2389251881Speter                                    conflict_modified ? conflict_modified
2390289180Speter                                    : apr_psprintf(scratch_pool, "<<<<<<< %s",
2391251881Speter                                                   modified_path),
2392289180Speter                                    scratch_pool));
2393251881Speter  SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_original,
2394251881Speter                                    conflict_original ? conflict_original
2395289180Speter                                    : apr_psprintf(scratch_pool, "||||||| %s",
2396251881Speter                                                   original_path),
2397289180Speter                                    scratch_pool));
2398251881Speter  SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_separator,
2399251881Speter                                    conflict_separator ? conflict_separator
2400289180Speter                                    : "=======", scratch_pool));
2401251881Speter  SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_latest,
2402251881Speter                                    conflict_latest ? conflict_latest
2403289180Speter                                    : apr_psprintf(scratch_pool, ">>>>>>> %s",
2404251881Speter                                                   latest_path),
2405289180Speter                                    scratch_pool));
2406251881Speter
2407251881Speter  baton.conflict_style = style;
2408251881Speter
2409251881Speter  for (idx = 0; idx < 3; idx++)
2410251881Speter    {
2411257936Speter      apr_size_t size;
2412251881Speter
2413251881Speter      SVN_ERR(map_or_read_file(&file[idx],
2414251881Speter                               MMAP_T_ARG(mm[idx])
2415251881Speter                               &baton.buffer[idx], &size,
2416289180Speter                               baton.path[idx], scratch_pool));
2417251881Speter
2418251881Speter      baton.curp[idx] = baton.buffer[idx];
2419251881Speter      baton.endp[idx] = baton.buffer[idx];
2420251881Speter
2421251881Speter      if (baton.endp[idx])
2422251881Speter        baton.endp[idx] += size;
2423251881Speter    }
2424251881Speter
2425251881Speter  /* Check what eol marker we should use for conflict markers.
2426251881Speter     We use the eol marker of the modified file and fall back on the
2427251881Speter     platform's eol marker if that file doesn't contain any newlines. */
2428251881Speter  eol = svn_eol__detect_eol(baton.buffer[1], baton.endp[1] - baton.buffer[1],
2429251881Speter                            NULL);
2430251881Speter  if (! eol)
2431251881Speter    eol = APR_EOL_STR;
2432251881Speter  baton.marker_eol = eol;
2433251881Speter
2434289180Speter  baton.cancel_func = cancel_func;
2435289180Speter  baton.cancel_baton = cancel_baton;
2436251881Speter
2437289180Speter  SVN_ERR(svn_diff_output2(diff, &baton,
2438289180Speter                          &svn_diff3__file_output_vtable,
2439289180Speter                          cancel_func, cancel_baton));
2440289180Speter
2441251881Speter  for (idx = 0; idx < 3; idx++)
2442251881Speter    {
2443251881Speter#if APR_HAS_MMAP
2444251881Speter      if (mm[idx])
2445251881Speter        {
2446251881Speter          apr_status_t rv = apr_mmap_delete(mm[idx]);
2447251881Speter          if (rv != APR_SUCCESS)
2448251881Speter            {
2449251881Speter              return svn_error_wrap_apr(rv, _("Failed to delete mmap '%s'"),
2450251881Speter                                        baton.path[idx]);
2451251881Speter            }
2452251881Speter        }
2453251881Speter#endif /* APR_HAS_MMAP */
2454251881Speter
2455251881Speter      if (file[idx])
2456251881Speter        {
2457289180Speter          SVN_ERR(svn_io_file_close(file[idx], scratch_pool));
2458251881Speter        }
2459251881Speter    }
2460251881Speter
2461251881Speter  if (conflicts_only)
2462251881Speter    svn_pool_destroy(baton.pool);
2463251881Speter
2464251881Speter  return SVN_NO_ERROR;
2465251881Speter}
2466251881Speter
2467