1251881Speter/*
2251881Speter * parse-diff.c: functions for parsing diff files
3251881Speter *
4251881Speter * ====================================================================
5251881Speter *    Licensed to the Apache Software Foundation (ASF) under one
6251881Speter *    or more contributor license agreements.  See the NOTICE file
7251881Speter *    distributed with this work for additional information
8251881Speter *    regarding copyright ownership.  The ASF licenses this file
9251881Speter *    to you under the Apache License, Version 2.0 (the
10251881Speter *    "License"); you may not use this file except in compliance
11251881Speter *    with the License.  You may obtain a copy of the License at
12251881Speter *
13251881Speter *      http://www.apache.org/licenses/LICENSE-2.0
14251881Speter *
15251881Speter *    Unless required by applicable law or agreed to in writing,
16251881Speter *    software distributed under the License is distributed on an
17251881Speter *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18251881Speter *    KIND, either express or implied.  See the License for the
19251881Speter *    specific language governing permissions and limitations
20251881Speter *    under the License.
21251881Speter * ====================================================================
22251881Speter */
23251881Speter
24251881Speter#include <stdlib.h>
25251881Speter#include <stddef.h>
26251881Speter#include <string.h>
27251881Speter
28251881Speter#include "svn_hash.h"
29251881Speter#include "svn_types.h"
30251881Speter#include "svn_error.h"
31251881Speter#include "svn_io.h"
32251881Speter#include "svn_pools.h"
33251881Speter#include "svn_props.h"
34251881Speter#include "svn_string.h"
35251881Speter#include "svn_utf.h"
36251881Speter#include "svn_dirent_uri.h"
37251881Speter#include "svn_diff.h"
38299742Sdim#include "svn_ctype.h"
39299742Sdim#include "svn_mergeinfo.h"
40251881Speter
41251881Speter#include "private/svn_eol_private.h"
42251881Speter#include "private/svn_dep_compat.h"
43299742Sdim#include "private/svn_sorts_private.h"
44251881Speter
/* Evaluate to non-zero when the C string STR begins with the C string
 * START.  START is expanded twice, so it must be free of side effects. */
#define starts_with(str, start)  \
  (!strncmp((str), (start), strlen(start)))

/* Length of the string literal STR, not counting the terminating NUL.
 * Relies on sizeof, so STR must be a literal or array, not a pointer. */
#define STRLEN_LITERAL(str) (sizeof(str) - 1)
51251881Speter
52251881Speter/* This struct describes a range within a file, as well as the
53251881Speter * current cursor position within the range. All numbers are in bytes. */
54251881Speterstruct svn_diff__hunk_range {
55251881Speter  apr_off_t start;
56251881Speter  apr_off_t end;
57251881Speter  apr_off_t current;
58251881Speter};
59251881Speter
60251881Speterstruct svn_diff_hunk_t {
61251881Speter  /* The patch this hunk belongs to. */
62251881Speter  svn_patch_t *patch;
63251881Speter
64251881Speter  /* APR file handle to the patch file this hunk came from. */
65251881Speter  apr_file_t *apr_file;
66251881Speter
67251881Speter  /* Ranges used to keep track of this hunk's texts positions within
68251881Speter   * the patch file. */
69251881Speter  struct svn_diff__hunk_range diff_text_range;
70251881Speter  struct svn_diff__hunk_range original_text_range;
71251881Speter  struct svn_diff__hunk_range modified_text_range;
72251881Speter
73251881Speter  /* Hunk ranges as they appeared in the patch file.
74251881Speter   * All numbers are lines, not bytes. */
75251881Speter  svn_linenum_t original_start;
76251881Speter  svn_linenum_t original_length;
77251881Speter  svn_linenum_t modified_start;
78251881Speter  svn_linenum_t modified_length;
79251881Speter
80251881Speter  /* Number of lines of leading and trailing hunk context. */
81251881Speter  svn_linenum_t leading_context;
82251881Speter  svn_linenum_t trailing_context;
83251881Speter};
84251881Speter
85251881Spetervoid
86251881Spetersvn_diff_hunk_reset_diff_text(svn_diff_hunk_t *hunk)
87251881Speter{
88251881Speter  hunk->diff_text_range.current = hunk->diff_text_range.start;
89251881Speter}
90251881Speter
91251881Spetervoid
92251881Spetersvn_diff_hunk_reset_original_text(svn_diff_hunk_t *hunk)
93251881Speter{
94251881Speter  if (hunk->patch->reverse)
95251881Speter    hunk->modified_text_range.current = hunk->modified_text_range.start;
96251881Speter  else
97251881Speter    hunk->original_text_range.current = hunk->original_text_range.start;
98251881Speter}
99251881Speter
100251881Spetervoid
101251881Spetersvn_diff_hunk_reset_modified_text(svn_diff_hunk_t *hunk)
102251881Speter{
103251881Speter  if (hunk->patch->reverse)
104251881Speter    hunk->original_text_range.current = hunk->original_text_range.start;
105251881Speter  else
106251881Speter    hunk->modified_text_range.current = hunk->modified_text_range.start;
107251881Speter}
108251881Speter
109251881Spetersvn_linenum_t
110251881Spetersvn_diff_hunk_get_original_start(const svn_diff_hunk_t *hunk)
111251881Speter{
112251881Speter  return hunk->patch->reverse ? hunk->modified_start : hunk->original_start;
113251881Speter}
114251881Speter
115251881Spetersvn_linenum_t
116251881Spetersvn_diff_hunk_get_original_length(const svn_diff_hunk_t *hunk)
117251881Speter{
118251881Speter  return hunk->patch->reverse ? hunk->modified_length : hunk->original_length;
119251881Speter}
120251881Speter
121251881Spetersvn_linenum_t
122251881Spetersvn_diff_hunk_get_modified_start(const svn_diff_hunk_t *hunk)
123251881Speter{
124251881Speter  return hunk->patch->reverse ? hunk->original_start : hunk->modified_start;
125251881Speter}
126251881Speter
127251881Spetersvn_linenum_t
128251881Spetersvn_diff_hunk_get_modified_length(const svn_diff_hunk_t *hunk)
129251881Speter{
130251881Speter  return hunk->patch->reverse ? hunk->original_length : hunk->modified_length;
131251881Speter}
132251881Speter
133251881Spetersvn_linenum_t
134251881Spetersvn_diff_hunk_get_leading_context(const svn_diff_hunk_t *hunk)
135251881Speter{
136251881Speter  return hunk->leading_context;
137251881Speter}
138251881Speter
139251881Spetersvn_linenum_t
140251881Spetersvn_diff_hunk_get_trailing_context(const svn_diff_hunk_t *hunk)
141251881Speter{
142251881Speter  return hunk->trailing_context;
143251881Speter}
144251881Speter
145251881Speter/* Try to parse a positive number from a decimal number encoded
146251881Speter * in the string NUMBER. Return parsed number in OFFSET, and return
147251881Speter * TRUE if parsing was successful. */
148251881Speterstatic svn_boolean_t
149251881Speterparse_offset(svn_linenum_t *offset, const char *number)
150251881Speter{
151251881Speter  svn_error_t *err;
152251881Speter  apr_uint64_t val;
153251881Speter
154251881Speter  err = svn_cstring_strtoui64(&val, number, 0, SVN_LINENUM_MAX_VALUE, 10);
155251881Speter  if (err)
156251881Speter    {
157251881Speter      svn_error_clear(err);
158251881Speter      return FALSE;
159251881Speter    }
160251881Speter
161251881Speter  *offset = (svn_linenum_t)val;
162251881Speter
163251881Speter  return TRUE;
164251881Speter}
165251881Speter
166251881Speter/* Try to parse a hunk range specification from the string RANGE.
167251881Speter * Return parsed information in *START and *LENGTH, and return TRUE
168251881Speter * if the range parsed correctly. Note: This function may modify the
169251881Speter * input value RANGE. */
170251881Speterstatic svn_boolean_t
171251881Speterparse_range(svn_linenum_t *start, svn_linenum_t *length, char *range)
172251881Speter{
173251881Speter  char *comma;
174251881Speter
175251881Speter  if (*range == 0)
176251881Speter    return FALSE;
177251881Speter
178251881Speter  comma = strstr(range, ",");
179251881Speter  if (comma)
180251881Speter    {
181251881Speter      if (strlen(comma + 1) > 0)
182251881Speter        {
183251881Speter          /* Try to parse the length. */
184251881Speter          if (! parse_offset(length, comma + 1))
185251881Speter            return FALSE;
186251881Speter
187251881Speter          /* Snip off the end of the string,
188251881Speter           * so we can comfortably parse the line
189251881Speter           * number the hunk starts at. */
190251881Speter          *comma = '\0';
191251881Speter        }
192251881Speter       else
193251881Speter         /* A comma but no length? */
194251881Speter         return FALSE;
195251881Speter    }
196251881Speter  else
197251881Speter    {
198251881Speter      *length = 1;
199251881Speter    }
200251881Speter
201251881Speter  /* Try to parse the line number the hunk starts at. */
202251881Speter  return parse_offset(start, range);
203251881Speter}
204251881Speter
205251881Speter/* Try to parse a hunk header in string HEADER, putting parsed information
206251881Speter * into HUNK. Return TRUE if the header parsed correctly. ATAT is the
207251881Speter * character string used to delimit the hunk header.
208251881Speter * Do all allocations in POOL. */
209251881Speterstatic svn_boolean_t
210251881Speterparse_hunk_header(const char *header, svn_diff_hunk_t *hunk,
211251881Speter                  const char *atat, apr_pool_t *pool)
212251881Speter{
213251881Speter  const char *p;
214251881Speter  const char *start;
215251881Speter  svn_stringbuf_t *range;
216251881Speter
217251881Speter  p = header + strlen(atat);
218251881Speter  if (*p != ' ')
219251881Speter    /* No. */
220251881Speter    return FALSE;
221251881Speter  p++;
222251881Speter  if (*p != '-')
223251881Speter    /* Nah... */
224251881Speter    return FALSE;
225251881Speter  /* OK, this may be worth allocating some memory for... */
226251881Speter  range = svn_stringbuf_create_ensure(31, pool);
227251881Speter  start = ++p;
228251881Speter  while (*p && *p != ' ')
229251881Speter    {
230251881Speter      p++;
231251881Speter    }
232251881Speter
233251881Speter  if (*p != ' ')
234251881Speter    /* No no no... */
235251881Speter    return FALSE;
236251881Speter
237251881Speter  svn_stringbuf_appendbytes(range, start, p - start);
238251881Speter
239251881Speter  /* Try to parse the first range. */
240251881Speter  if (! parse_range(&hunk->original_start, &hunk->original_length, range->data))
241251881Speter    return FALSE;
242251881Speter
243251881Speter  /* Clear the stringbuf so we can reuse it for the second range. */
244251881Speter  svn_stringbuf_setempty(range);
245251881Speter  p++;
246251881Speter  if (*p != '+')
247251881Speter    /* Eeek! */
248251881Speter    return FALSE;
249251881Speter  /* OK, this may be worth copying... */
250251881Speter  start = ++p;
251251881Speter  while (*p && *p != ' ')
252251881Speter    {
253251881Speter      p++;
254251881Speter    }
255251881Speter  if (*p != ' ')
256251881Speter    /* No no no... */
257251881Speter    return FALSE;
258251881Speter
259251881Speter  svn_stringbuf_appendbytes(range, start, p - start);
260251881Speter
261251881Speter  /* Check for trailing @@ */
262251881Speter  p++;
263251881Speter  if (! starts_with(p, atat))
264251881Speter    return FALSE;
265251881Speter
266251881Speter  /* There may be stuff like C-function names after the trailing @@,
267251881Speter   * but we ignore that. */
268251881Speter
269251881Speter  /* Try to parse the second range. */
270251881Speter  if (! parse_range(&hunk->modified_start, &hunk->modified_length, range->data))
271251881Speter    return FALSE;
272251881Speter
273251881Speter  /* Hunk header is good. */
274251881Speter  return TRUE;
275251881Speter}
276251881Speter
277251881Speter/* Read a line of original or modified hunk text from the specified
278251881Speter * RANGE within FILE. FILE is expected to contain unidiff text.
279251881Speter * Leading unidiff symbols ('+', '-', and ' ') are removed from the line,
280251881Speter * Any lines commencing with the VERBOTEN character are discarded.
281251881Speter * VERBOTEN should be '+' or '-', depending on which form of hunk text
282251881Speter * is being read.
283251881Speter *
284251881Speter * All other parameters are as in svn_diff_hunk_readline_original_text()
285251881Speter * and svn_diff_hunk_readline_modified_text().
286251881Speter */
287251881Speterstatic svn_error_t *
288251881Speterhunk_readline_original_or_modified(apr_file_t *file,
289251881Speter                                   struct svn_diff__hunk_range *range,
290251881Speter                                   svn_stringbuf_t **stringbuf,
291251881Speter                                   const char **eol,
292251881Speter                                   svn_boolean_t *eof,
293251881Speter                                   char verboten,
294251881Speter                                   apr_pool_t *result_pool,
295251881Speter                                   apr_pool_t *scratch_pool)
296251881Speter{
297251881Speter  apr_size_t max_len;
298251881Speter  svn_boolean_t filtered;
299251881Speter  apr_off_t pos;
300251881Speter  svn_stringbuf_t *str;
301251881Speter
302251881Speter  if (range->current >= range->end)
303251881Speter    {
304251881Speter      /* We're past the range. Indicate that no bytes can be read. */
305251881Speter      *eof = TRUE;
306251881Speter      if (eol)
307251881Speter        *eol = NULL;
308251881Speter      *stringbuf = svn_stringbuf_create_empty(result_pool);
309251881Speter      return SVN_NO_ERROR;
310251881Speter    }
311251881Speter
312251881Speter  pos = 0;
313251881Speter  SVN_ERR(svn_io_file_seek(file, APR_CUR, &pos,  scratch_pool));
314251881Speter  SVN_ERR(svn_io_file_seek(file, APR_SET, &range->current, scratch_pool));
315251881Speter  do
316251881Speter    {
317251881Speter      max_len = range->end - range->current;
318251881Speter      SVN_ERR(svn_io_file_readline(file, &str, eol, eof, max_len,
319251881Speter                                   result_pool, scratch_pool));
320251881Speter      range->current = 0;
321251881Speter      SVN_ERR(svn_io_file_seek(file, APR_CUR, &range->current, scratch_pool));
322251881Speter      filtered = (str->data[0] == verboten || str->data[0] == '\\');
323251881Speter    }
324251881Speter  while (filtered && ! *eof);
325251881Speter
326251881Speter  if (filtered)
327251881Speter    {
328251881Speter      /* EOF, return an empty string. */
329251881Speter      *stringbuf = svn_stringbuf_create_ensure(0, result_pool);
330251881Speter    }
331251881Speter  else if (str->data[0] == '+' || str->data[0] == '-' || str->data[0] == ' ')
332251881Speter    {
333251881Speter      /* Shave off leading unidiff symbols. */
334251881Speter      *stringbuf = svn_stringbuf_create(str->data + 1, result_pool);
335251881Speter    }
336251881Speter  else
337251881Speter    {
338251881Speter      /* Return the line as-is. */
339251881Speter      *stringbuf = svn_stringbuf_dup(str, result_pool);
340251881Speter    }
341251881Speter
342251881Speter  SVN_ERR(svn_io_file_seek(file, APR_SET, &pos, scratch_pool));
343251881Speter
344251881Speter  return SVN_NO_ERROR;
345251881Speter}
346251881Speter
347251881Spetersvn_error_t *
348251881Spetersvn_diff_hunk_readline_original_text(svn_diff_hunk_t *hunk,
349251881Speter                                     svn_stringbuf_t **stringbuf,
350251881Speter                                     const char **eol,
351251881Speter                                     svn_boolean_t *eof,
352251881Speter                                     apr_pool_t *result_pool,
353251881Speter                                     apr_pool_t *scratch_pool)
354251881Speter{
355251881Speter  return svn_error_trace(
356251881Speter    hunk_readline_original_or_modified(hunk->apr_file,
357251881Speter                                       hunk->patch->reverse ?
358251881Speter                                         &hunk->modified_text_range :
359251881Speter                                         &hunk->original_text_range,
360251881Speter                                       stringbuf, eol, eof,
361251881Speter                                       hunk->patch->reverse ? '-' : '+',
362251881Speter                                       result_pool, scratch_pool));
363251881Speter}
364251881Speter
365251881Spetersvn_error_t *
366251881Spetersvn_diff_hunk_readline_modified_text(svn_diff_hunk_t *hunk,
367251881Speter                                     svn_stringbuf_t **stringbuf,
368251881Speter                                     const char **eol,
369251881Speter                                     svn_boolean_t *eof,
370251881Speter                                     apr_pool_t *result_pool,
371251881Speter                                     apr_pool_t *scratch_pool)
372251881Speter{
373251881Speter  return svn_error_trace(
374251881Speter    hunk_readline_original_or_modified(hunk->apr_file,
375251881Speter                                       hunk->patch->reverse ?
376251881Speter                                         &hunk->original_text_range :
377251881Speter                                         &hunk->modified_text_range,
378251881Speter                                       stringbuf, eol, eof,
379251881Speter                                       hunk->patch->reverse ? '+' : '-',
380251881Speter                                       result_pool, scratch_pool));
381251881Speter}
382251881Speter
383251881Spetersvn_error_t *
384251881Spetersvn_diff_hunk_readline_diff_text(svn_diff_hunk_t *hunk,
385251881Speter                                 svn_stringbuf_t **stringbuf,
386251881Speter                                 const char **eol,
387251881Speter                                 svn_boolean_t *eof,
388251881Speter                                 apr_pool_t *result_pool,
389251881Speter                                 apr_pool_t *scratch_pool)
390251881Speter{
391251881Speter  svn_stringbuf_t *line;
392251881Speter  apr_size_t max_len;
393251881Speter  apr_off_t pos;
394251881Speter
395251881Speter  if (hunk->diff_text_range.current >= hunk->diff_text_range.end)
396251881Speter    {
397251881Speter      /* We're past the range. Indicate that no bytes can be read. */
398251881Speter      *eof = TRUE;
399251881Speter      if (eol)
400251881Speter        *eol = NULL;
401251881Speter      *stringbuf = svn_stringbuf_create_empty(result_pool);
402251881Speter      return SVN_NO_ERROR;
403251881Speter    }
404251881Speter
405251881Speter  pos = 0;
406251881Speter  SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_CUR, &pos, scratch_pool));
407251881Speter  SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET,
408251881Speter                           &hunk->diff_text_range.current, scratch_pool));
409251881Speter  max_len = hunk->diff_text_range.end - hunk->diff_text_range.current;
410251881Speter  SVN_ERR(svn_io_file_readline(hunk->apr_file, &line, eol, eof, max_len,
411251881Speter                               result_pool,
412251881Speter                   scratch_pool));
413251881Speter  hunk->diff_text_range.current = 0;
414251881Speter  SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_CUR,
415251881Speter                           &hunk->diff_text_range.current, scratch_pool));
416251881Speter  SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET, &pos, scratch_pool));
417251881Speter
418251881Speter  if (hunk->patch->reverse)
419251881Speter    {
420299742Sdim      if (line->data[0] == '+')
421299742Sdim        line->data[0] = '-';
422299742Sdim      else if (line->data[0] == '-')
423299742Sdim        line->data[0] = '+';
424251881Speter    }
425251881Speter
426251881Speter  *stringbuf = line;
427251881Speter
428251881Speter  return SVN_NO_ERROR;
429251881Speter}
430251881Speter
431251881Speter/* Parse *PROP_NAME from HEADER as the part after the INDICATOR line.
432251881Speter * Allocate *PROP_NAME in RESULT_POOL.
433251881Speter * Set *PROP_NAME to NULL if no valid property name was found. */
434251881Speterstatic svn_error_t *
435251881Speterparse_prop_name(const char **prop_name, const char *header,
436251881Speter                const char *indicator, apr_pool_t *result_pool)
437251881Speter{
438251881Speter  SVN_ERR(svn_utf_cstring_to_utf8(prop_name,
439251881Speter                                  header + strlen(indicator),
440251881Speter                                  result_pool));
441251881Speter  if (**prop_name == '\0')
442251881Speter    *prop_name = NULL;
443251881Speter  else if (! svn_prop_name_is_valid(*prop_name))
444251881Speter    {
445251881Speter      svn_stringbuf_t *buf = svn_stringbuf_create(*prop_name, result_pool);
446251881Speter      svn_stringbuf_strip_whitespace(buf);
447251881Speter      *prop_name = (svn_prop_name_is_valid(buf->data) ? buf->data : NULL);
448251881Speter    }
449251881Speter
450251881Speter  return SVN_NO_ERROR;
451251881Speter}
452251881Speter
453299742Sdim
454299742Sdim/* A helper function to parse svn:mergeinfo diffs.
455299742Sdim *
456299742Sdim * These diffs use a special pretty-print format, for instance:
457299742Sdim *
458299742Sdim * Added: svn:mergeinfo
459299742Sdim * ## -0,0 +0,1 ##
460299742Sdim *   Merged /trunk:r2-3
461299742Sdim *
462299742Sdim * The hunk header has the following format:
463299742Sdim * ## -0,NUMBER_OF_REVERSE_MERGES +0,NUMBER_OF_FORWARD_MERGES ##
464299742Sdim *
465299742Sdim * At this point, the number of reverse merges has already been
466299742Sdim * parsed into HUNK->ORIGINAL_LENGTH, and the number of forward
467299742Sdim * merges has been parsed into HUNK->MODIFIED_LENGTH.
468299742Sdim *
469299742Sdim * The header is followed by a list of mergeinfo, one path per line.
470299742Sdim * This function parses such lines. Lines describing reverse merges
471299742Sdim * appear first, and then all lines describing forward merges appear.
472299742Sdim *
473299742Sdim * Parts of the line are affected by i18n. The words 'Merged'
474299742Sdim * and 'Reverse-merged' can appear in any language and at any
475299742Sdim * position within the line. We can only assume that a leading
476299742Sdim * '/' starts the merge source path, the path is followed by
477299742Sdim * ":r", which in turn is followed by a mergeinfo revision range,
478299742Sdim *  which is terminated by whitespace or end-of-string.
479299742Sdim *
480299742Sdim * If the current line meets the above criteria and we're able
481299742Sdim * to parse valid mergeinfo from it, the resulting mergeinfo
482299742Sdim * is added to patch->mergeinfo or patch->reverse_mergeinfo,
483299742Sdim * and we proceed to the next line.
484299742Sdim */
485299742Sdimstatic svn_error_t *
486299742Sdimparse_mergeinfo(svn_boolean_t *found_mergeinfo,
487299742Sdim                svn_stringbuf_t *line,
488299742Sdim                svn_diff_hunk_t *hunk,
489299742Sdim                svn_patch_t *patch,
490299742Sdim                apr_pool_t *result_pool,
491299742Sdim                apr_pool_t *scratch_pool)
492299742Sdim{
493299742Sdim  char *slash = strchr(line->data, '/');
494299742Sdim  char *colon = strrchr(line->data, ':');
495299742Sdim
496299742Sdim  *found_mergeinfo = FALSE;
497299742Sdim
498299742Sdim  if (slash && colon && colon[1] == 'r' && slash < colon)
499299742Sdim    {
500299742Sdim      svn_stringbuf_t *input;
501299742Sdim      svn_mergeinfo_t mergeinfo = NULL;
502299742Sdim      char *s;
503299742Sdim      svn_error_t *err;
504299742Sdim
505299742Sdim      input = svn_stringbuf_create_ensure(line->len, scratch_pool);
506299742Sdim
507299742Sdim      /* Copy the merge source path + colon */
508299742Sdim      s = slash;
509299742Sdim      while (s <= colon)
510299742Sdim        {
511299742Sdim          svn_stringbuf_appendbyte(input, *s);
512299742Sdim          s++;
513299742Sdim        }
514299742Sdim
515299742Sdim      /* skip 'r' after colon */
516299742Sdim      s++;
517299742Sdim
518299742Sdim      /* Copy the revision range. */
519299742Sdim      while (s < line->data + line->len)
520299742Sdim        {
521299742Sdim          if (svn_ctype_isspace(*s))
522299742Sdim            break;
523299742Sdim          svn_stringbuf_appendbyte(input, *s);
524299742Sdim          s++;
525299742Sdim        }
526299742Sdim
527299742Sdim      err = svn_mergeinfo_parse(&mergeinfo, input->data, result_pool);
528299742Sdim      if (err && err->apr_err == SVN_ERR_MERGEINFO_PARSE_ERROR)
529299742Sdim        {
530299742Sdim          svn_error_clear(err);
531299742Sdim          mergeinfo = NULL;
532299742Sdim        }
533299742Sdim      else
534299742Sdim        SVN_ERR(err);
535299742Sdim
536299742Sdim      if (mergeinfo)
537299742Sdim        {
538299742Sdim          if (hunk->original_length > 0) /* reverse merges */
539299742Sdim            {
540299742Sdim              if (patch->reverse)
541299742Sdim                {
542299742Sdim                  if (patch->mergeinfo == NULL)
543299742Sdim                    patch->mergeinfo = mergeinfo;
544299742Sdim                  else
545299742Sdim                    SVN_ERR(svn_mergeinfo_merge2(patch->mergeinfo,
546299742Sdim                                                 mergeinfo,
547299742Sdim                                                 result_pool,
548299742Sdim                                                 scratch_pool));
549299742Sdim                }
550299742Sdim              else
551299742Sdim                {
552299742Sdim                  if (patch->reverse_mergeinfo == NULL)
553299742Sdim                    patch->reverse_mergeinfo = mergeinfo;
554299742Sdim                  else
555299742Sdim                    SVN_ERR(svn_mergeinfo_merge2(patch->reverse_mergeinfo,
556299742Sdim                                                 mergeinfo,
557299742Sdim                                                 result_pool,
558299742Sdim                                                 scratch_pool));
559299742Sdim                }
560299742Sdim              hunk->original_length--;
561299742Sdim            }
562299742Sdim          else if (hunk->modified_length > 0) /* forward merges */
563299742Sdim            {
564299742Sdim              if (patch->reverse)
565299742Sdim                {
566299742Sdim                  if (patch->reverse_mergeinfo == NULL)
567299742Sdim                    patch->reverse_mergeinfo = mergeinfo;
568299742Sdim                  else
569299742Sdim                    SVN_ERR(svn_mergeinfo_merge2(patch->reverse_mergeinfo,
570299742Sdim                                                 mergeinfo,
571299742Sdim                                                 result_pool,
572299742Sdim                                                 scratch_pool));
573299742Sdim                }
574299742Sdim              else
575299742Sdim                {
576299742Sdim                  if (patch->mergeinfo == NULL)
577299742Sdim                    patch->mergeinfo = mergeinfo;
578299742Sdim                  else
579299742Sdim                    SVN_ERR(svn_mergeinfo_merge2(patch->mergeinfo,
580299742Sdim                                                 mergeinfo,
581299742Sdim                                                 result_pool,
582299742Sdim                                                 scratch_pool));
583299742Sdim                }
584299742Sdim              hunk->modified_length--;
585299742Sdim            }
586299742Sdim
587299742Sdim          *found_mergeinfo = TRUE;
588299742Sdim        }
589299742Sdim    }
590299742Sdim
591299742Sdim  return SVN_NO_ERROR;
592299742Sdim}
593299742Sdim
594251881Speter/* Return the next *HUNK from a PATCH in APR_FILE.
595251881Speter * If no hunk can be found, set *HUNK to NULL.
596251881Speter * Set IS_PROPERTY to TRUE if we have a property hunk. If the returned HUNK
597251881Speter * is the first belonging to a certain property, then PROP_NAME and
598251881Speter * PROP_OPERATION will be set too. If we have a text hunk, PROP_NAME will be
599251881Speter * NULL.  If IGNORE_WHITESPACE is TRUE, lines without leading spaces will be
600251881Speter * treated as context lines.  Allocate results in RESULT_POOL.
601251881Speter * Use SCRATCH_POOL for all other allocations. */
602251881Speterstatic svn_error_t *
603251881Speterparse_next_hunk(svn_diff_hunk_t **hunk,
604251881Speter                svn_boolean_t *is_property,
605251881Speter                const char **prop_name,
606251881Speter                svn_diff_operation_kind_t *prop_operation,
607251881Speter                svn_patch_t *patch,
608251881Speter                apr_file_t *apr_file,
609251881Speter                svn_boolean_t ignore_whitespace,
610251881Speter                apr_pool_t *result_pool,
611251881Speter                apr_pool_t *scratch_pool)
612251881Speter{
613251881Speter  static const char * const minus = "--- ";
614251881Speter  static const char * const text_atat = "@@";
615251881Speter  static const char * const prop_atat = "##";
616251881Speter  svn_stringbuf_t *line;
617251881Speter  svn_boolean_t eof, in_hunk, hunk_seen;
618251881Speter  apr_off_t pos, last_line;
619251881Speter  apr_off_t start, end;
620251881Speter  apr_off_t original_end;
621251881Speter  apr_off_t modified_end;
622251881Speter  svn_linenum_t original_lines;
623251881Speter  svn_linenum_t modified_lines;
624251881Speter  svn_linenum_t leading_context;
625251881Speter  svn_linenum_t trailing_context;
626251881Speter  svn_boolean_t changed_line_seen;
627251881Speter  enum {
628251881Speter    noise_line,
629251881Speter    original_line,
630251881Speter    modified_line,
631251881Speter    context_line
632251881Speter  } last_line_type;
633251881Speter  apr_pool_t *iterpool;
634251881Speter
635251881Speter  *prop_operation = svn_diff_op_unchanged;
636251881Speter
637251881Speter  /* We only set this if we have a property hunk header. */
638251881Speter  *prop_name = NULL;
639251881Speter  *is_property = FALSE;
640251881Speter
641251881Speter  if (apr_file_eof(apr_file) == APR_EOF)
642251881Speter    {
643251881Speter      /* No more hunks here. */
644251881Speter      *hunk = NULL;
645251881Speter      return SVN_NO_ERROR;
646251881Speter    }
647251881Speter
648251881Speter  in_hunk = FALSE;
649251881Speter  hunk_seen = FALSE;
650251881Speter  leading_context = 0;
651251881Speter  trailing_context = 0;
652251881Speter  changed_line_seen = FALSE;
653251881Speter  original_end = 0;
654251881Speter  modified_end = 0;
655251881Speter  *hunk = apr_pcalloc(result_pool, sizeof(**hunk));
656251881Speter
657251881Speter  /* Get current seek position -- APR has no ftell() :( */
658251881Speter  pos = 0;
659251881Speter  SVN_ERR(svn_io_file_seek(apr_file, APR_CUR, &pos, scratch_pool));
660251881Speter
661251881Speter  /* Start out assuming noise. */
662251881Speter  last_line_type = noise_line;
663251881Speter
664251881Speter  iterpool = svn_pool_create(scratch_pool);
665251881Speter  do
666251881Speter    {
667251881Speter
668251881Speter      svn_pool_clear(iterpool);
669251881Speter
670251881Speter      /* Remember the current line's offset, and read the line. */
671251881Speter      last_line = pos;
672251881Speter      SVN_ERR(svn_io_file_readline(apr_file, &line, NULL, &eof, APR_SIZE_MAX,
673251881Speter                                   iterpool, iterpool));
674251881Speter
675251881Speter      /* Update line offset for next iteration. */
676251881Speter      pos = 0;
677251881Speter      SVN_ERR(svn_io_file_seek(apr_file, APR_CUR, &pos, iterpool));
678251881Speter
679251881Speter      /* Lines starting with a backslash indicate a missing EOL:
680251881Speter       * "\ No newline at end of file" or "end of property". */
681251881Speter      if (line->data[0] == '\\')
682251881Speter        {
683251881Speter          if (in_hunk)
684251881Speter            {
685251881Speter              char eolbuf[2];
686251881Speter              apr_size_t len;
687251881Speter              apr_off_t off;
688251881Speter              apr_off_t hunk_text_end;
689251881Speter
690251881Speter              /* Comment terminates the hunk text and says the hunk text
691251881Speter               * has no trailing EOL. Snip off trailing EOL which is part
692251881Speter               * of the patch file but not part of the hunk text. */
693251881Speter              off = last_line - 2;
694251881Speter              SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &off, iterpool));
695251881Speter              len = sizeof(eolbuf);
696251881Speter              SVN_ERR(svn_io_file_read_full2(apr_file, eolbuf, len, &len,
697251881Speter                                             &eof, iterpool));
698251881Speter              if (eolbuf[0] == '\r' && eolbuf[1] == '\n')
699251881Speter                hunk_text_end = last_line - 2;
700251881Speter              else if (eolbuf[1] == '\n' || eolbuf[1] == '\r')
701251881Speter                hunk_text_end = last_line - 1;
702251881Speter              else
703251881Speter                hunk_text_end = last_line;
704251881Speter
705251881Speter              if (last_line_type == original_line && original_end == 0)
706251881Speter                original_end = hunk_text_end;
707251881Speter              else if (last_line_type == modified_line && modified_end == 0)
708251881Speter                modified_end = hunk_text_end;
709251881Speter              else if (last_line_type == context_line)
710251881Speter                {
711251881Speter                  if (original_end == 0)
712251881Speter                    original_end = hunk_text_end;
713251881Speter                  if (modified_end == 0)
714251881Speter                    modified_end = hunk_text_end;
715251881Speter                }
716251881Speter
717251881Speter              SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &pos, iterpool));
718251881Speter            }
719251881Speter
720251881Speter          continue;
721251881Speter        }
722251881Speter
723299742Sdim      if (in_hunk && *is_property && *prop_name &&
724299742Sdim          strcmp(*prop_name, SVN_PROP_MERGEINFO) == 0)
725299742Sdim        {
726299742Sdim          svn_boolean_t found_mergeinfo;
727299742Sdim
728299742Sdim          SVN_ERR(parse_mergeinfo(&found_mergeinfo, line, *hunk, patch,
729299742Sdim                                  result_pool, iterpool));
730299742Sdim          if (found_mergeinfo)
731299742Sdim            continue; /* Proceed to the next line in the patch. */
732299742Sdim        }
733299742Sdim
734251881Speter      if (in_hunk)
735251881Speter        {
736251881Speter          char c;
737251881Speter          static const char add = '+';
738251881Speter          static const char del = '-';
739251881Speter
740251881Speter          if (! hunk_seen)
741251881Speter            {
742251881Speter              /* We're reading the first line of the hunk, so the start
743251881Speter               * of the line just read is the hunk text's byte offset. */
744251881Speter              start = last_line;
745251881Speter            }
746251881Speter
747251881Speter          c = line->data[0];
748251881Speter          if (original_lines > 0 && modified_lines > 0 &&
749251881Speter              ((c == ' ')
750251881Speter               /* Tolerate chopped leading spaces on empty lines. */
751251881Speter               || (! eof && line->len == 0)
752251881Speter               /* Maybe tolerate chopped leading spaces on non-empty lines. */
753251881Speter               || (ignore_whitespace && c != del && c != add)))
754251881Speter            {
755251881Speter              /* It's a "context" line in the hunk. */
756251881Speter              hunk_seen = TRUE;
757251881Speter              original_lines--;
758251881Speter              modified_lines--;
759251881Speter              if (changed_line_seen)
760251881Speter                trailing_context++;
761251881Speter              else
762251881Speter                leading_context++;
763251881Speter              last_line_type = context_line;
764251881Speter            }
765251881Speter          else if (original_lines > 0 && c == del)
766251881Speter            {
767251881Speter              /* It's a "deleted" line in the hunk. */
768251881Speter              hunk_seen = TRUE;
769251881Speter              changed_line_seen = TRUE;
770251881Speter
771251881Speter              /* A hunk may have context in the middle. We only want
772251881Speter                 trailing lines of context. */
773251881Speter              if (trailing_context > 0)
774251881Speter                trailing_context = 0;
775251881Speter
776251881Speter              original_lines--;
777251881Speter              last_line_type = original_line;
778251881Speter            }
779251881Speter          else if (modified_lines > 0 && c == add)
780251881Speter            {
781251881Speter              /* It's an "added" line in the hunk. */
782251881Speter              hunk_seen = TRUE;
783251881Speter              changed_line_seen = TRUE;
784251881Speter
785251881Speter              /* A hunk may have context in the middle. We only want
786251881Speter                 trailing lines of context. */
787251881Speter              if (trailing_context > 0)
788251881Speter                trailing_context = 0;
789251881Speter
790251881Speter              modified_lines--;
791251881Speter              last_line_type = modified_line;
792251881Speter            }
793251881Speter          else
794251881Speter            {
795251881Speter              if (eof)
796251881Speter                {
797251881Speter                  /* The hunk ends at EOF. */
798251881Speter                  end = pos;
799251881Speter                }
800251881Speter              else
801251881Speter                {
802251881Speter                  /* The start of the current line marks the first byte
803251881Speter                   * after the hunk text. */
804251881Speter                  end = last_line;
805251881Speter                }
806251881Speter
807251881Speter              if (original_end == 0)
808251881Speter                original_end = end;
809251881Speter              if (modified_end == 0)
810251881Speter                modified_end = end;
811251881Speter              break; /* Hunk was empty or has been read. */
812251881Speter            }
813251881Speter        }
814251881Speter      else
815251881Speter        {
816251881Speter          if (starts_with(line->data, text_atat))
817251881Speter            {
818251881Speter              /* Looks like we have a hunk header, try to rip it apart. */
819251881Speter              in_hunk = parse_hunk_header(line->data, *hunk, text_atat,
820251881Speter                                          iterpool);
821251881Speter              if (in_hunk)
822251881Speter                {
823251881Speter                  original_lines = (*hunk)->original_length;
824251881Speter                  modified_lines = (*hunk)->modified_length;
825251881Speter                  *is_property = FALSE;
826251881Speter                }
827251881Speter              }
828251881Speter          else if (starts_with(line->data, prop_atat))
829251881Speter            {
830251881Speter              /* Looks like we have a property hunk header, try to rip it
831251881Speter               * apart. */
832251881Speter              in_hunk = parse_hunk_header(line->data, *hunk, prop_atat,
833251881Speter                                          iterpool);
834251881Speter              if (in_hunk)
835251881Speter                {
836251881Speter                  original_lines = (*hunk)->original_length;
837251881Speter                  modified_lines = (*hunk)->modified_length;
838251881Speter                  *is_property = TRUE;
839251881Speter                }
840251881Speter            }
841251881Speter          else if (starts_with(line->data, "Added: "))
842251881Speter            {
843251881Speter              SVN_ERR(parse_prop_name(prop_name, line->data, "Added: ",
844251881Speter                                      result_pool));
845251881Speter              if (*prop_name)
846251881Speter                *prop_operation = svn_diff_op_added;
847251881Speter            }
848251881Speter          else if (starts_with(line->data, "Deleted: "))
849251881Speter            {
850251881Speter              SVN_ERR(parse_prop_name(prop_name, line->data, "Deleted: ",
851251881Speter                                      result_pool));
852251881Speter              if (*prop_name)
853251881Speter                *prop_operation = svn_diff_op_deleted;
854251881Speter            }
855251881Speter          else if (starts_with(line->data, "Modified: "))
856251881Speter            {
857251881Speter              SVN_ERR(parse_prop_name(prop_name, line->data, "Modified: ",
858251881Speter                                      result_pool));
859251881Speter              if (*prop_name)
860251881Speter                *prop_operation = svn_diff_op_modified;
861251881Speter            }
862251881Speter          else if (starts_with(line->data, minus)
863251881Speter                   || starts_with(line->data, "diff --git "))
864251881Speter            /* This could be a header of another patch. Bail out. */
865251881Speter            break;
866251881Speter        }
867251881Speter    }
868251881Speter  /* Check for the line length since a file may not have a newline at the
869251881Speter   * end and we depend upon the last line to be an empty one. */
870251881Speter  while (! eof || line->len > 0);
871251881Speter  svn_pool_destroy(iterpool);
872251881Speter
873251881Speter  if (! eof)
874251881Speter    /* Rewind to the start of the line just read, so subsequent calls
875251881Speter     * to this function or svn_diff_parse_next_patch() don't end
876251881Speter     * up skipping the line -- it may contain a patch or hunk header. */
877251881Speter    SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &last_line, scratch_pool));
878251881Speter
879251881Speter  if (hunk_seen && start < end)
880251881Speter    {
881251881Speter      (*hunk)->patch = patch;
882251881Speter      (*hunk)->apr_file = apr_file;
883251881Speter      (*hunk)->leading_context = leading_context;
884251881Speter      (*hunk)->trailing_context = trailing_context;
885251881Speter      (*hunk)->diff_text_range.start = start;
886251881Speter      (*hunk)->diff_text_range.current = start;
887251881Speter      (*hunk)->diff_text_range.end = end;
888251881Speter      (*hunk)->original_text_range.start = start;
889251881Speter      (*hunk)->original_text_range.current = start;
890251881Speter      (*hunk)->original_text_range.end = original_end;
891251881Speter      (*hunk)->modified_text_range.start = start;
892251881Speter      (*hunk)->modified_text_range.current = start;
893251881Speter      (*hunk)->modified_text_range.end = modified_end;
894251881Speter    }
895251881Speter  else
896251881Speter    /* Something went wrong, just discard the result. */
897251881Speter    *hunk = NULL;
898251881Speter
899251881Speter  return SVN_NO_ERROR;
900251881Speter}
901251881Speter
902251881Speter/* Compare function for sorting hunks after parsing.
903251881Speter * We sort hunks by their original line offset. */
904251881Speterstatic int
905251881Spetercompare_hunks(const void *a, const void *b)
906251881Speter{
907251881Speter  const svn_diff_hunk_t *ha = *((const svn_diff_hunk_t *const *)a);
908251881Speter  const svn_diff_hunk_t *hb = *((const svn_diff_hunk_t *const *)b);
909251881Speter
910251881Speter  if (ha->original_start < hb->original_start)
911251881Speter    return -1;
912251881Speter  if (ha->original_start > hb->original_start)
913251881Speter    return 1;
914251881Speter  return 0;
915251881Speter}
916251881Speter
/* The states the diff header parser can be in. */
enum parse_state
{
  /* Initial state; nothing has been recognized yet. */
  state_start,

  /* A 'diff --git' line has been seen. */
  state_git_diff_seen,

  /* A tree operation (rather than a content change) has been seen. */
  state_git_tree_seen,

  /* '--- /dev/null' or '--- a/' has been seen. */
  state_git_minus_seen,

  /* A git '+++ ' line has been seen. */
  state_git_plus_seen,

  /* 'rename from foo.c' has been seen. */
  state_move_from_seen,

  /* 'copy from foo.c' has been seen. */
  state_copy_from_seen,

  /* '--- foo.c' has been seen. */
  state_minus_seen,

  /* Valid start of a regular unidiff header. */
  state_unidiff_found,

  /* Valid start of a --git diff header. */
  state_git_header_found
};
931251881Speter
932251881Speter/* Data type describing a valid state transition of the parser. */
933251881Speterstruct transition
934251881Speter{
935251881Speter  const char *expected_input;
936251881Speter  enum parse_state required_state;
937251881Speter
938251881Speter  /* A callback called upon each parser state transition. */
939251881Speter  svn_error_t *(*fn)(enum parse_state *new_state, char *input,
940251881Speter                     svn_patch_t *patch, apr_pool_t *result_pool,
941251881Speter                     apr_pool_t *scratch_pool);
942251881Speter};
943251881Speter
944251881Speter/* UTF-8 encode and canonicalize the content of LINE as FILE_NAME. */
945251881Speterstatic svn_error_t *
946251881Spetergrab_filename(const char **file_name, const char *line, apr_pool_t *result_pool,
947251881Speter              apr_pool_t *scratch_pool)
948251881Speter{
949251881Speter  const char *utf8_path;
950251881Speter  const char *canon_path;
951251881Speter
952251881Speter  /* Grab the filename and encode it in UTF-8. */
953251881Speter  /* TODO: Allow specifying the patch file's encoding.
954251881Speter   *       For now, we assume its encoding is native. */
955251881Speter  /* ### This can fail if the filename cannot be represented in the current
956251881Speter   * ### locale's encoding. */
957251881Speter  SVN_ERR(svn_utf_cstring_to_utf8(&utf8_path,
958251881Speter                                  line,
959251881Speter                                  scratch_pool));
960251881Speter
961251881Speter  /* Canonicalize the path name. */
962251881Speter  canon_path = svn_dirent_canonicalize(utf8_path, scratch_pool);
963251881Speter
964251881Speter  *file_name = apr_pstrdup(result_pool, canon_path);
965251881Speter
966251881Speter  return SVN_NO_ERROR;
967251881Speter}
968251881Speter
969251881Speter/* Parse the '--- ' line of a regular unidiff. */
970251881Speterstatic svn_error_t *
971251881Speterdiff_minus(enum parse_state *new_state, char *line, svn_patch_t *patch,
972251881Speter           apr_pool_t *result_pool, apr_pool_t *scratch_pool)
973251881Speter{
974251881Speter  /* If we can find a tab, it separates the filename from
975251881Speter   * the rest of the line which we can discard. */
976251881Speter  char *tab = strchr(line, '\t');
977251881Speter  if (tab)
978251881Speter    *tab = '\0';
979251881Speter
980251881Speter  SVN_ERR(grab_filename(&patch->old_filename, line + STRLEN_LITERAL("--- "),
981251881Speter                        result_pool, scratch_pool));
982251881Speter
983251881Speter  *new_state = state_minus_seen;
984251881Speter
985251881Speter  return SVN_NO_ERROR;
986251881Speter}
987251881Speter
988251881Speter/* Parse the '+++ ' line of a regular unidiff. */
989251881Speterstatic svn_error_t *
990251881Speterdiff_plus(enum parse_state *new_state, char *line, svn_patch_t *patch,
991251881Speter           apr_pool_t *result_pool, apr_pool_t *scratch_pool)
992251881Speter{
993251881Speter  /* If we can find a tab, it separates the filename from
994251881Speter   * the rest of the line which we can discard. */
995251881Speter  char *tab = strchr(line, '\t');
996251881Speter  if (tab)
997251881Speter    *tab = '\0';
998251881Speter
999251881Speter  SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("+++ "),
1000251881Speter                        result_pool, scratch_pool));
1001251881Speter
1002251881Speter  *new_state = state_unidiff_found;
1003251881Speter
1004251881Speter  return SVN_NO_ERROR;
1005251881Speter}
1006251881Speter
1007251881Speter/* Parse the first line of a git extended unidiff. */
1008251881Speterstatic svn_error_t *
1009251881Spetergit_start(enum parse_state *new_state, char *line, svn_patch_t *patch,
1010251881Speter          apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1011251881Speter{
1012251881Speter  const char *old_path_start;
1013251881Speter  char *old_path_end;
1014251881Speter  const char *new_path_start;
1015251881Speter  const char *new_path_end;
1016251881Speter  char *new_path_marker;
1017251881Speter  const char *old_path_marker;
1018251881Speter
1019251881Speter  /* ### Add handling of escaped paths
1020251881Speter   * http://www.kernel.org/pub/software/scm/git/docs/git-diff.html:
1021251881Speter   *
1022251881Speter   * TAB, LF, double quote and backslash characters in pathnames are
1023251881Speter   * represented as \t, \n, \" and \\, respectively. If there is need for
1024251881Speter   * such substitution then the whole pathname is put in double quotes.
1025251881Speter   */
1026251881Speter
1027251881Speter  /* Our line should look like this: 'diff --git a/path b/path'.
1028251881Speter   *
1029251881Speter   * If we find any deviations from that format, we return with state reset
1030251881Speter   * to start.
1031251881Speter   */
1032251881Speter  old_path_marker = strstr(line, " a/");
1033251881Speter
1034251881Speter  if (! old_path_marker)
1035251881Speter    {
1036251881Speter      *new_state = state_start;
1037251881Speter      return SVN_NO_ERROR;
1038251881Speter    }
1039251881Speter
1040251881Speter  if (! *(old_path_marker + 3))
1041251881Speter    {
1042251881Speter      *new_state = state_start;
1043251881Speter      return SVN_NO_ERROR;
1044251881Speter    }
1045251881Speter
1046251881Speter  new_path_marker = strstr(old_path_marker, " b/");
1047251881Speter
1048251881Speter  if (! new_path_marker)
1049251881Speter    {
1050251881Speter      *new_state = state_start;
1051251881Speter      return SVN_NO_ERROR;
1052251881Speter    }
1053251881Speter
1054251881Speter  if (! *(new_path_marker + 3))
1055251881Speter    {
1056251881Speter      *new_state = state_start;
1057251881Speter      return SVN_NO_ERROR;
1058251881Speter    }
1059251881Speter
1060251881Speter  /* By now, we know that we have a line on the form '--git diff a/.+ b/.+'
1061251881Speter   * We only need the filenames when we have deleted or added empty
1062251881Speter   * files. In those cases the old_path and new_path is identical on the
1063251881Speter   * 'diff --git' line.  For all other cases we fetch the filenames from
1064251881Speter   * other header lines. */
1065251881Speter  old_path_start = line + STRLEN_LITERAL("diff --git a/");
1066251881Speter  new_path_end = line + strlen(line);
1067251881Speter  new_path_start = old_path_start;
1068251881Speter
1069251881Speter  while (TRUE)
1070251881Speter    {
1071251881Speter      ptrdiff_t len_old;
1072251881Speter      ptrdiff_t len_new;
1073251881Speter
1074251881Speter      new_path_marker = strstr(new_path_start, " b/");
1075251881Speter
1076251881Speter      /* No new path marker, bail out. */
1077251881Speter      if (! new_path_marker)
1078251881Speter        break;
1079251881Speter
1080251881Speter      old_path_end = new_path_marker;
1081251881Speter      new_path_start = new_path_marker + STRLEN_LITERAL(" b/");
1082251881Speter
1083251881Speter      /* No path after the marker. */
1084251881Speter      if (! *new_path_start)
1085251881Speter        break;
1086251881Speter
1087251881Speter      len_old = old_path_end - old_path_start;
1088251881Speter      len_new = new_path_end - new_path_start;
1089251881Speter
1090251881Speter      /* Are the paths before and after the " b/" marker the same? */
1091251881Speter      if (len_old == len_new
1092251881Speter          && ! strncmp(old_path_start, new_path_start, len_old))
1093251881Speter        {
1094251881Speter          *old_path_end = '\0';
1095251881Speter          SVN_ERR(grab_filename(&patch->old_filename, old_path_start,
1096251881Speter                                result_pool, scratch_pool));
1097251881Speter
1098251881Speter          SVN_ERR(grab_filename(&patch->new_filename, new_path_start,
1099251881Speter                                result_pool, scratch_pool));
1100251881Speter          break;
1101251881Speter        }
1102251881Speter    }
1103251881Speter
1104251881Speter  /* We assume that the path is only modified until we've found a 'tree'
1105251881Speter   * header */
1106251881Speter  patch->operation = svn_diff_op_modified;
1107251881Speter
1108251881Speter  *new_state = state_git_diff_seen;
1109251881Speter  return SVN_NO_ERROR;
1110251881Speter}
1111251881Speter
1112251881Speter/* Parse the '--- ' line of a git extended unidiff. */
1113251881Speterstatic svn_error_t *
1114251881Spetergit_minus(enum parse_state *new_state, char *line, svn_patch_t *patch,
1115251881Speter          apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1116251881Speter{
1117251881Speter  /* If we can find a tab, it separates the filename from
1118251881Speter   * the rest of the line which we can discard. */
1119251881Speter  char *tab = strchr(line, '\t');
1120251881Speter  if (tab)
1121251881Speter    *tab = '\0';
1122251881Speter
1123251881Speter  if (starts_with(line, "--- /dev/null"))
1124251881Speter    SVN_ERR(grab_filename(&patch->old_filename, "/dev/null",
1125251881Speter                          result_pool, scratch_pool));
1126251881Speter  else
1127251881Speter    SVN_ERR(grab_filename(&patch->old_filename, line + STRLEN_LITERAL("--- a/"),
1128251881Speter                          result_pool, scratch_pool));
1129251881Speter
1130251881Speter  *new_state = state_git_minus_seen;
1131251881Speter  return SVN_NO_ERROR;
1132251881Speter}
1133251881Speter
1134251881Speter/* Parse the '+++ ' line of a git extended unidiff. */
1135251881Speterstatic svn_error_t *
1136251881Spetergit_plus(enum parse_state *new_state, char *line, svn_patch_t *patch,
1137251881Speter          apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1138251881Speter{
1139251881Speter  /* If we can find a tab, it separates the filename from
1140251881Speter   * the rest of the line which we can discard. */
1141251881Speter  char *tab = strchr(line, '\t');
1142251881Speter  if (tab)
1143251881Speter    *tab = '\0';
1144251881Speter
1145251881Speter  if (starts_with(line, "+++ /dev/null"))
1146251881Speter    SVN_ERR(grab_filename(&patch->new_filename, "/dev/null",
1147251881Speter                          result_pool, scratch_pool));
1148251881Speter  else
1149251881Speter    SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("+++ b/"),
1150251881Speter                          result_pool, scratch_pool));
1151251881Speter
1152251881Speter  *new_state = state_git_header_found;
1153251881Speter  return SVN_NO_ERROR;
1154251881Speter}
1155251881Speter
1156251881Speter/* Parse the 'rename from ' line of a git extended unidiff. */
1157251881Speterstatic svn_error_t *
1158251881Spetergit_move_from(enum parse_state *new_state, char *line, svn_patch_t *patch,
1159251881Speter              apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1160251881Speter{
1161251881Speter  SVN_ERR(grab_filename(&patch->old_filename,
1162251881Speter                        line + STRLEN_LITERAL("rename from "),
1163251881Speter                        result_pool, scratch_pool));
1164251881Speter
1165251881Speter  *new_state = state_move_from_seen;
1166251881Speter  return SVN_NO_ERROR;
1167251881Speter}
1168251881Speter
1169251881Speter/* Parse the 'rename to ' line of a git extended unidiff. */
1170251881Speterstatic svn_error_t *
1171251881Spetergit_move_to(enum parse_state *new_state, char *line, svn_patch_t *patch,
1172251881Speter            apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1173251881Speter{
1174251881Speter  SVN_ERR(grab_filename(&patch->new_filename,
1175251881Speter                        line + STRLEN_LITERAL("rename to "),
1176251881Speter                        result_pool, scratch_pool));
1177251881Speter
1178251881Speter  patch->operation = svn_diff_op_moved;
1179251881Speter
1180251881Speter  *new_state = state_git_tree_seen;
1181251881Speter  return SVN_NO_ERROR;
1182251881Speter}
1183251881Speter
1184251881Speter/* Parse the 'copy from ' line of a git extended unidiff. */
1185251881Speterstatic svn_error_t *
1186251881Spetergit_copy_from(enum parse_state *new_state, char *line, svn_patch_t *patch,
1187251881Speter              apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1188251881Speter{
1189251881Speter  SVN_ERR(grab_filename(&patch->old_filename,
1190251881Speter                        line + STRLEN_LITERAL("copy from "),
1191251881Speter                        result_pool, scratch_pool));
1192251881Speter
1193251881Speter  *new_state = state_copy_from_seen;
1194251881Speter  return SVN_NO_ERROR;
1195251881Speter}
1196251881Speter
1197251881Speter/* Parse the 'copy to ' line of a git extended unidiff. */
1198251881Speterstatic svn_error_t *
1199251881Spetergit_copy_to(enum parse_state *new_state, char *line, svn_patch_t *patch,
1200251881Speter            apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1201251881Speter{
1202251881Speter  SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("copy to "),
1203251881Speter                        result_pool, scratch_pool));
1204251881Speter
1205251881Speter  patch->operation = svn_diff_op_copied;
1206251881Speter
1207251881Speter  *new_state = state_git_tree_seen;
1208251881Speter  return SVN_NO_ERROR;
1209251881Speter}
1210251881Speter
1211251881Speter/* Parse the 'new file ' line of a git extended unidiff. */
1212251881Speterstatic svn_error_t *
1213251881Spetergit_new_file(enum parse_state *new_state, char *line, svn_patch_t *patch,
1214251881Speter             apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1215251881Speter{
1216251881Speter  patch->operation = svn_diff_op_added;
1217251881Speter
1218251881Speter  /* Filename already retrieved from diff --git header. */
1219251881Speter
1220251881Speter  *new_state = state_git_tree_seen;
1221251881Speter  return SVN_NO_ERROR;
1222251881Speter}
1223251881Speter
1224251881Speter/* Parse the 'deleted file ' line of a git extended unidiff. */
1225251881Speterstatic svn_error_t *
1226251881Spetergit_deleted_file(enum parse_state *new_state, char *line, svn_patch_t *patch,
1227251881Speter                 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1228251881Speter{
1229251881Speter  patch->operation = svn_diff_op_deleted;
1230251881Speter
1231251881Speter  /* Filename already retrieved from diff --git header. */
1232251881Speter
1233251881Speter  *new_state = state_git_tree_seen;
1234251881Speter  return SVN_NO_ERROR;
1235251881Speter}
1236251881Speter
1237251881Speter/* Add a HUNK associated with the property PROP_NAME to PATCH. */
1238251881Speterstatic svn_error_t *
1239251881Speteradd_property_hunk(svn_patch_t *patch, const char *prop_name,
1240251881Speter                  svn_diff_hunk_t *hunk, svn_diff_operation_kind_t operation,
1241251881Speter                  apr_pool_t *result_pool)
1242251881Speter{
1243251881Speter  svn_prop_patch_t *prop_patch;
1244251881Speter
1245251881Speter  prop_patch = svn_hash_gets(patch->prop_patches, prop_name);
1246251881Speter
1247251881Speter  if (! prop_patch)
1248251881Speter    {
1249251881Speter      prop_patch = apr_palloc(result_pool, sizeof(svn_prop_patch_t));
1250251881Speter      prop_patch->name = prop_name;
1251251881Speter      prop_patch->operation = operation;
1252251881Speter      prop_patch->hunks = apr_array_make(result_pool, 1,
1253251881Speter                                         sizeof(svn_diff_hunk_t *));
1254251881Speter
1255251881Speter      svn_hash_sets(patch->prop_patches, prop_name, prop_patch);
1256251881Speter    }
1257251881Speter
1258251881Speter  APR_ARRAY_PUSH(prop_patch->hunks, svn_diff_hunk_t *) = hunk;
1259251881Speter
1260251881Speter  return SVN_NO_ERROR;
1261251881Speter}
1262251881Speter
1263251881Speterstruct svn_patch_file_t
1264251881Speter{
1265251881Speter  /* The APR file handle to the patch file. */
1266251881Speter  apr_file_t *apr_file;
1267251881Speter
1268251881Speter  /* The file offset at which the next patch is expected. */
1269251881Speter  apr_off_t next_patch_offset;
1270251881Speter};
1271251881Speter
1272251881Spetersvn_error_t *
1273251881Spetersvn_diff_open_patch_file(svn_patch_file_t **patch_file,
1274251881Speter                         const char *local_abspath,
1275251881Speter                         apr_pool_t *result_pool)
1276251881Speter{
1277251881Speter  svn_patch_file_t *p;
1278251881Speter
1279251881Speter  p = apr_palloc(result_pool, sizeof(*p));
1280251881Speter  SVN_ERR(svn_io_file_open(&p->apr_file, local_abspath,
1281251881Speter                           APR_READ | APR_BUFFERED, APR_OS_DEFAULT,
1282251881Speter                           result_pool));
1283251881Speter  p->next_patch_offset = 0;
1284251881Speter  *patch_file = p;
1285251881Speter
1286251881Speter  return SVN_NO_ERROR;
1287251881Speter}
1288251881Speter
1289251881Speter/* Parse hunks from APR_FILE and store them in PATCH->HUNKS.
1290251881Speter * Parsing stops if no valid next hunk can be found.
1291251881Speter * If IGNORE_WHITESPACE is TRUE, lines without
1292251881Speter * leading spaces will be treated as context lines.
1293251881Speter * Allocate results in RESULT_POOL.
1294251881Speter * Use SCRATCH_POOL for temporary allocations. */
1295251881Speterstatic svn_error_t *
1296251881Speterparse_hunks(svn_patch_t *patch, apr_file_t *apr_file,
1297251881Speter            svn_boolean_t ignore_whitespace,
1298251881Speter            apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1299251881Speter{
1300251881Speter  svn_diff_hunk_t *hunk;
1301251881Speter  svn_boolean_t is_property;
1302251881Speter  const char *last_prop_name;
1303251881Speter  const char *prop_name;
1304251881Speter  svn_diff_operation_kind_t prop_operation;
1305251881Speter  apr_pool_t *iterpool;
1306251881Speter
1307251881Speter  last_prop_name = NULL;
1308251881Speter
1309251881Speter  patch->hunks = apr_array_make(result_pool, 10, sizeof(svn_diff_hunk_t *));
1310251881Speter  patch->prop_patches = apr_hash_make(result_pool);
1311251881Speter  iterpool = svn_pool_create(scratch_pool);
1312251881Speter  do
1313251881Speter    {
1314251881Speter      svn_pool_clear(iterpool);
1315251881Speter
1316251881Speter      SVN_ERR(parse_next_hunk(&hunk, &is_property, &prop_name, &prop_operation,
1317251881Speter                              patch, apr_file, ignore_whitespace, result_pool,
1318251881Speter                              iterpool));
1319251881Speter
1320251881Speter      if (hunk && is_property)
1321251881Speter        {
1322251881Speter          if (! prop_name)
1323251881Speter            prop_name = last_prop_name;
1324251881Speter          else
1325251881Speter            last_prop_name = prop_name;
1326299742Sdim
1327299742Sdim          /* Skip svn:mergeinfo properties.
1328299742Sdim           * Mergeinfo data cannot be represented as a hunk and
1329299742Sdim           * is therefore stored in PATCH itself. */
1330299742Sdim          if (strcmp(prop_name, SVN_PROP_MERGEINFO) == 0)
1331299742Sdim            continue;
1332299742Sdim
1333251881Speter          SVN_ERR(add_property_hunk(patch, prop_name, hunk, prop_operation,
1334251881Speter                                    result_pool));
1335251881Speter        }
1336251881Speter      else if (hunk)
1337251881Speter        {
1338251881Speter          APR_ARRAY_PUSH(patch->hunks, svn_diff_hunk_t *) = hunk;
1339251881Speter          last_prop_name = NULL;
1340251881Speter        }
1341251881Speter
1342251881Speter    }
1343251881Speter  while (hunk);
1344251881Speter  svn_pool_destroy(iterpool);
1345251881Speter
1346251881Speter  return SVN_NO_ERROR;
1347251881Speter}
1348251881Speter
1349251881Speter/* State machine for the diff header parser.
1350251881Speter * Expected Input   Required state          Function to call */
1351251881Speterstatic struct transition transitions[] =
1352251881Speter{
1353251881Speter  {"--- ",          state_start,            diff_minus},
1354251881Speter  {"+++ ",          state_minus_seen,       diff_plus},
1355251881Speter  {"diff --git",    state_start,            git_start},
1356251881Speter  {"--- a/",        state_git_diff_seen,    git_minus},
1357251881Speter  {"--- a/",        state_git_tree_seen,    git_minus},
1358251881Speter  {"--- /dev/null", state_git_tree_seen,    git_minus},
1359251881Speter  {"+++ b/",        state_git_minus_seen,   git_plus},
1360251881Speter  {"+++ /dev/null", state_git_minus_seen,   git_plus},
1361251881Speter  {"rename from ",  state_git_diff_seen,    git_move_from},
1362251881Speter  {"rename to ",    state_move_from_seen,   git_move_to},
1363251881Speter  {"copy from ",    state_git_diff_seen,    git_copy_from},
1364251881Speter  {"copy to ",      state_copy_from_seen,   git_copy_to},
1365251881Speter  {"new file ",     state_git_diff_seen,    git_new_file},
1366251881Speter  {"deleted file ", state_git_diff_seen,    git_deleted_file},
1367251881Speter};
1368251881Speter
1369251881Spetersvn_error_t *
1370299742Sdimsvn_diff_parse_next_patch(svn_patch_t **patch_p,
1371251881Speter                          svn_patch_file_t *patch_file,
1372251881Speter                          svn_boolean_t reverse,
1373251881Speter                          svn_boolean_t ignore_whitespace,
1374251881Speter                          apr_pool_t *result_pool,
1375251881Speter                          apr_pool_t *scratch_pool)
1376251881Speter{
1377251881Speter  apr_off_t pos, last_line;
1378251881Speter  svn_boolean_t eof;
1379251881Speter  svn_boolean_t line_after_tree_header_read = FALSE;
1380251881Speter  apr_pool_t *iterpool;
1381299742Sdim  svn_patch_t *patch;
1382251881Speter  enum parse_state state = state_start;
1383251881Speter
1384251881Speter  if (apr_file_eof(patch_file->apr_file) == APR_EOF)
1385251881Speter    {
1386251881Speter      /* No more patches here. */
1387299742Sdim      *patch_p = NULL;
1388251881Speter      return SVN_NO_ERROR;
1389251881Speter    }
1390251881Speter
1391299742Sdim  patch = apr_pcalloc(result_pool, sizeof(*patch));
1392251881Speter
1393251881Speter  pos = patch_file->next_patch_offset;
1394251881Speter  SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &pos, scratch_pool));
1395251881Speter
1396251881Speter  iterpool = svn_pool_create(scratch_pool);
1397251881Speter  do
1398251881Speter    {
1399251881Speter      svn_stringbuf_t *line;
1400251881Speter      svn_boolean_t valid_header_line = FALSE;
1401251881Speter      int i;
1402251881Speter
1403251881Speter      svn_pool_clear(iterpool);
1404251881Speter
1405251881Speter      /* Remember the current line's offset, and read the line. */
1406251881Speter      last_line = pos;
1407251881Speter      SVN_ERR(svn_io_file_readline(patch_file->apr_file, &line, NULL, &eof,
1408251881Speter                                   APR_SIZE_MAX, iterpool, iterpool));
1409251881Speter
1410251881Speter      if (! eof)
1411251881Speter        {
1412251881Speter          /* Update line offset for next iteration. */
1413251881Speter          pos = 0;
1414251881Speter          SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_CUR, &pos,
1415251881Speter                                   iterpool));
1416251881Speter        }
1417251881Speter
1418251881Speter      /* Run the state machine. */
1419251881Speter      for (i = 0; i < (sizeof(transitions) / sizeof(transitions[0])); i++)
1420251881Speter        {
1421251881Speter          if (starts_with(line->data, transitions[i].expected_input)
1422251881Speter              && state == transitions[i].required_state)
1423251881Speter            {
1424299742Sdim              SVN_ERR(transitions[i].fn(&state, line->data, patch,
1425251881Speter                                        result_pool, iterpool));
1426251881Speter              valid_header_line = TRUE;
1427251881Speter              break;
1428251881Speter            }
1429251881Speter        }
1430251881Speter
1431251881Speter      if (state == state_unidiff_found || state == state_git_header_found)
1432251881Speter        {
1433251881Speter          /* We have a valid diff header, yay! */
1434251881Speter          break;
1435251881Speter        }
1436251881Speter      else if (state == state_git_tree_seen && line_after_tree_header_read)
1437251881Speter        {
1438251881Speter          /* git patches can contain an index line after the file mode line */
1439251881Speter          if (!starts_with(line->data, "index "))
1440251881Speter          {
1441251881Speter            /* We have a valid diff header for a patch with only tree changes.
1442251881Speter             * Rewind to the start of the line just read, so subsequent calls
1443251881Speter             * to this function don't end up skipping the line -- it may
1444251881Speter             * contain a patch. */
1445251881Speter            SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &last_line,
1446251881Speter                    scratch_pool));
1447251881Speter            break;
1448251881Speter          }
1449251881Speter        }
1450251881Speter      else if (state == state_git_tree_seen)
1451251881Speter        {
1452251881Speter          line_after_tree_header_read = TRUE;
1453251881Speter        }
1454251881Speter      else if (! valid_header_line && state != state_start
1455289166Speter               && state != state_git_diff_seen
1456251881Speter               && !starts_with(line->data, "index "))
1457251881Speter        {
1458251881Speter          /* We've encountered an invalid diff header.
1459251881Speter           *
1460251881Speter           * Rewind to the start of the line just read - it may be a new
1461251881Speter           * header that begins there. */
1462251881Speter          SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &last_line,
1463251881Speter                                   scratch_pool));
1464251881Speter          state = state_start;
1465251881Speter        }
1466251881Speter
1467251881Speter    }
1468251881Speter  while (! eof);
1469251881Speter
1470299742Sdim  patch->reverse = reverse;
1471251881Speter  if (reverse)
1472251881Speter    {
1473251881Speter      const char *temp;
1474299742Sdim      temp = patch->old_filename;
1475299742Sdim      patch->old_filename = patch->new_filename;
1476299742Sdim      patch->new_filename = temp;
1477251881Speter    }
1478251881Speter
1479299742Sdim  if (patch->old_filename == NULL || patch->new_filename == NULL)
1480251881Speter    {
1481251881Speter      /* Something went wrong, just discard the result. */
1482299742Sdim      patch = NULL;
1483251881Speter    }
1484251881Speter  else
1485299742Sdim    SVN_ERR(parse_hunks(patch, patch_file->apr_file, ignore_whitespace,
1486251881Speter                        result_pool, iterpool));
1487251881Speter
1488251881Speter  svn_pool_destroy(iterpool);
1489251881Speter
1490251881Speter  patch_file->next_patch_offset = 0;
1491251881Speter  SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_CUR,
1492251881Speter                           &patch_file->next_patch_offset, scratch_pool));
1493251881Speter
1494299742Sdim  if (patch)
1495251881Speter    {
1496251881Speter      /* Usually, hunks appear in the patch sorted by their original line
1497251881Speter       * offset. But just in case they weren't parsed in this order for
1498251881Speter       * some reason, we sort them so that our caller can assume that hunks
1499251881Speter       * are sorted as if parsed from a usual patch. */
1500299742Sdim      svn_sort__array(patch->hunks, compare_hunks);
1501251881Speter    }
1502251881Speter
1503299742Sdim  *patch_p = patch;
1504251881Speter  return SVN_NO_ERROR;
1505251881Speter}
1506251881Speter
1507251881Spetersvn_error_t *
1508251881Spetersvn_diff_close_patch_file(svn_patch_file_t *patch_file,
1509251881Speter                          apr_pool_t *scratch_pool)
1510251881Speter{
1511251881Speter  return svn_error_trace(svn_io_file_close(patch_file->apr_file,
1512251881Speter                                           scratch_pool));
1513251881Speter}
1514