parse-diff.c revision 362181
1/*
2 * parse-diff.c: functions for parsing diff files
3 *
4 * ====================================================================
5 *    Licensed to the Apache Software Foundation (ASF) under one
6 *    or more contributor license agreements.  See the NOTICE file
7 *    distributed with this work for additional information
8 *    regarding copyright ownership.  The ASF licenses this file
9 *    to you under the Apache License, Version 2.0 (the
10 *    "License"); you may not use this file except in compliance
11 *    with the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 *    Unless required by applicable law or agreed to in writing,
16 *    software distributed under the License is distributed on an
17 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 *    KIND, either express or implied.  See the License for the
19 *    specific language governing permissions and limitations
20 *    under the License.
21 * ====================================================================
22 */
23
24#include <stdlib.h>
25#include <stddef.h>
26#include <string.h>
27
28#include "svn_hash.h"
29#include "svn_types.h"
30#include "svn_error.h"
31#include "svn_io.h"
32#include "svn_pools.h"
33#include "svn_props.h"
34#include "svn_string.h"
35#include "svn_utf.h"
36#include "svn_dirent_uri.h"
37#include "svn_diff.h"
38#include "svn_ctype.h"
39#include "svn_mergeinfo.h"
40
41#include "private/svn_eol_private.h"
42#include "private/svn_dep_compat.h"
43#include "private/svn_diff_private.h"
44#include "private/svn_sorts_private.h"
45
46#include "diff.h"
47
48#include "svn_private_config.h"
49
/* Evaluate to non-zero iff the C string STR begins with the C string START.
 * START is expanded twice, so it must not have side effects. */
#define starts_with(str, start) \
  (0 == strncmp((str), (start), strlen(start)))

/* Length of STR excluding its NUL terminator, computed with sizeof;
 * only meaningful for string literals (or char arrays). */
#define STRLEN_LITERAL(str) ((sizeof(str)) - 1)
56
57/* This struct describes a range within a file, as well as the
58 * current cursor position within the range. All numbers are in bytes. */
59struct svn_diff__hunk_range {
60  apr_off_t start;
61  apr_off_t end;
62  apr_off_t current;
63};
64
65struct svn_diff_hunk_t {
66  /* The patch this hunk belongs to. */
67  const svn_patch_t *patch;
68
69  /* APR file handle to the patch file this hunk came from. */
70  apr_file_t *apr_file;
71
72  /* Whether the hunk was interpreted as pretty-print mergeinfo. If so,
73     the hunk content is in PATCH and the rest of this hunk object is
74     mostly uninitialized. */
75  svn_boolean_t is_pretty_print_mergeinfo;
76
77  /* Ranges used to keep track of this hunk's texts positions within
78   * the patch file. */
79  struct svn_diff__hunk_range diff_text_range;
80  struct svn_diff__hunk_range original_text_range;
81  struct svn_diff__hunk_range modified_text_range;
82
83  /* Hunk ranges as they appeared in the patch file.
84   * All numbers are lines, not bytes. */
85  svn_linenum_t original_start;
86  svn_linenum_t original_length;
87  svn_linenum_t modified_start;
88  svn_linenum_t modified_length;
89
90  /* Number of lines of leading and trailing hunk context. */
91  svn_linenum_t leading_context;
92  svn_linenum_t trailing_context;
93
94  /* Did we see a 'file does not end with eol' marker in this hunk? */
95  svn_boolean_t original_no_final_eol;
96  svn_boolean_t modified_no_final_eol;
97
98  /* Fuzz penalty, triggered by bad patch targets */
99  svn_linenum_t original_fuzz;
100  svn_linenum_t modified_fuzz;
101};
102
103struct svn_diff_binary_patch_t {
104  /* The patch this hunk belongs to. */
105  const svn_patch_t *patch;
106
107  /* APR file handle to the patch file this hunk came from. */
108  apr_file_t *apr_file;
109
110  /* Offsets inside APR_FILE representing the location of the patch */
111  apr_off_t src_start;
112  apr_off_t src_end;
113  svn_filesize_t src_filesize; /* Expanded/final size */
114
115  /* Offsets inside APR_FILE representing the location of the patch */
116  apr_off_t dst_start;
117  apr_off_t dst_end;
118  svn_filesize_t dst_filesize; /* Expanded/final size */
119};
120
121/* Common guts of svn_diff_hunk__create_adds_single_line() and
122 * svn_diff_hunk__create_deletes_single_line().
123 *
124 * ADD is TRUE if adding and FALSE if deleting.
125 */
126static svn_error_t *
127add_or_delete_single_line(svn_diff_hunk_t **hunk_out,
128                          const char *line,
129                          const svn_patch_t *patch,
130                          svn_boolean_t add,
131                          apr_pool_t *result_pool,
132                          apr_pool_t *scratch_pool)
133{
134  svn_diff_hunk_t *hunk = apr_pcalloc(result_pool, sizeof(*hunk));
135  static const char *hunk_header[] = { "@@ -1 +0,0 @@\n", "@@ -0,0 +1 @@\n" };
136  const apr_size_t header_len = strlen(hunk_header[add]);
137  const apr_size_t len = strlen(line);
138  const apr_size_t end = header_len + (1 + len); /* The +1 is for the \n. */
139  svn_stringbuf_t *buf = svn_stringbuf_create_ensure(end + 1, scratch_pool);
140
141  hunk->patch = patch;
142
143  /* hunk->apr_file is created below. */
144
145  hunk->diff_text_range.start = header_len;
146  hunk->diff_text_range.current = header_len;
147
148  if (add)
149    {
150      hunk->original_text_range.start = 0; /* There's no "original" text. */
151      hunk->original_text_range.current = 0;
152      hunk->original_text_range.end = 0;
153      hunk->original_no_final_eol = FALSE;
154
155      hunk->modified_text_range.start = header_len;
156      hunk->modified_text_range.current = header_len;
157      hunk->modified_text_range.end = end;
158      hunk->modified_no_final_eol = TRUE;
159
160      hunk->original_start = 0;
161      hunk->original_length = 0;
162
163      hunk->modified_start = 1;
164      hunk->modified_length = 1;
165    }
166  else /* delete */
167    {
168      hunk->original_text_range.start = header_len;
169      hunk->original_text_range.current = header_len;
170      hunk->original_text_range.end = end;
171      hunk->original_no_final_eol = TRUE;
172
173      hunk->modified_text_range.start = 0; /* There's no "original" text. */
174      hunk->modified_text_range.current = 0;
175      hunk->modified_text_range.end = 0;
176      hunk->modified_no_final_eol = FALSE;
177
178      hunk->original_start = 1;
179      hunk->original_length = 1;
180
181      hunk->modified_start = 0;
182      hunk->modified_length = 0; /* setting to '1' works too */
183    }
184
185  hunk->leading_context = 0;
186  hunk->trailing_context = 0;
187
188  /* Create APR_FILE and put just a hunk in it (without a diff header).
189   * Save the offset of the last byte of the diff line. */
190  svn_stringbuf_appendbytes(buf, hunk_header[add], header_len);
191  svn_stringbuf_appendbyte(buf, add ? '+' : '-');
192  svn_stringbuf_appendbytes(buf, line, len);
193  svn_stringbuf_appendbyte(buf, '\n');
194  svn_stringbuf_appendcstr(buf, "\\ No newline at end of hunk\n");
195
196  hunk->diff_text_range.end = buf->len;
197
198  SVN_ERR(svn_io_open_unique_file3(&hunk->apr_file, NULL /* filename */,
199                                   NULL /* system tempdir */,
200                                   svn_io_file_del_on_pool_cleanup,
201                                   result_pool, scratch_pool));
202  SVN_ERR(svn_io_file_write_full(hunk->apr_file,
203                                 buf->data, buf->len,
204                                 NULL, scratch_pool));
205  /* No need to seek. */
206
207  *hunk_out = hunk;
208  return SVN_NO_ERROR;
209}
210
211svn_error_t *
212svn_diff_hunk__create_adds_single_line(svn_diff_hunk_t **hunk_out,
213                                       const char *line,
214                                       const svn_patch_t *patch,
215                                       apr_pool_t *result_pool,
216                                       apr_pool_t *scratch_pool)
217{
218  SVN_ERR(add_or_delete_single_line(hunk_out, line, patch,
219                                    (!patch->reverse),
220                                    result_pool, scratch_pool));
221  return SVN_NO_ERROR;
222}
223
224svn_error_t *
225svn_diff_hunk__create_deletes_single_line(svn_diff_hunk_t **hunk_out,
226                                          const char *line,
227                                          const svn_patch_t *patch,
228                                          apr_pool_t *result_pool,
229                                          apr_pool_t *scratch_pool)
230{
231  SVN_ERR(add_or_delete_single_line(hunk_out, line, patch,
232                                    patch->reverse,
233                                    result_pool, scratch_pool));
234  return SVN_NO_ERROR;
235}
236
237void
238svn_diff_hunk_reset_diff_text(svn_diff_hunk_t *hunk)
239{
240  hunk->diff_text_range.current = hunk->diff_text_range.start;
241}
242
243void
244svn_diff_hunk_reset_original_text(svn_diff_hunk_t *hunk)
245{
246  if (hunk->patch->reverse)
247    hunk->modified_text_range.current = hunk->modified_text_range.start;
248  else
249    hunk->original_text_range.current = hunk->original_text_range.start;
250}
251
252void
253svn_diff_hunk_reset_modified_text(svn_diff_hunk_t *hunk)
254{
255  if (hunk->patch->reverse)
256    hunk->original_text_range.current = hunk->original_text_range.start;
257  else
258    hunk->modified_text_range.current = hunk->modified_text_range.start;
259}
260
261svn_linenum_t
262svn_diff_hunk_get_original_start(const svn_diff_hunk_t *hunk)
263{
264  return hunk->patch->reverse ? hunk->modified_start : hunk->original_start;
265}
266
267svn_linenum_t
268svn_diff_hunk_get_original_length(const svn_diff_hunk_t *hunk)
269{
270  return hunk->patch->reverse ? hunk->modified_length : hunk->original_length;
271}
272
273svn_linenum_t
274svn_diff_hunk_get_modified_start(const svn_diff_hunk_t *hunk)
275{
276  return hunk->patch->reverse ? hunk->original_start : hunk->modified_start;
277}
278
279svn_linenum_t
280svn_diff_hunk_get_modified_length(const svn_diff_hunk_t *hunk)
281{
282  return hunk->patch->reverse ? hunk->original_length : hunk->modified_length;
283}
284
285svn_linenum_t
286svn_diff_hunk_get_leading_context(const svn_diff_hunk_t *hunk)
287{
288  return hunk->leading_context;
289}
290
291svn_linenum_t
292svn_diff_hunk_get_trailing_context(const svn_diff_hunk_t *hunk)
293{
294  return hunk->trailing_context;
295}
296
297svn_linenum_t
298svn_diff_hunk__get_fuzz_penalty(const svn_diff_hunk_t *hunk)
299{
300  return hunk->patch->reverse ? hunk->original_fuzz : hunk->modified_fuzz;
301}
302
303/* Baton for the base85 stream implementation */
304struct base85_baton_t
305{
306  apr_file_t *file;
307  apr_pool_t *iterpool;
308  char buffer[52];        /* Bytes on current line */
309  apr_off_t next_pos;     /* Start position of next line */
310  apr_off_t end_pos;      /* Position after last line */
311  apr_size_t buf_size;    /* Bytes available (52 unless at eof) */
312  apr_size_t buf_pos;     /* Bytes in linebuffer */
313  svn_boolean_t done;     /* At eof? */
314};
315
316/* Implements svn_read_fn_t for the base85 read stream */
317static svn_error_t *
318read_handler_base85(void *baton, char *buffer, apr_size_t *len)
319{
320  struct base85_baton_t *b85b = baton;
321  apr_pool_t *iterpool = b85b->iterpool;
322  apr_size_t remaining = *len;
323  char *dest = buffer;
324
325  svn_pool_clear(iterpool);
326
327  if (b85b->done)
328    {
329      *len = 0;
330      return SVN_NO_ERROR;
331    }
332
333  while (remaining && (b85b->buf_size > b85b->buf_pos
334                       || b85b->next_pos < b85b->end_pos))
335    {
336      svn_stringbuf_t *line;
337      svn_boolean_t at_eof;
338
339      apr_size_t available = b85b->buf_size - b85b->buf_pos;
340      if (available)
341        {
342          apr_size_t n = (remaining < available) ? remaining : available;
343
344          memcpy(dest, b85b->buffer + b85b->buf_pos, n);
345          dest += n;
346          remaining -= n;
347          b85b->buf_pos += n;
348
349          if (!remaining)
350            return SVN_NO_ERROR; /* *len = OK */
351        }
352
353      if (b85b->next_pos >= b85b->end_pos)
354        break; /* At EOF */
355      SVN_ERR(svn_io_file_seek(b85b->file, APR_SET, &b85b->next_pos,
356                               iterpool));
357      SVN_ERR(svn_io_file_readline(b85b->file, &line, NULL, &at_eof,
358                                   APR_SIZE_MAX, iterpool, iterpool));
359      if (at_eof)
360        b85b->next_pos = b85b->end_pos;
361      else
362        {
363          SVN_ERR(svn_io_file_get_offset(&b85b->next_pos, b85b->file,
364                                         iterpool));
365        }
366
367      if (line->len && line->data[0] >= 'A' && line->data[0] <= 'Z')
368        b85b->buf_size = line->data[0] - 'A' + 1;
369      else if (line->len && line->data[0] >= 'a' && line->data[0] <= 'z')
370        b85b->buf_size = line->data[0] - 'a' + 26 + 1;
371      else
372        return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL,
373                                _("Unexpected data in base85 section"));
374
375      if (b85b->buf_size < 52)
376        b85b->next_pos = b85b->end_pos; /* Handle as EOF */
377
378      SVN_ERR(svn_diff__base85_decode_line(b85b->buffer, b85b->buf_size,
379                                           line->data + 1, line->len - 1,
380                                           iterpool));
381      b85b->buf_pos = 0;
382    }
383
384  *len -= remaining;
385  b85b->done = TRUE;
386
387  return SVN_NO_ERROR;
388}
389
390/* Implements svn_close_fn_t for the base85 read stream */
391static svn_error_t *
392close_handler_base85(void *baton)
393{
394  struct base85_baton_t *b85b = baton;
395
396  svn_pool_destroy(b85b->iterpool);
397
398  return SVN_NO_ERROR;
399}
400
401/* Gets a stream that reads decoded base85 data from a segment of a file.
402   The current implementation might assume that both start_pos and end_pos
403   are located at line boundaries. */
404static svn_stream_t *
405get_base85_data_stream(apr_file_t *file,
406                       apr_off_t start_pos,
407                       apr_off_t end_pos,
408                       apr_pool_t *result_pool)
409{
410  struct base85_baton_t *b85b = apr_pcalloc(result_pool, sizeof(*b85b));
411  svn_stream_t *base85s = svn_stream_create(b85b, result_pool);
412
413  b85b->file = file;
414  b85b->iterpool = svn_pool_create(result_pool);
415  b85b->next_pos = start_pos;
416  b85b->end_pos = end_pos;
417
418  svn_stream_set_read2(base85s, NULL /* only full read support */,
419                       read_handler_base85);
420  svn_stream_set_close(base85s, close_handler_base85);
421  return base85s;
422}
423
424/* Baton for the length verification stream functions */
425struct length_verify_baton_t
426{
427  svn_stream_t *inner;
428  svn_filesize_t remaining;
429};
430
431/* Implements svn_read_fn_t for the length verification stream */
432static svn_error_t *
433read_handler_length_verify(void *baton, char *buffer, apr_size_t *len)
434{
435  struct length_verify_baton_t *lvb = baton;
436  apr_size_t requested_len = *len;
437
438  SVN_ERR(svn_stream_read_full(lvb->inner, buffer, len));
439
440  if (*len > lvb->remaining)
441    return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL,
442                            _("Base85 data expands to longer than declared "
443                              "filesize"));
444  else if (requested_len > *len && *len != lvb->remaining)
445    return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL,
446                            _("Base85 data expands to smaller than declared "
447                              "filesize"));
448
449  lvb->remaining -= *len;
450
451  return SVN_NO_ERROR;
452}
453
454/* Implements svn_close_fn_t for the length verification stream */
455static svn_error_t *
456close_handler_length_verify(void *baton)
457{
458  struct length_verify_baton_t *lvb = baton;
459
460  return svn_error_trace(svn_stream_close(lvb->inner));
461}
462
463/* Gets a stream that verifies on reads that the inner stream is exactly
464   of the specified length */
465static svn_stream_t *
466get_verify_length_stream(svn_stream_t *inner,
467                         svn_filesize_t expected_size,
468                         apr_pool_t *result_pool)
469{
470  struct length_verify_baton_t *lvb = apr_palloc(result_pool, sizeof(*lvb));
471  svn_stream_t *len_stream = svn_stream_create(lvb, result_pool);
472
473  lvb->inner = inner;
474  lvb->remaining = expected_size;
475
476  svn_stream_set_read2(len_stream, NULL /* only full read support */,
477                       read_handler_length_verify);
478  svn_stream_set_close(len_stream, close_handler_length_verify);
479
480  return len_stream;
481}
482
483svn_stream_t *
484svn_diff_get_binary_diff_original_stream(const svn_diff_binary_patch_t *bpatch,
485                                         apr_pool_t *result_pool)
486{
487  svn_stream_t *s = get_base85_data_stream(bpatch->apr_file, bpatch->src_start,
488                                           bpatch->src_end, result_pool);
489
490  s = svn_stream_compressed(s, result_pool);
491
492  /* ### If we (ever) want to support the DELTA format, then we should hook the
493         undelta handling here */
494
495  return get_verify_length_stream(s, bpatch->src_filesize, result_pool);
496}
497
498svn_stream_t *
499svn_diff_get_binary_diff_result_stream(const svn_diff_binary_patch_t *bpatch,
500                                       apr_pool_t *result_pool)
501{
502  svn_stream_t *s = get_base85_data_stream(bpatch->apr_file, bpatch->dst_start,
503                                           bpatch->dst_end, result_pool);
504
505  s = svn_stream_compressed(s, result_pool);
506
507  /* ### If we (ever) want to support the DELTA format, then we should hook the
508  undelta handling here */
509
510  return get_verify_length_stream(s, bpatch->dst_filesize, result_pool);
511}
512
513/* Try to parse a positive number from a decimal number encoded
514 * in the string NUMBER. Return parsed number in OFFSET, and return
515 * TRUE if parsing was successful. */
516static svn_boolean_t
517parse_offset(svn_linenum_t *offset, const char *number)
518{
519  svn_error_t *err;
520  apr_uint64_t val;
521
522  err = svn_cstring_strtoui64(&val, number, 0, SVN_LINENUM_MAX_VALUE, 10);
523  if (err)
524    {
525      svn_error_clear(err);
526      return FALSE;
527    }
528
529  *offset = (svn_linenum_t)val;
530
531  return TRUE;
532}
533
534/* Try to parse a hunk range specification from the string RANGE.
535 * Return parsed information in *START and *LENGTH, and return TRUE
536 * if the range parsed correctly. Note: This function may modify the
537 * input value RANGE. */
538static svn_boolean_t
539parse_range(svn_linenum_t *start, svn_linenum_t *length, char *range)
540{
541  char *comma;
542
543  if (*range == 0)
544    return FALSE;
545
546  comma = strstr(range, ",");
547  if (comma)
548    {
549      if (strlen(comma + 1) > 0)
550        {
551          /* Try to parse the length. */
552          if (! parse_offset(length, comma + 1))
553            return FALSE;
554
555          /* Snip off the end of the string,
556           * so we can comfortably parse the line
557           * number the hunk starts at. */
558          *comma = '\0';
559        }
560       else
561         /* A comma but no length? */
562         return FALSE;
563    }
564  else
565    {
566      *length = 1;
567    }
568
569  /* Try to parse the line number the hunk starts at. */
570  return parse_offset(start, range);
571}
572
573/* Try to parse a hunk header in string HEADER, putting parsed information
574 * into HUNK. Return TRUE if the header parsed correctly. ATAT is the
575 * character string used to delimit the hunk header.
576 * Do all allocations in POOL. */
577static svn_boolean_t
578parse_hunk_header(const char *header, svn_diff_hunk_t *hunk,
579                  const char *atat, apr_pool_t *pool)
580{
581  const char *p;
582  const char *start;
583  svn_stringbuf_t *range;
584
585  p = header + strlen(atat);
586  if (*p != ' ')
587    /* No. */
588    return FALSE;
589  p++;
590  if (*p != '-')
591    /* Nah... */
592    return FALSE;
593  /* OK, this may be worth allocating some memory for... */
594  range = svn_stringbuf_create_ensure(31, pool);
595  start = ++p;
596  while (*p && *p != ' ')
597    {
598      p++;
599    }
600
601  if (*p != ' ')
602    /* No no no... */
603    return FALSE;
604
605  svn_stringbuf_appendbytes(range, start, p - start);
606
607  /* Try to parse the first range. */
608  if (! parse_range(&hunk->original_start, &hunk->original_length, range->data))
609    return FALSE;
610
611  /* Clear the stringbuf so we can reuse it for the second range. */
612  svn_stringbuf_setempty(range);
613  p++;
614  if (*p != '+')
615    /* Eeek! */
616    return FALSE;
617  /* OK, this may be worth copying... */
618  start = ++p;
619  while (*p && *p != ' ')
620    {
621      p++;
622    }
623  if (*p != ' ')
624    /* No no no... */
625    return FALSE;
626
627  svn_stringbuf_appendbytes(range, start, p - start);
628
629  /* Check for trailing @@ */
630  p++;
631  if (! starts_with(p, atat))
632    return FALSE;
633
634  /* There may be stuff like C-function names after the trailing @@,
635   * but we ignore that. */
636
637  /* Try to parse the second range. */
638  if (! parse_range(&hunk->modified_start, &hunk->modified_length, range->data))
639    return FALSE;
640
641  /* Hunk header is good. */
642  return TRUE;
643}
644
645/* Read a line of original or modified hunk text from the specified
646 * RANGE within FILE. FILE is expected to contain unidiff text.
647 * Leading unidiff symbols ('+', '-', and ' ') are removed from the line,
648 * Any lines commencing with the VERBOTEN character are discarded.
649 * VERBOTEN should be '+' or '-', depending on which form of hunk text
650 * is being read. NO_FINAL_EOL declares if the hunk contains a no final
651 * EOL marker.
652 *
653 * All other parameters are as in svn_diff_hunk_readline_original_text()
654 * and svn_diff_hunk_readline_modified_text().
655 */
656static svn_error_t *
657hunk_readline_original_or_modified(apr_file_t *file,
658                                   struct svn_diff__hunk_range *range,
659                                   svn_stringbuf_t **stringbuf,
660                                   const char **eol,
661                                   svn_boolean_t *eof,
662                                   char verboten,
663                                   svn_boolean_t no_final_eol,
664                                   apr_pool_t *result_pool,
665                                   apr_pool_t *scratch_pool)
666{
667  apr_size_t max_len;
668  svn_boolean_t filtered;
669  apr_off_t pos;
670  svn_stringbuf_t *str;
671  const char *eol_p;
672  apr_pool_t *last_pool;
673
674  if (!eol)
675    eol = &eol_p;
676
677  if (range->current >= range->end)
678    {
679      /* We're past the range. Indicate that no bytes can be read. */
680      *eof = TRUE;
681      *eol = NULL;
682      *stringbuf = svn_stringbuf_create_empty(result_pool);
683      return SVN_NO_ERROR;
684    }
685
686  SVN_ERR(svn_io_file_get_offset(&pos, file, scratch_pool));
687  SVN_ERR(svn_io_file_seek(file, APR_SET, &range->current, scratch_pool));
688
689  /* It's not ITERPOOL because we use data allocated in LAST_POOL out
690     of the loop. */
691  last_pool = svn_pool_create(scratch_pool);
692  do
693    {
694      svn_pool_clear(last_pool);
695
696      max_len = range->end - range->current;
697      SVN_ERR(svn_io_file_readline(file, &str, eol, eof, max_len,
698                                   last_pool, last_pool));
699      SVN_ERR(svn_io_file_get_offset(&range->current, file, last_pool));
700      filtered = (str->data[0] == verboten || str->data[0] == '\\');
701    }
702  while (filtered && ! *eof);
703
704  if (filtered)
705    {
706      /* EOF, return an empty string. */
707      *stringbuf = svn_stringbuf_create_ensure(0, result_pool);
708      *eol = NULL;
709    }
710  else if (str->data[0] == '+' || str->data[0] == '-' || str->data[0] == ' ')
711    {
712      /* Shave off leading unidiff symbols. */
713      *stringbuf = svn_stringbuf_create(str->data + 1, result_pool);
714    }
715  else
716    {
717      /* Return the line as-is. Handle as a chopped leading spaces */
718      *stringbuf = svn_stringbuf_dup(str, result_pool);
719    }
720
721  if (!filtered && *eof && !*eol && *str->data)
722    {
723      /* Ok, we miss a final EOL in the patch file, but didn't see a
724         no eol marker line.
725
726         We should report that we had an EOL or the patch code will
727         misbehave (and it knows nothing about no eol markers) */
728
729      if (!no_final_eol && eol != &eol_p)
730        {
731          apr_off_t start = 0;
732
733          SVN_ERR(svn_io_file_seek(file, APR_SET, &start, scratch_pool));
734
735          SVN_ERR(svn_io_file_readline(file, &str, eol, NULL, APR_SIZE_MAX,
736                                       scratch_pool, scratch_pool));
737
738          /* Every patch file that has hunks has at least one EOL*/
739          SVN_ERR_ASSERT(*eol != NULL);
740        }
741
742      *eof = FALSE;
743      /* Fall through to seek back to the right location */
744    }
745  SVN_ERR(svn_io_file_seek(file, APR_SET, &pos, scratch_pool));
746
747  svn_pool_destroy(last_pool);
748  return SVN_NO_ERROR;
749}
750
751svn_error_t *
752svn_diff_hunk_readline_original_text(svn_diff_hunk_t *hunk,
753                                     svn_stringbuf_t **stringbuf,
754                                     const char **eol,
755                                     svn_boolean_t *eof,
756                                     apr_pool_t *result_pool,
757                                     apr_pool_t *scratch_pool)
758{
759  return svn_error_trace(
760    hunk_readline_original_or_modified(hunk->apr_file,
761                                       hunk->patch->reverse ?
762                                         &hunk->modified_text_range :
763                                         &hunk->original_text_range,
764                                       stringbuf, eol, eof,
765                                       hunk->patch->reverse ? '-' : '+',
766                                       hunk->patch->reverse
767                                          ? hunk->modified_no_final_eol
768                                          : hunk->original_no_final_eol,
769                                       result_pool, scratch_pool));
770}
771
772svn_error_t *
773svn_diff_hunk_readline_modified_text(svn_diff_hunk_t *hunk,
774                                     svn_stringbuf_t **stringbuf,
775                                     const char **eol,
776                                     svn_boolean_t *eof,
777                                     apr_pool_t *result_pool,
778                                     apr_pool_t *scratch_pool)
779{
780  return svn_error_trace(
781    hunk_readline_original_or_modified(hunk->apr_file,
782                                       hunk->patch->reverse ?
783                                         &hunk->original_text_range :
784                                         &hunk->modified_text_range,
785                                       stringbuf, eol, eof,
786                                       hunk->patch->reverse ? '+' : '-',
787                                       hunk->patch->reverse
788                                          ? hunk->original_no_final_eol
789                                          : hunk->modified_no_final_eol,
790                                       result_pool, scratch_pool));
791}
792
793svn_error_t *
794svn_diff_hunk_readline_diff_text(svn_diff_hunk_t *hunk,
795                                 svn_stringbuf_t **stringbuf,
796                                 const char **eol,
797                                 svn_boolean_t *eof,
798                                 apr_pool_t *result_pool,
799                                 apr_pool_t *scratch_pool)
800{
801  svn_stringbuf_t *line;
802  apr_size_t max_len;
803  apr_off_t pos;
804  const char *eol_p;
805
806  if (!eol)
807    eol = &eol_p;
808
809  if (hunk->diff_text_range.current >= hunk->diff_text_range.end)
810    {
811      /* We're past the range. Indicate that no bytes can be read. */
812      *eof = TRUE;
813      *eol = NULL;
814      *stringbuf = svn_stringbuf_create_empty(result_pool);
815      return SVN_NO_ERROR;
816    }
817
818  SVN_ERR(svn_io_file_get_offset(&pos, hunk->apr_file, scratch_pool));
819  SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET,
820                           &hunk->diff_text_range.current, scratch_pool));
821  max_len = hunk->diff_text_range.end - hunk->diff_text_range.current;
822  SVN_ERR(svn_io_file_readline(hunk->apr_file, &line, eol, eof, max_len,
823                               result_pool,
824                   scratch_pool));
825  SVN_ERR(svn_io_file_get_offset(&hunk->diff_text_range.current,
826                                 hunk->apr_file, scratch_pool));
827
828  if (*eof && !*eol && *line->data)
829    {
830      /* Ok, we miss a final EOL in the patch file, but didn't see a
831          no eol marker line.
832
833          We should report that we had an EOL or the patch code will
834          misbehave (and it knows nothing about no eol markers) */
835
836      if (eol != &eol_p)
837        {
838          /* Lets pick the first eol we find in our patch file */
839          apr_off_t start = 0;
840          svn_stringbuf_t *str;
841
842          SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET, &start,
843                                   scratch_pool));
844
845          SVN_ERR(svn_io_file_readline(hunk->apr_file, &str, eol, NULL,
846                                       APR_SIZE_MAX,
847                                       scratch_pool, scratch_pool));
848
849          /* Every patch file that has hunks has at least one EOL*/
850          SVN_ERR_ASSERT(*eol != NULL);
851        }
852
853      *eof = FALSE;
854
855      /* Fall through to seek back to the right location */
856    }
857
858  SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET, &pos, scratch_pool));
859
860  if (hunk->patch->reverse)
861    {
862      if (line->data[0] == '+')
863        line->data[0] = '-';
864      else if (line->data[0] == '-')
865        line->data[0] = '+';
866    }
867
868  *stringbuf = line;
869
870  return SVN_NO_ERROR;
871}
872
873/* Parse *PROP_NAME from HEADER as the part after the INDICATOR line.
874 * Allocate *PROP_NAME in RESULT_POOL.
875 * Set *PROP_NAME to NULL if no valid property name was found. */
876static svn_error_t *
877parse_prop_name(const char **prop_name, const char *header,
878                const char *indicator, apr_pool_t *result_pool)
879{
880  SVN_ERR(svn_utf_cstring_to_utf8(prop_name,
881                                  header + strlen(indicator),
882                                  result_pool));
883  if (**prop_name == '\0')
884    *prop_name = NULL;
885  else if (! svn_prop_name_is_valid(*prop_name))
886    {
887      svn_stringbuf_t *buf = svn_stringbuf_create(*prop_name, result_pool);
888      svn_stringbuf_strip_whitespace(buf);
889      *prop_name = (svn_prop_name_is_valid(buf->data) ? buf->data : NULL);
890    }
891
892  return SVN_NO_ERROR;
893}
894
895
896/* A helper function to parse svn:mergeinfo diffs.
897 *
898 * These diffs use a special pretty-print format, for instance:
899 *
900 * Added: svn:mergeinfo
901 * ## -0,0 +0,1 ##
902 *   Merged /trunk:r2-3
903 *
904 * The hunk header has the following format:
905 * ## -0,NUMBER_OF_REVERSE_MERGES +0,NUMBER_OF_FORWARD_MERGES ##
906 *
907 * The header is followed by a list of mergeinfo, one path per line.
908 * This function parses such lines. Lines describing reverse merges
909 * appear first, and then all lines describing forward merges appear.
910 *
911 * Parts of the line are affected by i18n. The words 'Merged'
912 * and 'Reverse-merged' can appear in any language and at any
913 * position within the line. We can only assume that a leading
914 * '/' starts the merge source path, the path is followed by
915 * ":r", which in turn is followed by a mergeinfo revision range,
916 *  which is terminated by whitespace or end-of-string.
917 *
918 * *NUMBER_OF_REVERSE_MERGES and *NUMBER_OF_FORWARD_MERGES are the
919 * numbers of reverse and forward merges remaining to be read. This
920 * function decrements *NUMBER_OF_REVERSE_MERGES for each LINE
921 * parsed until that is zero, then *NUMBER_OF_FORWARD_MERGES for
922 * each LINE parsed until that is zero. If both are zero, it parses
923 * and discards LINE.
924 *
925 * If LINE is successfully parsed, *FOUND_MERGEINFO is set to TRUE,
926 * otherwise to FALSE.
927 *
928 * If LINE is successfully parsed and counted, the resulting mergeinfo
929 * is added to PATCH->mergeinfo or PATCH->reverse_mergeinfo.
930 */
931static svn_error_t *
932parse_pretty_mergeinfo_line(svn_boolean_t *found_mergeinfo,
933                            svn_linenum_t *number_of_reverse_merges,
934                            svn_linenum_t *number_of_forward_merges,
935                            svn_stringbuf_t *line,
936                            svn_patch_t *patch,
937                            apr_pool_t *result_pool,
938                            apr_pool_t *scratch_pool)
939{
940  char *slash = strchr(line->data, '/');
941  char *colon = strrchr(line->data, ':');
942
943  *found_mergeinfo = FALSE;
944
945  if (slash && colon && colon[1] == 'r' && slash < colon)
946    {
947      svn_stringbuf_t *input;
948      svn_mergeinfo_t mergeinfo = NULL;
949      char *s;
950      svn_error_t *err;
951
952      input = svn_stringbuf_create_ensure(line->len, scratch_pool);
953
954      /* Copy the merge source path + colon */
955      s = slash;
956      while (s <= colon)
957        {
958          svn_stringbuf_appendbyte(input, *s);
959          s++;
960        }
961
962      /* skip 'r' after colon */
963      s++;
964
965      /* Copy the revision range. */
966      while (s < line->data + line->len)
967        {
968          if (svn_ctype_isspace(*s))
969            break;
970          svn_stringbuf_appendbyte(input, *s);
971          s++;
972        }
973
974      err = svn_mergeinfo_parse(&mergeinfo, input->data, result_pool);
975      if (err && err->apr_err == SVN_ERR_MERGEINFO_PARSE_ERROR)
976        {
977          svn_error_clear(err);
978          mergeinfo = NULL;
979        }
980      else
981        SVN_ERR(err);
982
983      if (mergeinfo)
984        {
985          if (*number_of_reverse_merges > 0) /* reverse merges */
986            {
987              if (patch->reverse)
988                {
989                  if (patch->mergeinfo == NULL)
990                    patch->mergeinfo = mergeinfo;
991                  else
992                    SVN_ERR(svn_mergeinfo_merge2(patch->mergeinfo,
993                                                 mergeinfo,
994                                                 result_pool,
995                                                 scratch_pool));
996                }
997              else
998                {
999                  if (patch->reverse_mergeinfo == NULL)
1000                    patch->reverse_mergeinfo = mergeinfo;
1001                  else
1002                    SVN_ERR(svn_mergeinfo_merge2(patch->reverse_mergeinfo,
1003                                                 mergeinfo,
1004                                                 result_pool,
1005                                                 scratch_pool));
1006                }
1007              (*number_of_reverse_merges)--;
1008            }
1009          else if (number_of_forward_merges > 0) /* forward merges */
1010            {
1011              if (patch->reverse)
1012                {
1013                  if (patch->reverse_mergeinfo == NULL)
1014                    patch->reverse_mergeinfo = mergeinfo;
1015                  else
1016                    SVN_ERR(svn_mergeinfo_merge2(patch->reverse_mergeinfo,
1017                                                 mergeinfo,
1018                                                 result_pool,
1019                                                 scratch_pool));
1020                }
1021              else
1022                {
1023                  if (patch->mergeinfo == NULL)
1024                    patch->mergeinfo = mergeinfo;
1025                  else
1026                    SVN_ERR(svn_mergeinfo_merge2(patch->mergeinfo,
1027                                                 mergeinfo,
1028                                                 result_pool,
1029                                                 scratch_pool));
1030                }
1031              (*number_of_forward_merges)--;
1032            }
1033
1034          *found_mergeinfo = TRUE;
1035        }
1036    }
1037
1038  return SVN_NO_ERROR;
1039}
1040
1041/* Return the next *HUNK from a PATCH in APR_FILE.
1042 * If no hunk can be found, set *HUNK to NULL.
1043 * Set IS_PROPERTY to TRUE if we have a property hunk. If the returned HUNK
1044 * is the first belonging to a certain property, then PROP_NAME and
1045 * PROP_OPERATION will be set too. If we have a text hunk, PROP_NAME will be
1046 * NULL.  If IGNORE_WHITESPACE is TRUE, lines without leading spaces will be
1047 * treated as context lines.  Allocate results in RESULT_POOL.
1048 * Use SCRATCH_POOL for all other allocations. */
1049static svn_error_t *
1050parse_next_hunk(svn_diff_hunk_t **hunk,
1051                svn_boolean_t *is_property,
1052                const char **prop_name,
1053                svn_diff_operation_kind_t *prop_operation,
1054                svn_patch_t *patch,
1055                apr_file_t *apr_file,
1056                svn_boolean_t ignore_whitespace,
1057                apr_pool_t *result_pool,
1058                apr_pool_t *scratch_pool)
1059{
1060  static const char * const minus = "--- ";
1061  static const char * const text_atat = "@@";
1062  static const char * const prop_atat = "##";
1063  svn_stringbuf_t *line;
1064  svn_boolean_t eof, in_hunk, hunk_seen;
1065  apr_off_t pos, last_line;
1066  apr_off_t start, end;
1067  apr_off_t original_end;
1068  apr_off_t modified_end;
1069  svn_boolean_t original_no_final_eol = FALSE;
1070  svn_boolean_t modified_no_final_eol = FALSE;
1071  svn_linenum_t original_lines;
1072  svn_linenum_t modified_lines;
1073  svn_linenum_t leading_context;
1074  svn_linenum_t trailing_context;
1075  svn_boolean_t changed_line_seen;
1076  enum {
1077    noise_line,
1078    original_line,
1079    modified_line,
1080    context_line
1081  } last_line_type;
1082  apr_pool_t *iterpool;
1083
1084  *prop_operation = svn_diff_op_unchanged;
1085
1086  /* We only set this if we have a property hunk header. */
1087  *prop_name = NULL;
1088  *is_property = FALSE;
1089
1090  if (apr_file_eof(apr_file) == APR_EOF)
1091    {
1092      /* No more hunks here. */
1093      *hunk = NULL;
1094      return SVN_NO_ERROR;
1095    }
1096
1097  in_hunk = FALSE;
1098  hunk_seen = FALSE;
1099  leading_context = 0;
1100  trailing_context = 0;
1101  changed_line_seen = FALSE;
1102  original_end = 0;
1103  modified_end = 0;
1104  *hunk = apr_pcalloc(result_pool, sizeof(**hunk));
1105
1106  /* Get current seek position. */
1107  SVN_ERR(svn_io_file_get_offset(&pos, apr_file, scratch_pool));
1108
1109  /* Start out assuming noise. */
1110  last_line_type = noise_line;
1111
1112  iterpool = svn_pool_create(scratch_pool);
1113  do
1114    {
1115
1116      svn_pool_clear(iterpool);
1117
1118      /* Remember the current line's offset, and read the line. */
1119      last_line = pos;
1120      SVN_ERR(svn_io_file_readline(apr_file, &line, NULL, &eof, APR_SIZE_MAX,
1121                                   iterpool, iterpool));
1122
1123      /* Update line offset for next iteration. */
1124      SVN_ERR(svn_io_file_get_offset(&pos, apr_file, iterpool));
1125
1126      /* Lines starting with a backslash indicate a missing EOL:
1127       * "\ No newline at end of file" or "end of property". */
1128      if (line->data[0] == '\\')
1129        {
1130          if (in_hunk)
1131            {
1132              char eolbuf[2];
1133              apr_size_t len;
1134              apr_off_t off;
1135              apr_off_t hunk_text_end;
1136
1137              /* Comment terminates the hunk text and says the hunk text
1138               * has no trailing EOL. Snip off trailing EOL which is part
1139               * of the patch file but not part of the hunk text. */
1140              off = last_line - 2;
1141              SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &off, iterpool));
1142              len = sizeof(eolbuf);
1143              SVN_ERR(svn_io_file_read_full2(apr_file, eolbuf, len, &len,
1144                                             &eof, iterpool));
1145              if (eolbuf[0] == '\r' && eolbuf[1] == '\n')
1146                hunk_text_end = last_line - 2;
1147              else if (eolbuf[1] == '\n' || eolbuf[1] == '\r')
1148                hunk_text_end = last_line - 1;
1149              else
1150                hunk_text_end = last_line;
1151
1152              if (last_line_type == original_line && original_end == 0)
1153                original_end = hunk_text_end;
1154              else if (last_line_type == modified_line && modified_end == 0)
1155                modified_end = hunk_text_end;
1156              else if (last_line_type == context_line)
1157                {
1158                  if (original_end == 0)
1159                    original_end = hunk_text_end;
1160                  if (modified_end == 0)
1161                    modified_end = hunk_text_end;
1162                }
1163
1164              SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &pos, iterpool));
1165              /* Set for the type and context by using != the other type */
1166              if (last_line_type != modified_line)
1167                original_no_final_eol = TRUE;
1168              if (last_line_type != original_line)
1169                modified_no_final_eol = TRUE;
1170            }
1171
1172          continue;
1173        }
1174
1175      if (in_hunk && *is_property && *prop_name &&
1176          strcmp(*prop_name, SVN_PROP_MERGEINFO) == 0)
1177        {
1178          svn_boolean_t found_pretty_mergeinfo_line;
1179
1180          if (! hunk_seen)
1181            {
1182              /* We're reading the first line of the hunk, so the start
1183               * of the line just read is the hunk text's byte offset. */
1184              start = last_line;
1185            }
1186
1187          SVN_ERR(parse_pretty_mergeinfo_line(&found_pretty_mergeinfo_line,
1188                                              &original_lines, &modified_lines,
1189                                              line, patch,
1190                                              result_pool, iterpool));
1191          if (found_pretty_mergeinfo_line)
1192            {
1193              hunk_seen = TRUE;
1194              (*hunk)->is_pretty_print_mergeinfo = TRUE;
1195              continue; /* Proceed to the next line in the svn:mergeinfo hunk. */
1196            }
1197
1198          if ((*hunk)->is_pretty_print_mergeinfo)
1199            {
1200              /* We have reached the end of the pretty-print-mergeinfo hunk.
1201                 (This format uses only one hunk.) */
1202              if (eof)
1203                {
1204                  /* The hunk ends at EOF. */
1205                  end = pos;
1206                }
1207              else
1208                {
1209                  /* The start of the current line marks the first byte
1210                   * after the hunk text. */
1211                  end = last_line;
1212                }
1213              original_end = end;
1214              modified_end = end;
1215              break;
1216            }
1217
1218          /* Otherwise, this is a property diff in the
1219             regular format so fall through to normal processing. */
1220        }
1221
1222      if (in_hunk)
1223        {
1224          char c;
1225          static const char add = '+';
1226          static const char del = '-';
1227
1228          if (! hunk_seen)
1229            {
1230              /* We're reading the first line of the hunk, so the start
1231               * of the line just read is the hunk text's byte offset. */
1232              start = last_line;
1233            }
1234
1235          c = line->data[0];
1236          if (c == ' '
1237              || ((original_lines > 0 && modified_lines > 0)
1238                  && (
1239               /* Tolerate chopped leading spaces on empty lines. */
1240                      (! eof && line->len == 0)
1241               /* Maybe tolerate chopped leading spaces on non-empty lines. */
1242                      || (ignore_whitespace && c != del && c != add))))
1243            {
1244              /* It's a "context" line in the hunk. */
1245              hunk_seen = TRUE;
1246              if (original_lines > 0)
1247                original_lines--;
1248              else
1249                {
1250                  (*hunk)->original_length++;
1251                  (*hunk)->original_fuzz++;
1252                }
1253              if (modified_lines > 0)
1254                modified_lines--;
1255              else
1256                {
1257                  (*hunk)->modified_length++;
1258                  (*hunk)->modified_fuzz++;
1259                }
1260              if (changed_line_seen)
1261                trailing_context++;
1262              else
1263                leading_context++;
1264              last_line_type = context_line;
1265            }
1266          else if (c == del
1267                   && (original_lines > 0 || line->data[1] != del))
1268            {
1269              /* It's a "deleted" line in the hunk. */
1270              hunk_seen = TRUE;
1271              changed_line_seen = TRUE;
1272
1273              /* A hunk may have context in the middle. We only want
1274                 trailing lines of context. */
1275              if (trailing_context > 0)
1276                trailing_context = 0;
1277
1278              if (original_lines > 0)
1279                original_lines--;
1280              else
1281                {
1282                  (*hunk)->original_length++;
1283                  (*hunk)->original_fuzz++;
1284                }
1285              last_line_type = original_line;
1286            }
1287          else if (c == add
1288                   && (modified_lines > 0 || line->data[1] != add))
1289            {
1290              /* It's an "added" line in the hunk. */
1291              hunk_seen = TRUE;
1292              changed_line_seen = TRUE;
1293
1294              /* A hunk may have context in the middle. We only want
1295                 trailing lines of context. */
1296              if (trailing_context > 0)
1297                trailing_context = 0;
1298
1299              if (modified_lines > 0)
1300                modified_lines--;
1301              else
1302                {
1303                  (*hunk)->modified_length++;
1304                  (*hunk)->modified_fuzz++;
1305                }
1306              last_line_type = modified_line;
1307            }
1308          else
1309            {
1310              if (eof)
1311                {
1312                  /* The hunk ends at EOF. */
1313                  end = pos;
1314                }
1315              else
1316                {
1317                  /* The start of the current line marks the first byte
1318                   * after the hunk text. */
1319                  end = last_line;
1320                }
1321              if (original_end == 0)
1322                original_end = end;
1323              if (modified_end == 0)
1324                modified_end = end;
1325              break; /* Hunk was empty or has been read. */
1326            }
1327        }
1328      else
1329        {
1330          if (starts_with(line->data, text_atat))
1331            {
1332              /* Looks like we have a hunk header, try to rip it apart. */
1333              in_hunk = parse_hunk_header(line->data, *hunk, text_atat,
1334                                          iterpool);
1335              if (in_hunk)
1336                {
1337                  original_lines = (*hunk)->original_length;
1338                  modified_lines = (*hunk)->modified_length;
1339                  *is_property = FALSE;
1340                }
1341              }
1342          else if (starts_with(line->data, prop_atat))
1343            {
1344              /* Looks like we have a property hunk header, try to rip it
1345               * apart. */
1346              in_hunk = parse_hunk_header(line->data, *hunk, prop_atat,
1347                                          iterpool);
1348              if (in_hunk)
1349                {
1350                  original_lines = (*hunk)->original_length;
1351                  modified_lines = (*hunk)->modified_length;
1352                  *is_property = TRUE;
1353                }
1354            }
1355          else if (starts_with(line->data, "Added: "))
1356            {
1357              SVN_ERR(parse_prop_name(prop_name, line->data, "Added: ",
1358                                      result_pool));
1359              if (*prop_name)
1360                *prop_operation = (patch->reverse ? svn_diff_op_deleted
1361                                                  : svn_diff_op_added);
1362            }
1363          else if (starts_with(line->data, "Deleted: "))
1364            {
1365              SVN_ERR(parse_prop_name(prop_name, line->data, "Deleted: ",
1366                                      result_pool));
1367              if (*prop_name)
1368                *prop_operation = (patch->reverse ? svn_diff_op_added
1369                                                  : svn_diff_op_deleted);
1370            }
1371          else if (starts_with(line->data, "Modified: "))
1372            {
1373              SVN_ERR(parse_prop_name(prop_name, line->data, "Modified: ",
1374                                      result_pool));
1375              if (*prop_name)
1376                *prop_operation = svn_diff_op_modified;
1377            }
1378          else if (starts_with(line->data, minus)
1379                   || starts_with(line->data, "diff --git "))
1380            /* This could be a header of another patch. Bail out. */
1381            break;
1382        }
1383    }
1384  /* Check for the line length since a file may not have a newline at the
1385   * end and we depend upon the last line to be an empty one. */
1386  while (! eof || line->len > 0);
1387  svn_pool_destroy(iterpool);
1388
1389  if (! eof)
1390    /* Rewind to the start of the line just read, so subsequent calls
1391     * to this function or svn_diff_parse_next_patch() don't end
1392     * up skipping the line -- it may contain a patch or hunk header. */
1393    SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &last_line, scratch_pool));
1394
1395  if (hunk_seen && start < end)
1396    {
1397      /* Did we get the number of context lines announced in the header?
1398
1399         If not... let's limit the number from the header to what we
1400         actually have, and apply a fuzz penalty */
1401      if (original_lines)
1402        {
1403          (*hunk)->original_length -= original_lines;
1404          (*hunk)->original_fuzz += original_lines;
1405        }
1406      if (modified_lines)
1407        {
1408          (*hunk)->modified_length -= modified_lines;
1409          (*hunk)->modified_fuzz += modified_lines;
1410        }
1411
1412      (*hunk)->patch = patch;
1413      (*hunk)->apr_file = apr_file;
1414      (*hunk)->leading_context = leading_context;
1415      (*hunk)->trailing_context = trailing_context;
1416      (*hunk)->diff_text_range.start = start;
1417      (*hunk)->diff_text_range.current = start;
1418      (*hunk)->diff_text_range.end = end;
1419      (*hunk)->original_text_range.start = start;
1420      (*hunk)->original_text_range.current = start;
1421      (*hunk)->original_text_range.end = original_end;
1422      (*hunk)->modified_text_range.start = start;
1423      (*hunk)->modified_text_range.current = start;
1424      (*hunk)->modified_text_range.end = modified_end;
1425      (*hunk)->original_no_final_eol = original_no_final_eol;
1426      (*hunk)->modified_no_final_eol = modified_no_final_eol;
1427    }
1428  else
1429    /* Something went wrong, just discard the result. */
1430    *hunk = NULL;
1431
1432  return SVN_NO_ERROR;
1433}
1434
1435/* Compare function for sorting hunks after parsing.
1436 * We sort hunks by their original line offset. */
1437static int
1438compare_hunks(const void *a, const void *b)
1439{
1440  const svn_diff_hunk_t *ha = *((const svn_diff_hunk_t *const *)a);
1441  const svn_diff_hunk_t *hb = *((const svn_diff_hunk_t *const *)b);
1442
1443  if (ha->original_start < hb->original_start)
1444    return -1;
1445  if (ha->original_start > hb->original_start)
1446    return 1;
1447  return 0;
1448}
1449
/* The states the diff header parser can be in.  Each enumerator is
 * annotated with an example of the header line associated with it. */
enum parse_state
{
  state_start,              /* initial state, nothing recognised yet */
  state_git_diff_seen,      /* "diff --git" */
  state_git_tree_seen,      /* a tree operation, rather than content change */
  state_git_minus_seen,     /* "--- /dev/null" or "--- a/" */
  state_git_plus_seen,      /* "+++ /dev/null" or "+++ b/" */
  state_old_mode_seen,      /* "old mode 100644" */
  state_git_mode_seen,      /* "new mode 100644" */
  state_move_from_seen,     /* "rename from foo.c" */
  state_copy_from_seen,     /* "copy from foo.c" */
  state_minus_seen,         /* "--- foo.c" */
  state_unidiff_found,      /* valid start of a regular unidiff header */
  state_git_header_found,   /* valid start of a --git diff header */
  state_binary_patch_found  /* valid start of binary patch */
};
1467
1468/* Data type describing a valid state transition of the parser. */
1469struct transition
1470{
1471  const char *expected_input;
1472  enum parse_state required_state;
1473
1474  /* A callback called upon each parser state transition. */
1475  svn_error_t *(*fn)(enum parse_state *new_state, char *input,
1476                     svn_patch_t *patch, apr_pool_t *result_pool,
1477                     apr_pool_t *scratch_pool);
1478};
1479
1480/* UTF-8 encode and canonicalize the content of LINE as FILE_NAME. */
1481static svn_error_t *
1482grab_filename(const char **file_name, const char *line, apr_pool_t *result_pool,
1483              apr_pool_t *scratch_pool)
1484{
1485  const char *utf8_path;
1486  const char *canon_path;
1487
1488  /* Grab the filename and encode it in UTF-8. */
1489  /* TODO: Allow specifying the patch file's encoding.
1490   *       For now, we assume its encoding is native. */
1491  /* ### This can fail if the filename cannot be represented in the current
1492   * ### locale's encoding. */
1493  SVN_ERR(svn_utf_cstring_to_utf8(&utf8_path,
1494                                  line,
1495                                  scratch_pool));
1496
1497  /* Canonicalize the path name. */
1498  canon_path = svn_dirent_canonicalize(utf8_path, scratch_pool);
1499
1500  *file_name = apr_pstrdup(result_pool, canon_path);
1501
1502  return SVN_NO_ERROR;
1503}
1504
1505/* Parse the '--- ' line of a regular unidiff. */
1506static svn_error_t *
1507diff_minus(enum parse_state *new_state, char *line, svn_patch_t *patch,
1508           apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1509{
1510  /* If we can find a tab, it separates the filename from
1511   * the rest of the line which we can discard. */
1512  char *tab = strchr(line, '\t');
1513  if (tab)
1514    *tab = '\0';
1515
1516  SVN_ERR(grab_filename(&patch->old_filename, line + STRLEN_LITERAL("--- "),
1517                        result_pool, scratch_pool));
1518
1519  *new_state = state_minus_seen;
1520
1521  return SVN_NO_ERROR;
1522}
1523
1524/* Parse the '+++ ' line of a regular unidiff. */
1525static svn_error_t *
1526diff_plus(enum parse_state *new_state, char *line, svn_patch_t *patch,
1527           apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1528{
1529  /* If we can find a tab, it separates the filename from
1530   * the rest of the line which we can discard. */
1531  char *tab = strchr(line, '\t');
1532  if (tab)
1533    *tab = '\0';
1534
1535  SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("+++ "),
1536                        result_pool, scratch_pool));
1537
1538  *new_state = state_unidiff_found;
1539
1540  return SVN_NO_ERROR;
1541}
1542
1543/* Parse the first line of a git extended unidiff. */
1544static svn_error_t *
1545git_start(enum parse_state *new_state, char *line, svn_patch_t *patch,
1546          apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1547{
1548  const char *old_path_start;
1549  char *old_path_end;
1550  const char *new_path_start;
1551  const char *new_path_end;
1552  char *new_path_marker;
1553  const char *old_path_marker;
1554
1555  /* ### Add handling of escaped paths
1556   * http://www.kernel.org/pub/software/scm/git/docs/git-diff.html:
1557   *
1558   * TAB, LF, double quote and backslash characters in pathnames are
1559   * represented as \t, \n, \" and \\, respectively. If there is need for
1560   * such substitution then the whole pathname is put in double quotes.
1561   */
1562
1563  /* Our line should look like this: 'diff --git a/path b/path'.
1564   *
1565   * If we find any deviations from that format, we return with state reset
1566   * to start.
1567   */
1568  old_path_marker = strstr(line, " a/");
1569
1570  if (! old_path_marker)
1571    {
1572      *new_state = state_start;
1573      return SVN_NO_ERROR;
1574    }
1575
1576  if (! *(old_path_marker + 3))
1577    {
1578      *new_state = state_start;
1579      return SVN_NO_ERROR;
1580    }
1581
1582  new_path_marker = strstr(old_path_marker, " b/");
1583
1584  if (! new_path_marker)
1585    {
1586      *new_state = state_start;
1587      return SVN_NO_ERROR;
1588    }
1589
1590  if (! *(new_path_marker + 3))
1591    {
1592      *new_state = state_start;
1593      return SVN_NO_ERROR;
1594    }
1595
1596  /* By now, we know that we have a line on the form '--git diff a/.+ b/.+'
1597   * We only need the filenames when we have deleted or added empty
1598   * files. In those cases the old_path and new_path is identical on the
1599   * 'diff --git' line.  For all other cases we fetch the filenames from
1600   * other header lines. */
1601  old_path_start = line + STRLEN_LITERAL("diff --git a/");
1602  new_path_end = line + strlen(line);
1603  new_path_start = old_path_start;
1604
1605  while (TRUE)
1606    {
1607      ptrdiff_t len_old;
1608      ptrdiff_t len_new;
1609
1610      new_path_marker = strstr(new_path_start, " b/");
1611
1612      /* No new path marker, bail out. */
1613      if (! new_path_marker)
1614        break;
1615
1616      old_path_end = new_path_marker;
1617      new_path_start = new_path_marker + STRLEN_LITERAL(" b/");
1618
1619      /* No path after the marker. */
1620      if (! *new_path_start)
1621        break;
1622
1623      len_old = old_path_end - old_path_start;
1624      len_new = new_path_end - new_path_start;
1625
1626      /* Are the paths before and after the " b/" marker the same? */
1627      if (len_old == len_new
1628          && ! strncmp(old_path_start, new_path_start, len_old))
1629        {
1630          *old_path_end = '\0';
1631          SVN_ERR(grab_filename(&patch->old_filename, old_path_start,
1632                                result_pool, scratch_pool));
1633
1634          SVN_ERR(grab_filename(&patch->new_filename, new_path_start,
1635                                result_pool, scratch_pool));
1636          break;
1637        }
1638    }
1639
1640  /* We assume that the path is only modified until we've found a 'tree'
1641   * header */
1642  patch->operation = svn_diff_op_modified;
1643
1644  *new_state = state_git_diff_seen;
1645  return SVN_NO_ERROR;
1646}
1647
1648/* Parse the '--- ' line of a git extended unidiff. */
1649static svn_error_t *
1650git_minus(enum parse_state *new_state, char *line, svn_patch_t *patch,
1651          apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1652{
1653  /* If we can find a tab, it separates the filename from
1654   * the rest of the line which we can discard. */
1655  char *tab = strchr(line, '\t');
1656  if (tab)
1657    *tab = '\0';
1658
1659  if (starts_with(line, "--- /dev/null"))
1660    SVN_ERR(grab_filename(&patch->old_filename, "/dev/null",
1661                          result_pool, scratch_pool));
1662  else
1663    SVN_ERR(grab_filename(&patch->old_filename, line + STRLEN_LITERAL("--- a/"),
1664                          result_pool, scratch_pool));
1665
1666  *new_state = state_git_minus_seen;
1667  return SVN_NO_ERROR;
1668}
1669
1670/* Parse the '+++ ' line of a git extended unidiff. */
1671static svn_error_t *
1672git_plus(enum parse_state *new_state, char *line, svn_patch_t *patch,
1673          apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1674{
1675  /* If we can find a tab, it separates the filename from
1676   * the rest of the line which we can discard. */
1677  char *tab = strchr(line, '\t');
1678  if (tab)
1679    *tab = '\0';
1680
1681  if (starts_with(line, "+++ /dev/null"))
1682    SVN_ERR(grab_filename(&patch->new_filename, "/dev/null",
1683                          result_pool, scratch_pool));
1684  else
1685    SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("+++ b/"),
1686                          result_pool, scratch_pool));
1687
1688  *new_state = state_git_header_found;
1689  return SVN_NO_ERROR;
1690}
1691
1692/* Helper for git_old_mode() and git_new_mode().  Translate the git
1693 * file mode MODE_STR into a binary "executable?" and "symlink?" state. */
1694static svn_error_t *
1695parse_git_mode_bits(svn_tristate_t *executable_p,
1696                    svn_tristate_t *symlink_p,
1697                    const char *mode_str)
1698{
1699  apr_uint64_t mode;
1700  SVN_ERR(svn_cstring_strtoui64(&mode, mode_str,
1701                                0 /* min */,
1702                                0777777 /* max: six octal digits */,
1703                                010 /* radix (octal) */));
1704
1705  /* Note: 0644 and 0755 are the only modes that can occur for plain files.
1706   * We deliberately choose to parse only those values: we are strict in what
1707   * we accept _and_ in what we produce.
1708   *
1709   * (Having said that, though, we could consider relaxing the parser to also
1710   * map
1711   *     (mode & 0111) == 0000 -> svn_tristate_false
1712   *     (mode & 0111) == 0111 -> svn_tristate_true
1713   *        [anything else]    -> svn_tristate_unknown
1714   * .)
1715   */
1716
1717  switch (mode & 0777)
1718    {
1719      case 0644:
1720        *executable_p = svn_tristate_false;
1721        break;
1722
1723      case 0755:
1724        *executable_p = svn_tristate_true;
1725        break;
1726
1727      default:
1728        /* Ignore unknown values. */
1729        *executable_p = svn_tristate_unknown;
1730        break;
1731    }
1732
1733  switch (mode & 0170000 /* S_IFMT */)
1734    {
1735      case 0120000: /* S_IFLNK */
1736        *symlink_p = svn_tristate_true;
1737        break;
1738
1739      case 0100000: /* S_IFREG */
1740      case 0040000: /* S_IFDIR */
1741        *symlink_p = svn_tristate_false;
1742        break;
1743
1744      default:
1745        /* Ignore unknown values.
1746           (Including those generated by Subversion <= 1.9) */
1747        *symlink_p = svn_tristate_unknown;
1748        break;
1749    }
1750
1751  return SVN_NO_ERROR;
1752}
1753
1754/* Parse the 'old mode ' line of a git extended unidiff. */
1755static svn_error_t *
1756git_old_mode(enum parse_state *new_state, char *line, svn_patch_t *patch,
1757             apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1758{
1759  SVN_ERR(parse_git_mode_bits(&patch->old_executable_bit,
1760                              &patch->old_symlink_bit,
1761                              line + STRLEN_LITERAL("old mode ")));
1762
1763#ifdef SVN_DEBUG
1764  /* If this assert trips, the "old mode" is neither ...644 nor ...755 . */
1765  SVN_ERR_ASSERT(patch->old_executable_bit != svn_tristate_unknown);
1766#endif
1767
1768  *new_state = state_old_mode_seen;
1769  return SVN_NO_ERROR;
1770}
1771
1772/* Parse the 'new mode ' line of a git extended unidiff. */
1773static svn_error_t *
1774git_new_mode(enum parse_state *new_state, char *line, svn_patch_t *patch,
1775             apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1776{
1777  SVN_ERR(parse_git_mode_bits(&patch->new_executable_bit,
1778                              &patch->new_symlink_bit,
1779                              line + STRLEN_LITERAL("new mode ")));
1780
1781#ifdef SVN_DEBUG
1782  /* If this assert trips, the "old mode" is neither ...644 nor ...755 . */
1783  SVN_ERR_ASSERT(patch->new_executable_bit != svn_tristate_unknown);
1784#endif
1785
1786  /* Don't touch patch->operation. */
1787
1788  *new_state = state_git_mode_seen;
1789  return SVN_NO_ERROR;
1790}
1791
1792static svn_error_t *
1793git_index(enum parse_state *new_state, char *line, svn_patch_t *patch,
1794          apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1795{
1796  /* We either have something like "index 33e5b38..0000000" (which we just
1797     ignore as we are not interested in git specific shas) or something like
1798     "index 33e5b38..0000000 120000" which tells us the mode, that isn't
1799     changed by applying this patch.
1800
1801     If the mode would have changed then we would see 'old mode' and 'new mode'
1802     lines.
1803  */
1804  line = strchr(line + STRLEN_LITERAL("index "), ' ');
1805
1806  if (line && patch->new_executable_bit == svn_tristate_unknown
1807           && patch->new_symlink_bit == svn_tristate_unknown
1808           && patch->operation != svn_diff_op_added
1809           && patch->operation != svn_diff_op_deleted)
1810    {
1811      SVN_ERR(parse_git_mode_bits(&patch->new_executable_bit,
1812                                  &patch->new_symlink_bit,
1813                                  line + 1));
1814
1815      /* There is no change.. so set the old values to the new values */
1816      patch->old_executable_bit = patch->new_executable_bit;
1817      patch->old_symlink_bit = patch->new_symlink_bit;
1818    }
1819
1820  /* This function doesn't change the state! */
1821  /* *new_state = *new_state */
1822  return SVN_NO_ERROR;
1823}
1824
1825/* Parse the 'rename from ' line of a git extended unidiff. */
1826static svn_error_t *
1827git_move_from(enum parse_state *new_state, char *line, svn_patch_t *patch,
1828              apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1829{
1830  SVN_ERR(grab_filename(&patch->old_filename,
1831                        line + STRLEN_LITERAL("rename from "),
1832                        result_pool, scratch_pool));
1833
1834  *new_state = state_move_from_seen;
1835  return SVN_NO_ERROR;
1836}
1837
1838/* Parse the 'rename to ' line of a git extended unidiff. */
1839static svn_error_t *
1840git_move_to(enum parse_state *new_state, char *line, svn_patch_t *patch,
1841            apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1842{
1843  SVN_ERR(grab_filename(&patch->new_filename,
1844                        line + STRLEN_LITERAL("rename to "),
1845                        result_pool, scratch_pool));
1846
1847  patch->operation = svn_diff_op_moved;
1848
1849  *new_state = state_git_tree_seen;
1850  return SVN_NO_ERROR;
1851}
1852
1853/* Parse the 'copy from ' line of a git extended unidiff. */
1854static svn_error_t *
1855git_copy_from(enum parse_state *new_state, char *line, svn_patch_t *patch,
1856              apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1857{
1858  SVN_ERR(grab_filename(&patch->old_filename,
1859                        line + STRLEN_LITERAL("copy from "),
1860                        result_pool, scratch_pool));
1861
1862  *new_state = state_copy_from_seen;
1863  return SVN_NO_ERROR;
1864}
1865
1866/* Parse the 'copy to ' line of a git extended unidiff. */
1867static svn_error_t *
1868git_copy_to(enum parse_state *new_state, char *line, svn_patch_t *patch,
1869            apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1870{
1871  SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("copy to "),
1872                        result_pool, scratch_pool));
1873
1874  patch->operation = svn_diff_op_copied;
1875
1876  *new_state = state_git_tree_seen;
1877  return SVN_NO_ERROR;
1878}
1879
1880/* Parse the 'new file ' line of a git extended unidiff. */
1881static svn_error_t *
1882git_new_file(enum parse_state *new_state, char *line, svn_patch_t *patch,
1883             apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1884{
1885  SVN_ERR(parse_git_mode_bits(&patch->new_executable_bit,
1886                              &patch->new_symlink_bit,
1887                              line + STRLEN_LITERAL("new file mode ")));
1888
1889  patch->operation = svn_diff_op_added;
1890
1891  /* Filename already retrieved from diff --git header. */
1892
1893  *new_state = state_git_tree_seen;
1894  return SVN_NO_ERROR;
1895}
1896
1897/* Parse the 'deleted file ' line of a git extended unidiff. */
1898static svn_error_t *
1899git_deleted_file(enum parse_state *new_state, char *line, svn_patch_t *patch,
1900                 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1901{
1902  SVN_ERR(parse_git_mode_bits(&patch->old_executable_bit,
1903                              &patch->old_symlink_bit,
1904                              line + STRLEN_LITERAL("deleted file mode ")));
1905
1906  patch->operation = svn_diff_op_deleted;
1907
1908  /* Filename already retrieved from diff --git header. */
1909
1910  *new_state = state_git_tree_seen;
1911  return SVN_NO_ERROR;
1912}
1913
1914/* Parse the 'GIT binary patch' header */
1915static svn_error_t *
1916binary_patch_start(enum parse_state *new_state, char *line, svn_patch_t *patch,
1917             apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1918{
1919  *new_state = state_binary_patch_found;
1920  return SVN_NO_ERROR;
1921}
1922
1923
1924/* Add a HUNK associated with the property PROP_NAME to PATCH. */
1925static svn_error_t *
1926add_property_hunk(svn_patch_t *patch, const char *prop_name,
1927                  svn_diff_hunk_t *hunk, svn_diff_operation_kind_t operation,
1928                  apr_pool_t *result_pool)
1929{
1930  svn_prop_patch_t *prop_patch;
1931
1932  prop_patch = svn_hash_gets(patch->prop_patches, prop_name);
1933
1934  if (! prop_patch)
1935    {
1936      prop_patch = apr_palloc(result_pool, sizeof(svn_prop_patch_t));
1937      prop_patch->name = prop_name;
1938      prop_patch->operation = operation;
1939      prop_patch->hunks = apr_array_make(result_pool, 1,
1940                                         sizeof(svn_diff_hunk_t *));
1941
1942      svn_hash_sets(patch->prop_patches, prop_name, prop_patch);
1943    }
1944
1945  APR_ARRAY_PUSH(prop_patch->hunks, svn_diff_hunk_t *) = hunk;
1946
1947  return SVN_NO_ERROR;
1948}
1949
1950struct svn_patch_file_t
1951{
1952  /* The APR file handle to the patch file. */
1953  apr_file_t *apr_file;
1954
1955  /* The file offset at which the next patch is expected. */
1956  apr_off_t next_patch_offset;
1957};
1958
1959svn_error_t *
1960svn_diff_open_patch_file(svn_patch_file_t **patch_file,
1961                         const char *local_abspath,
1962                         apr_pool_t *result_pool)
1963{
1964  svn_patch_file_t *p;
1965
1966  p = apr_palloc(result_pool, sizeof(*p));
1967  SVN_ERR(svn_io_file_open(&p->apr_file, local_abspath,
1968                           APR_READ | APR_BUFFERED, APR_OS_DEFAULT,
1969                           result_pool));
1970  p->next_patch_offset = 0;
1971  *patch_file = p;
1972
1973  return SVN_NO_ERROR;
1974}
1975
1976/* Parse hunks from APR_FILE and store them in PATCH->HUNKS.
1977 * Parsing stops if no valid next hunk can be found.
1978 * If IGNORE_WHITESPACE is TRUE, lines without
1979 * leading spaces will be treated as context lines.
1980 * Allocate results in RESULT_POOL.
1981 * Use SCRATCH_POOL for temporary allocations. */
1982static svn_error_t *
1983parse_hunks(svn_patch_t *patch, apr_file_t *apr_file,
1984            svn_boolean_t ignore_whitespace,
1985            apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1986{
1987  svn_diff_hunk_t *hunk;
1988  svn_boolean_t is_property;
1989  const char *last_prop_name;
1990  const char *prop_name;
1991  svn_diff_operation_kind_t prop_operation;
1992  apr_pool_t *iterpool;
1993
1994  last_prop_name = NULL;
1995
1996  patch->hunks = apr_array_make(result_pool, 10, sizeof(svn_diff_hunk_t *));
1997  patch->prop_patches = apr_hash_make(result_pool);
1998  iterpool = svn_pool_create(scratch_pool);
1999  do
2000    {
2001      svn_pool_clear(iterpool);
2002
2003      SVN_ERR(parse_next_hunk(&hunk, &is_property, &prop_name, &prop_operation,
2004                              patch, apr_file, ignore_whitespace, result_pool,
2005                              iterpool));
2006
2007      if (hunk && is_property)
2008        {
2009          if (! prop_name)
2010            prop_name = last_prop_name;
2011          else
2012            last_prop_name = prop_name;
2013
2014          /* Skip pretty-printed svn:mergeinfo property hunks.
2015           * Pretty-printed mergeinfo data cannot be represented as a hunk and
2016           * is therefore stored in PATCH itself. */
2017          if (hunk->is_pretty_print_mergeinfo)
2018            continue;
2019
2020          SVN_ERR(add_property_hunk(patch, prop_name, hunk, prop_operation,
2021                                    result_pool));
2022        }
2023      else if (hunk)
2024        {
2025          APR_ARRAY_PUSH(patch->hunks, svn_diff_hunk_t *) = hunk;
2026          last_prop_name = NULL;
2027        }
2028
2029    }
2030  while (hunk);
2031  svn_pool_destroy(iterpool);
2032
2033  return SVN_NO_ERROR;
2034}
2035
2036static svn_error_t *
2037parse_binary_patch(svn_patch_t *patch, apr_file_t *apr_file,
2038                   svn_boolean_t reverse,
2039                   apr_pool_t *result_pool, apr_pool_t *scratch_pool)
2040{
2041  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
2042  apr_off_t pos, last_line;
2043  svn_stringbuf_t *line;
2044  svn_boolean_t eof = FALSE;
2045  svn_diff_binary_patch_t *bpatch = apr_pcalloc(result_pool, sizeof(*bpatch));
2046  svn_boolean_t in_blob = FALSE;
2047  svn_boolean_t in_src = FALSE;
2048
2049  bpatch->apr_file = apr_file;
2050
2051  patch->prop_patches = apr_hash_make(result_pool);
2052
2053  SVN_ERR(svn_io_file_get_offset(&pos, apr_file, scratch_pool));
2054
2055  while (!eof)
2056    {
2057      last_line = pos;
2058      SVN_ERR(svn_io_file_readline(apr_file, &line, NULL, &eof, APR_SIZE_MAX,
2059                               iterpool, iterpool));
2060
2061      /* Update line offset for next iteration. */
2062      SVN_ERR(svn_io_file_get_offset(&pos, apr_file, iterpool));
2063
2064      if (in_blob)
2065        {
2066          char c = line->data[0];
2067
2068          /* 66 = len byte + (52/4*5) chars */
2069          if (((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
2070              && line->len <= 66
2071              && !strchr(line->data, ':')
2072              && !strchr(line->data, ' '))
2073            {
2074              /* One more blop line */
2075              if (in_src)
2076                bpatch->src_end = pos;
2077              else
2078                bpatch->dst_end = pos;
2079            }
2080          else if (svn_stringbuf_first_non_whitespace(line) < line->len
2081                   && !(in_src && bpatch->src_start < last_line))
2082            {
2083              break; /* Bad patch */
2084            }
2085          else if (in_src)
2086            {
2087              patch->binary_patch = bpatch; /* SUCCESS! */
2088              break;
2089            }
2090          else
2091            {
2092              in_blob = FALSE;
2093              in_src = TRUE;
2094            }
2095        }
2096      else if (starts_with(line->data, "literal "))
2097        {
2098          apr_uint64_t expanded_size;
2099          svn_error_t *err = svn_cstring_strtoui64(&expanded_size,
2100                                                   &line->data[8],
2101                                                   0, APR_UINT64_MAX, 10);
2102
2103          if (err)
2104            {
2105              svn_error_clear(err);
2106              break;
2107            }
2108
2109          if (in_src)
2110            {
2111              bpatch->src_start = pos;
2112              bpatch->src_filesize = expanded_size;
2113            }
2114          else
2115            {
2116              bpatch->dst_start = pos;
2117              bpatch->dst_filesize = expanded_size;
2118            }
2119          in_blob = TRUE;
2120        }
2121      else
2122        break; /* We don't support GIT deltas (yet) */
2123    }
2124  svn_pool_destroy(iterpool);
2125
2126  if (!eof)
2127    /* Rewind to the start of the line just read, so subsequent calls
2128     * don't end up skipping the line. It may contain a patch or hunk header.*/
2129    SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &last_line, scratch_pool));
2130  else if (in_src
2131           && ((bpatch->src_end > bpatch->src_start) || !bpatch->src_filesize))
2132    {
2133      patch->binary_patch = bpatch; /* SUCCESS */
2134    }
2135
2136  /* Reverse patch if requested */
2137  if (reverse && patch->binary_patch)
2138    {
2139      apr_off_t tmp_start = bpatch->src_start;
2140      apr_off_t tmp_end = bpatch->src_end;
2141      svn_filesize_t tmp_filesize = bpatch->src_filesize;
2142
2143      bpatch->src_start = bpatch->dst_start;
2144      bpatch->src_end = bpatch->dst_end;
2145      bpatch->src_filesize = bpatch->dst_filesize;
2146
2147      bpatch->dst_start = tmp_start;
2148      bpatch->dst_end = tmp_end;
2149      bpatch->dst_filesize = tmp_filesize;
2150    }
2151
2152  return SVN_NO_ERROR;
2153}
2154
2155/* State machine for the diff header parser.
2156 * Expected Input   Required state          Function to call */
2157static struct transition transitions[] =
2158{
2159  {"--- ",              state_start,            diff_minus},
2160  {"+++ ",              state_minus_seen,       diff_plus},
2161
2162  {"diff --git",        state_start,            git_start},
2163  {"--- a/",            state_git_diff_seen,    git_minus},
2164  {"--- a/",            state_git_mode_seen,    git_minus},
2165  {"--- a/",            state_git_tree_seen,    git_minus},
2166  {"--- /dev/null",     state_git_mode_seen,    git_minus},
2167  {"--- /dev/null",     state_git_tree_seen,    git_minus},
2168  {"+++ b/",            state_git_minus_seen,   git_plus},
2169  {"+++ /dev/null",     state_git_minus_seen,   git_plus},
2170
2171  {"old mode ",         state_git_diff_seen,    git_old_mode},
2172  {"new mode ",         state_old_mode_seen,    git_new_mode},
2173
2174  {"rename from ",      state_git_diff_seen,    git_move_from},
2175  {"rename from ",      state_git_mode_seen,    git_move_from},
2176  {"rename to ",        state_move_from_seen,   git_move_to},
2177
2178  {"copy from ",        state_git_diff_seen,    git_copy_from},
2179  {"copy from ",        state_git_mode_seen,    git_copy_from},
2180  {"copy to ",          state_copy_from_seen,   git_copy_to},
2181
2182  {"new file ",         state_git_diff_seen,    git_new_file},
2183
2184  {"deleted file ",     state_git_diff_seen,    git_deleted_file},
2185
2186  {"index ",            state_git_diff_seen,    git_index},
2187  {"index ",            state_git_tree_seen,    git_index},
2188  {"index ",            state_git_mode_seen,    git_index},
2189
2190  {"GIT binary patch",  state_git_diff_seen,    binary_patch_start},
2191  {"GIT binary patch",  state_git_tree_seen,    binary_patch_start},
2192  {"GIT binary patch",  state_git_mode_seen,    binary_patch_start},
2193};
2194
2195svn_error_t *
2196svn_diff_parse_next_patch(svn_patch_t **patch_p,
2197                          svn_patch_file_t *patch_file,
2198                          svn_boolean_t reverse,
2199                          svn_boolean_t ignore_whitespace,
2200                          apr_pool_t *result_pool,
2201                          apr_pool_t *scratch_pool)
2202{
2203  apr_off_t pos, last_line;
2204  svn_boolean_t eof;
2205  svn_boolean_t line_after_tree_header_read = FALSE;
2206  apr_pool_t *iterpool;
2207  svn_patch_t *patch;
2208  enum parse_state state = state_start;
2209
2210  if (apr_file_eof(patch_file->apr_file) == APR_EOF)
2211    {
2212      /* No more patches here. */
2213      *patch_p = NULL;
2214      return SVN_NO_ERROR;
2215    }
2216
2217  patch = apr_pcalloc(result_pool, sizeof(*patch));
2218  patch->old_executable_bit = svn_tristate_unknown;
2219  patch->new_executable_bit = svn_tristate_unknown;
2220  patch->old_symlink_bit = svn_tristate_unknown;
2221  patch->new_symlink_bit = svn_tristate_unknown;
2222
2223  pos = patch_file->next_patch_offset;
2224  SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &pos, scratch_pool));
2225
2226  iterpool = svn_pool_create(scratch_pool);
2227  do
2228    {
2229      svn_stringbuf_t *line;
2230      svn_boolean_t valid_header_line = FALSE;
2231      int i;
2232
2233      svn_pool_clear(iterpool);
2234
2235      /* Remember the current line's offset, and read the line. */
2236      last_line = pos;
2237      SVN_ERR(svn_io_file_readline(patch_file->apr_file, &line, NULL, &eof,
2238                                   APR_SIZE_MAX, iterpool, iterpool));
2239
2240      if (! eof)
2241        {
2242          /* Update line offset for next iteration. */
2243          SVN_ERR(svn_io_file_get_offset(&pos, patch_file->apr_file,
2244                                         iterpool));
2245        }
2246
2247      /* Run the state machine. */
2248      for (i = 0; i < (sizeof(transitions) / sizeof(transitions[0])); i++)
2249        {
2250          if (starts_with(line->data, transitions[i].expected_input)
2251              && state == transitions[i].required_state)
2252            {
2253              SVN_ERR(transitions[i].fn(&state, line->data, patch,
2254                                        result_pool, iterpool));
2255              valid_header_line = TRUE;
2256              break;
2257            }
2258        }
2259
2260      if (state == state_unidiff_found
2261          || state == state_git_header_found
2262          || state == state_binary_patch_found)
2263        {
2264          /* We have a valid diff header, yay! */
2265          break;
2266        }
2267      else if ((state == state_git_tree_seen || state == state_git_mode_seen)
2268               && line_after_tree_header_read
2269               && !valid_header_line)
2270        {
2271          /* We have a valid diff header for a patch with only tree changes.
2272           * Rewind to the start of the line just read, so subsequent calls
2273           * to this function don't end up skipping the line -- it may
2274           * contain a patch. */
2275          SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &last_line,
2276                                   scratch_pool));
2277          break;
2278        }
2279      else if (state == state_git_tree_seen
2280               || state == state_git_mode_seen)
2281        {
2282          line_after_tree_header_read = TRUE;
2283        }
2284      else if (! valid_header_line && state != state_start
2285               && state != state_git_diff_seen)
2286        {
2287          /* We've encountered an invalid diff header.
2288           *
2289           * Rewind to the start of the line just read - it may be a new
2290           * header that begins there. */
2291          SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &last_line,
2292                                   scratch_pool));
2293          state = state_start;
2294        }
2295
2296    }
2297  while (! eof);
2298
2299  patch->reverse = reverse;
2300  if (reverse)
2301    {
2302      const char *temp;
2303      svn_tristate_t ts_tmp;
2304
2305      temp = patch->old_filename;
2306      patch->old_filename = patch->new_filename;
2307      patch->new_filename = temp;
2308
2309      switch (patch->operation)
2310        {
2311          case svn_diff_op_added:
2312            patch->operation = svn_diff_op_deleted;
2313            break;
2314          case svn_diff_op_deleted:
2315            patch->operation = svn_diff_op_added;
2316            break;
2317
2318          case svn_diff_op_modified:
2319            break; /* Stays modified. */
2320
2321          case svn_diff_op_copied:
2322          case svn_diff_op_moved:
2323            break; /* Stays copied or moved, just in the other direction. */
2324          case svn_diff_op_unchanged:
2325            break; /* Stays unchanged, of course. */
2326        }
2327
2328      ts_tmp = patch->old_executable_bit;
2329      patch->old_executable_bit = patch->new_executable_bit;
2330      patch->new_executable_bit = ts_tmp;
2331
2332      ts_tmp = patch->old_symlink_bit;
2333      patch->old_symlink_bit = patch->new_symlink_bit;
2334      patch->new_symlink_bit = ts_tmp;
2335    }
2336
2337  if (patch->old_filename == NULL || patch->new_filename == NULL)
2338    {
2339      /* Something went wrong, just discard the result. */
2340      patch = NULL;
2341    }
2342  else
2343    {
2344      if (state == state_binary_patch_found)
2345        {
2346          SVN_ERR(parse_binary_patch(patch, patch_file->apr_file, reverse,
2347                                     result_pool, iterpool));
2348          /* And fall through in property parsing */
2349        }
2350
2351      SVN_ERR(parse_hunks(patch, patch_file->apr_file, ignore_whitespace,
2352                          result_pool, iterpool));
2353    }
2354
2355  svn_pool_destroy(iterpool);
2356
2357  SVN_ERR(svn_io_file_get_offset(&patch_file->next_patch_offset,
2358                                 patch_file->apr_file, scratch_pool));
2359
2360  if (patch && patch->hunks)
2361    {
2362      /* Usually, hunks appear in the patch sorted by their original line
2363       * offset. But just in case they weren't parsed in this order for
2364       * some reason, we sort them so that our caller can assume that hunks
2365       * are sorted as if parsed from a usual patch. */
2366      svn_sort__array(patch->hunks, compare_hunks);
2367    }
2368
2369  *patch_p = patch;
2370  return SVN_NO_ERROR;
2371}
2372
2373svn_error_t *
2374svn_diff_close_patch_file(svn_patch_file_t *patch_file,
2375                          apr_pool_t *scratch_pool)
2376{
2377  return svn_error_trace(svn_io_file_close(patch_file->apr_file,
2378                                           scratch_pool));
2379}
2380