util.c revision 289166
1/*
2 * util.c :  routines for doing diffs
3 *
4 * ====================================================================
5 *    Licensed to the Apache Software Foundation (ASF) under one
6 *    or more contributor license agreements.  See the NOTICE file
7 *    distributed with this work for additional information
8 *    regarding copyright ownership.  The ASF licenses this file
9 *    to you under the Apache License, Version 2.0 (the
10 *    "License"); you may not use this file except in compliance
11 *    with the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 *    Unless required by applicable law or agreed to in writing,
16 *    software distributed under the License is distributed on an
17 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 *    KIND, either express or implied.  See the License for the
19 *    specific language governing permissions and limitations
20 *    under the License.
21 * ====================================================================
22 */
23
24
25#include <apr.h>
26#include <apr_general.h>
27
28#include "svn_hash.h"
29#include "svn_pools.h"
30#include "svn_dirent_uri.h"
31#include "svn_props.h"
32#include "svn_mergeinfo.h"
33#include "svn_error.h"
34#include "svn_diff.h"
35#include "svn_types.h"
36#include "svn_ctype.h"
37#include "svn_sorts.h"
38#include "svn_utf.h"
39#include "svn_version.h"
40
41#include "private/svn_diff_private.h"
42#include "diff.h"
43
44#include "svn_private_config.h"
45
46
47svn_boolean_t
48svn_diff_contains_conflicts(svn_diff_t *diff)
49{
50  while (diff != NULL)
51    {
52      if (diff->type == svn_diff__type_conflict)
53        {
54          return TRUE;
55        }
56
57      diff = diff->next;
58    }
59
60  return FALSE;
61}
62
63svn_boolean_t
64svn_diff_contains_diffs(svn_diff_t *diff)
65{
66  while (diff != NULL)
67    {
68      if (diff->type != svn_diff__type_common)
69        {
70          return TRUE;
71        }
72
73      diff = diff->next;
74    }
75
76  return FALSE;
77}
78
79svn_error_t *
80svn_diff_output(svn_diff_t *diff,
81                void *output_baton,
82                const svn_diff_output_fns_t *vtable)
83{
84  svn_error_t *(*output_fn)(void *,
85                            apr_off_t, apr_off_t,
86                            apr_off_t, apr_off_t,
87                            apr_off_t, apr_off_t);
88
89  while (diff != NULL)
90    {
91      switch (diff->type)
92        {
93        case svn_diff__type_common:
94          output_fn = vtable->output_common;
95          break;
96
97        case svn_diff__type_diff_common:
98          output_fn = vtable->output_diff_common;
99          break;
100
101        case svn_diff__type_diff_modified:
102          output_fn = vtable->output_diff_modified;
103          break;
104
105        case svn_diff__type_diff_latest:
106          output_fn = vtable->output_diff_latest;
107          break;
108
109        case svn_diff__type_conflict:
110          output_fn = NULL;
111          if (vtable->output_conflict != NULL)
112            {
113              SVN_ERR(vtable->output_conflict(output_baton,
114                               diff->original_start, diff->original_length,
115                               diff->modified_start, diff->modified_length,
116                               diff->latest_start, diff->latest_length,
117                               diff->resolved_diff));
118            }
119          break;
120
121        default:
122          output_fn = NULL;
123          break;
124        }
125
126      if (output_fn != NULL)
127        {
128          SVN_ERR(output_fn(output_baton,
129                            diff->original_start, diff->original_length,
130                            diff->modified_start, diff->modified_length,
131                            diff->latest_start, diff->latest_length));
132        }
133
134      diff = diff->next;
135    }
136
137  return SVN_NO_ERROR;
138}
139
140
141void
142svn_diff__normalize_buffer(char **tgt,
143                           apr_off_t *lengthp,
144                           svn_diff__normalize_state_t *statep,
145                           const char *buf,
146                           const svn_diff_file_options_t *opts)
147{
148  /* Variables for looping through BUF */
149  const char *curp, *endp;
150
151  /* Variable to record normalizing state */
152  svn_diff__normalize_state_t state = *statep;
153
154  /* Variables to track what needs copying into the target buffer */
155  const char *start = buf;
156  apr_size_t include_len = 0;
157  svn_boolean_t last_skipped = FALSE; /* makes sure we set 'start' */
158
159  /* Variable to record the state of the target buffer */
160  char *tgt_newend = *tgt;
161
162  /* If this is a noop, then just get out of here. */
163  if (! opts->ignore_space && ! opts->ignore_eol_style)
164    {
165      *tgt = (char *)buf;
166      return;
167    }
168
169
170  /* It only took me forever to get this routine right,
171     so here my thoughts go:
172
173    Below, we loop through the data, doing 2 things:
174
175     - Normalizing
176     - Copying other data
177
178     The routine tries its hardest *not* to copy data, but instead
179     returning a pointer into already normalized existing data.
180
181     To this end, a block 'other data' shouldn't be copied when found,
182     but only as soon as it can't be returned in-place.
183
184     On a character level, there are 3 possible operations:
185
186     - Skip the character (don't include in the normalized data)
187     - Include the character (do include in the normalizad data)
188     - Include as another character
189       This is essentially the same as skipping the current character
190       and inserting a given character in the output data.
191
192    The macros below (SKIP, INCLUDE and INCLUDE_AS) are defined to
193    handle the character based operations.  The macros themselves
194    collect character level data into blocks.
195
196    At all times designate the START, INCLUDED_LEN and CURP pointers
197    an included and and skipped block like this:
198
199      [ start, start + included_len ) [ start + included_len, curp )
200             INCLUDED                        EXCLUDED
201
202    When the routine flips from skipping to including, the last
203    included block has to be flushed to the output buffer.
204  */
205
206  /* Going from including to skipping; only schedules the current
207     included section for flushing.
208     Also, simply chop off the character if it's the first in the buffer,
209     so we can possibly just return the remainder of the buffer */
210#define SKIP             \
211  do {                   \
212    if (start == curp)   \
213       ++start;          \
214    last_skipped = TRUE; \
215  } while (0)
216
217#define INCLUDE                \
218  do {                         \
219    if (last_skipped)          \
220      COPY_INCLUDED_SECTION;   \
221    ++include_len;             \
222    last_skipped = FALSE;      \
223  } while (0)
224
225#define COPY_INCLUDED_SECTION                     \
226  do {                                            \
227    if (include_len > 0)                          \
228      {                                           \
229         memmove(tgt_newend, start, include_len); \
230         tgt_newend += include_len;               \
231         include_len = 0;                         \
232      }                                           \
233    start = curp;                                 \
234  } while (0)
235
236  /* Include the current character as character X.
237     If the current character already *is* X, add it to the
238     currently included region, increasing chances for consecutive
239     fully normalized blocks. */
240#define INCLUDE_AS(x)          \
241  do {                         \
242    if (*curp == (x))          \
243      INCLUDE;                 \
244    else                       \
245      {                        \
246        INSERT((x));           \
247        SKIP;                  \
248      }                        \
249  } while (0)
250
251  /* Insert character X in the output buffer */
252#define INSERT(x)              \
253  do {                         \
254    COPY_INCLUDED_SECTION;     \
255    *tgt_newend++ = (x);       \
256  } while (0)
257
258  for (curp = buf, endp = buf + *lengthp; curp != endp; ++curp)
259    {
260      switch (*curp)
261        {
262        case '\r':
263          if (opts->ignore_eol_style)
264            INCLUDE_AS('\n');
265          else
266            INCLUDE;
267          state = svn_diff__normalize_state_cr;
268          break;
269
270        case '\n':
271          if (state == svn_diff__normalize_state_cr
272              && opts->ignore_eol_style)
273            SKIP;
274          else
275            INCLUDE;
276          state = svn_diff__normalize_state_normal;
277          break;
278
279        default:
280          if (svn_ctype_isspace(*curp)
281              && opts->ignore_space != svn_diff_file_ignore_space_none)
282            {
283              /* Whitespace but not '\r' or '\n' */
284              if (state != svn_diff__normalize_state_whitespace
285                  && opts->ignore_space
286                     == svn_diff_file_ignore_space_change)
287                /*### If we can postpone insertion of the space
288                  until the next non-whitespace character,
289                  we have a potential of reducing the number of copies:
290                  If this space is followed by more spaces,
291                  this will cause a block-copy.
292                  If the next non-space block is considered normalized
293                  *and* preceded by a space, we can take advantage of that. */
294                /* Note, the above optimization applies to 90% of the source
295                   lines in our own code, since it (generally) doesn't use
296                   more than one space per blank section, except for the
297                   beginning of a line. */
298                INCLUDE_AS(' ');
299              else
300                SKIP;
301              state = svn_diff__normalize_state_whitespace;
302            }
303          else
304            {
305              /* Non-whitespace character, or whitespace character in
306                 svn_diff_file_ignore_space_none mode. */
307              INCLUDE;
308              state = svn_diff__normalize_state_normal;
309            }
310        }
311    }
312
313  /* If we're not in whitespace, flush the last chunk of data.
314   * Note that this will work correctly when this is the last chunk of the
315   * file:
316   * * If there is an eol, it will either have been output when we entered
317   *   the state_cr, or it will be output now.
318   * * If there is no eol and we're not in whitespace, then we just output
319   *   everything below.
320   * * If there's no eol and we are in whitespace, we want to ignore
321   *   whitespace unconditionally. */
322
323  if (*tgt == tgt_newend)
324    {
325      /* we haven't copied any data in to *tgt and our chunk consists
326         only of one block of (already normalized) data.
327         Just return the block. */
328      *tgt = (char *)start;
329      *lengthp = include_len;
330    }
331  else
332    {
333      COPY_INCLUDED_SECTION;
334      *lengthp = tgt_newend - *tgt;
335    }
336
337  *statep = state;
338
339#undef SKIP
340#undef INCLUDE
341#undef INCLUDE_AS
342#undef INSERT
343#undef COPY_INCLUDED_SECTION
344}
345
346svn_error_t *
347svn_diff__unified_append_no_newline_msg(svn_stringbuf_t *stringbuf,
348                                        const char *header_encoding,
349                                        apr_pool_t *scratch_pool)
350{
351  const char *out_str;
352
353  SVN_ERR(svn_utf_cstring_from_utf8_ex2(
354            &out_str,
355            APR_EOL_STR
356            SVN_DIFF__NO_NEWLINE_AT_END_OF_FILE APR_EOL_STR,
357            header_encoding, scratch_pool));
358  svn_stringbuf_appendcstr(stringbuf, out_str);
359  return SVN_NO_ERROR;
360}
361
362svn_error_t *
363svn_diff__unified_write_hunk_header(svn_stream_t *output_stream,
364                                    const char *header_encoding,
365                                    const char *hunk_delimiter,
366                                    apr_off_t old_start,
367                                    apr_off_t old_length,
368                                    apr_off_t new_start,
369                                    apr_off_t new_length,
370                                    const char *hunk_extra_context,
371                                    apr_pool_t *scratch_pool)
372{
373  SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
374                                      scratch_pool,
375                                      "%s -%" APR_OFF_T_FMT,
376                                      hunk_delimiter, old_start));
377  /* If the hunk length is 1, suppress the number of lines in the hunk
378   * (it is 1 implicitly) */
379  if (old_length != 1)
380    {
381      SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
382                                          scratch_pool,
383                                          ",%" APR_OFF_T_FMT, old_length));
384    }
385
386  SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
387                                      scratch_pool,
388                                      " +%" APR_OFF_T_FMT, new_start));
389  if (new_length != 1)
390    {
391      SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
392                                          scratch_pool,
393                                          ",%" APR_OFF_T_FMT, new_length));
394    }
395
396  if (hunk_extra_context == NULL)
397      hunk_extra_context = "";
398  SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
399                                      scratch_pool,
400                                      " %s%s%s" APR_EOL_STR,
401                                      hunk_delimiter,
402                                      hunk_extra_context[0] ? " " : "",
403                                      hunk_extra_context));
404  return SVN_NO_ERROR;
405}
406
407svn_error_t *
408svn_diff__unidiff_write_header(svn_stream_t *output_stream,
409                               const char *header_encoding,
410                               const char *old_header,
411                               const char *new_header,
412                               apr_pool_t *scratch_pool)
413{
414  SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
415                                      scratch_pool,
416                                      "--- %s" APR_EOL_STR
417                                      "+++ %s" APR_EOL_STR,
418                                      old_header,
419                                      new_header));
420  return SVN_NO_ERROR;
421}
422
423/* A helper function for display_prop_diffs.  Output the differences between
424   the mergeinfo stored in ORIG_MERGEINFO_VAL and NEW_MERGEINFO_VAL in a
425   human-readable form to OUTSTREAM, using ENCODING.  Use POOL for temporary
426   allocations. */
427static svn_error_t *
428display_mergeinfo_diff(const char *old_mergeinfo_val,
429                       const char *new_mergeinfo_val,
430                       const char *encoding,
431                       svn_stream_t *outstream,
432                       apr_pool_t *pool)
433{
434  apr_hash_t *old_mergeinfo_hash, *new_mergeinfo_hash, *added, *deleted;
435  apr_pool_t *iterpool = svn_pool_create(pool);
436  apr_hash_index_t *hi;
437
438  if (old_mergeinfo_val)
439    SVN_ERR(svn_mergeinfo_parse(&old_mergeinfo_hash, old_mergeinfo_val, pool));
440  else
441    old_mergeinfo_hash = NULL;
442
443  if (new_mergeinfo_val)
444    SVN_ERR(svn_mergeinfo_parse(&new_mergeinfo_hash, new_mergeinfo_val, pool));
445  else
446    new_mergeinfo_hash = NULL;
447
448  SVN_ERR(svn_mergeinfo_diff2(&deleted, &added, old_mergeinfo_hash,
449                              new_mergeinfo_hash,
450                              TRUE, pool, pool));
451
452  for (hi = apr_hash_first(pool, deleted);
453       hi; hi = apr_hash_next(hi))
454    {
455      const char *from_path = svn__apr_hash_index_key(hi);
456      svn_rangelist_t *merge_revarray = svn__apr_hash_index_val(hi);
457      svn_string_t *merge_revstr;
458
459      svn_pool_clear(iterpool);
460      SVN_ERR(svn_rangelist_to_string(&merge_revstr, merge_revarray,
461                                      iterpool));
462
463      SVN_ERR(svn_stream_printf_from_utf8(outstream, encoding, iterpool,
464                                          _("   Reverse-merged %s:r%s%s"),
465                                          from_path, merge_revstr->data,
466                                          APR_EOL_STR));
467    }
468
469  for (hi = apr_hash_first(pool, added);
470       hi; hi = apr_hash_next(hi))
471    {
472      const char *from_path = svn__apr_hash_index_key(hi);
473      svn_rangelist_t *merge_revarray = svn__apr_hash_index_val(hi);
474      svn_string_t *merge_revstr;
475
476      svn_pool_clear(iterpool);
477      SVN_ERR(svn_rangelist_to_string(&merge_revstr, merge_revarray,
478                                      iterpool));
479
480      SVN_ERR(svn_stream_printf_from_utf8(outstream, encoding, iterpool,
481                                          _("   Merged %s:r%s%s"),
482                                          from_path, merge_revstr->data,
483                                          APR_EOL_STR));
484    }
485
486  svn_pool_destroy(iterpool);
487  return SVN_NO_ERROR;
488}
489
490/* qsort callback handling svn_prop_t by name */
491static int
492propchange_sort(const void *k1, const void *k2)
493{
494  const svn_prop_t *propchange1 = k1;
495  const svn_prop_t *propchange2 = k2;
496
497  return strcmp(propchange1->name, propchange2->name);
498}
499
500svn_error_t *
501svn_diff__display_prop_diffs(svn_stream_t *outstream,
502                             const char *encoding,
503                             const apr_array_header_t *propchanges,
504                             apr_hash_t *original_props,
505                             svn_boolean_t pretty_print_mergeinfo,
506                             apr_pool_t *scratch_pool)
507{
508  apr_pool_t *pool = scratch_pool;
509  apr_pool_t *iterpool = svn_pool_create(pool);
510  apr_array_header_t *changes = apr_array_copy(scratch_pool, propchanges);
511  int i;
512
513  qsort(changes->elts, changes->nelts, changes->elt_size, propchange_sort);
514
515  for (i = 0; i < changes->nelts; i++)
516    {
517      const char *action;
518      const svn_string_t *original_value;
519      const svn_prop_t *propchange
520        = &APR_ARRAY_IDX(changes, i, svn_prop_t);
521
522      if (original_props)
523        original_value = svn_hash_gets(original_props, propchange->name);
524      else
525        original_value = NULL;
526
527      /* If the property doesn't exist on either side, or if it exists
528         with the same value, skip it.  This can happen if the client is
529         hitting an old mod_dav_svn server that doesn't understand the
530         "send-all" REPORT style. */
531      if ((! (original_value || propchange->value))
532          || (original_value && propchange->value
533              && svn_string_compare(original_value, propchange->value)))
534        continue;
535
536      svn_pool_clear(iterpool);
537
538      if (! original_value)
539        action = "Added";
540      else if (! propchange->value)
541        action = "Deleted";
542      else
543        action = "Modified";
544      SVN_ERR(svn_stream_printf_from_utf8(outstream, encoding, iterpool,
545                                          "%s: %s%s", action,
546                                          propchange->name, APR_EOL_STR));
547
548      if (pretty_print_mergeinfo
549          && strcmp(propchange->name, SVN_PROP_MERGEINFO) == 0)
550        {
551          const char *orig = original_value ? original_value->data : NULL;
552          const char *val = propchange->value ? propchange->value->data : NULL;
553          svn_error_t *err = display_mergeinfo_diff(orig, val, encoding,
554                                                    outstream, iterpool);
555
556          /* Issue #3896: If we can't pretty-print mergeinfo differences
557             because invalid mergeinfo is present, then don't let the diff
558             fail, just print the diff as any other property. */
559          if (err && err->apr_err == SVN_ERR_MERGEINFO_PARSE_ERROR)
560            {
561              svn_error_clear(err);
562            }
563          else
564            {
565              SVN_ERR(err);
566              continue;
567            }
568        }
569
570      {
571        svn_diff_t *diff;
572        svn_diff_file_options_t options = { 0 };
573        const svn_string_t *orig
574          = original_value ? original_value
575                           : svn_string_create_empty(iterpool);
576        const svn_string_t *val
577          = propchange->value ? propchange->value
578                              : svn_string_create_empty(iterpool);
579
580        SVN_ERR(svn_diff_mem_string_diff(&diff, orig, val, &options,
581                                         iterpool));
582
583        /* UNIX patch will try to apply a diff even if the diff header
584         * is missing. It tries to be helpful by asking the user for a
585         * target filename when it can't determine the target filename
586         * from the diff header. But there usually are no files which
587         * UNIX patch could apply the property diff to, so we use "##"
588         * instead of "@@" as the default hunk delimiter for property diffs.
589         * We also supress the diff header. */
590        SVN_ERR(svn_diff_mem_string_output_unified2(
591                  outstream, diff, FALSE /* no header */, "##", NULL, NULL,
592                  encoding, orig, val, iterpool));
593      }
594    }
595  svn_pool_destroy(iterpool);
596
597  return SVN_NO_ERROR;
598}
599
600
601/* Return the library version number. */
602const svn_version_t *
603svn_diff_version(void)
604{
605  SVN_VERSION_BODY;
606}
607