util.c revision 299742
/*
 * util.c :  routines for doing diffs
 *
 * ====================================================================
 *    Licensed to the Apache Software Foundation (ASF) under one
 *    or more contributor license agreements.  See the NOTICE file
 *    distributed with this work for additional information
 *    regarding copyright ownership.  The ASF licenses this file
 *    to you under the Apache License, Version 2.0 (the
 *    "License"); you may not use this file except in compliance
 *    with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing,
 *    software distributed under the License is distributed on an
 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *    KIND, either express or implied.  See the License for the
 *    specific language governing permissions and limitations
 *    under the License.
 * ====================================================================
 */
23
24
25#include <apr.h>
26#include <apr_general.h>
27
28#include "svn_hash.h"
29#include "svn_pools.h"
30#include "svn_dirent_uri.h"
31#include "svn_props.h"
32#include "svn_mergeinfo.h"
33#include "svn_error.h"
34#include "svn_diff.h"
35#include "svn_types.h"
36#include "svn_ctype.h"
37#include "svn_utf.h"
38#include "svn_version.h"
39
40#include "private/svn_diff_private.h"
41#include "private/svn_sorts_private.h"
42#include "diff.h"
43
44#include "svn_private_config.h"
45
46
47svn_boolean_t
48svn_diff_contains_conflicts(svn_diff_t *diff)
49{
50  while (diff != NULL)
51    {
52      if (diff->type == svn_diff__type_conflict)
53        {
54          return TRUE;
55        }
56
57      diff = diff->next;
58    }
59
60  return FALSE;
61}
62
63svn_boolean_t
64svn_diff_contains_diffs(svn_diff_t *diff)
65{
66  while (diff != NULL)
67    {
68      if (diff->type != svn_diff__type_common)
69        {
70          return TRUE;
71        }
72
73      diff = diff->next;
74    }
75
76  return FALSE;
77}
78
79svn_error_t *
80svn_diff_output2(svn_diff_t *diff,
81                 void *output_baton,
82                 const svn_diff_output_fns_t *vtable,
83                 svn_cancel_func_t cancel_func,
84                 void *cancel_baton)
85{
86  svn_error_t *(*output_fn)(void *,
87                            apr_off_t, apr_off_t,
88                            apr_off_t, apr_off_t,
89                            apr_off_t, apr_off_t);
90
91  while (diff != NULL)
92    {
93      if (cancel_func)
94        SVN_ERR(cancel_func(cancel_baton));
95
96      switch (diff->type)
97        {
98        case svn_diff__type_common:
99          output_fn = vtable->output_common;
100          break;
101
102        case svn_diff__type_diff_common:
103          output_fn = vtable->output_diff_common;
104          break;
105
106        case svn_diff__type_diff_modified:
107          output_fn = vtable->output_diff_modified;
108          break;
109
110        case svn_diff__type_diff_latest:
111          output_fn = vtable->output_diff_latest;
112          break;
113
114        case svn_diff__type_conflict:
115          output_fn = NULL;
116          if (vtable->output_conflict != NULL)
117            {
118              SVN_ERR(vtable->output_conflict(output_baton,
119                               diff->original_start, diff->original_length,
120                               diff->modified_start, diff->modified_length,
121                               diff->latest_start, diff->latest_length,
122                               diff->resolved_diff));
123            }
124          break;
125
126        default:
127          output_fn = NULL;
128          break;
129        }
130
131      if (output_fn != NULL)
132        {
133          SVN_ERR(output_fn(output_baton,
134                            diff->original_start, diff->original_length,
135                            diff->modified_start, diff->modified_length,
136                            diff->latest_start, diff->latest_length));
137        }
138
139      diff = diff->next;
140    }
141
142  return SVN_NO_ERROR;
143}
144
145
146void
147svn_diff__normalize_buffer(char **tgt,
148                           apr_off_t *lengthp,
149                           svn_diff__normalize_state_t *statep,
150                           const char *buf,
151                           const svn_diff_file_options_t *opts)
152{
153  /* Variables for looping through BUF */
154  const char *curp, *endp;
155
156  /* Variable to record normalizing state */
157  svn_diff__normalize_state_t state = *statep;
158
159  /* Variables to track what needs copying into the target buffer */
160  const char *start = buf;
161  apr_size_t include_len = 0;
162  svn_boolean_t last_skipped = FALSE; /* makes sure we set 'start' */
163
164  /* Variable to record the state of the target buffer */
165  char *tgt_newend = *tgt;
166
167  /* If this is a noop, then just get out of here. */
168  if (! opts->ignore_space && ! opts->ignore_eol_style)
169    {
170      *tgt = (char *)buf;
171      return;
172    }
173
174
175  /* It only took me forever to get this routine right,
176     so here my thoughts go:
177
178    Below, we loop through the data, doing 2 things:
179
180     - Normalizing
181     - Copying other data
182
183     The routine tries its hardest *not* to copy data, but instead
184     returning a pointer into already normalized existing data.
185
186     To this end, a block 'other data' shouldn't be copied when found,
187     but only as soon as it can't be returned in-place.
188
189     On a character level, there are 3 possible operations:
190
191     - Skip the character (don't include in the normalized data)
192     - Include the character (do include in the normalizad data)
193     - Include as another character
194       This is essentially the same as skipping the current character
195       and inserting a given character in the output data.
196
197    The macros below (SKIP, INCLUDE and INCLUDE_AS) are defined to
198    handle the character based operations.  The macros themselves
199    collect character level data into blocks.
200
201    At all times designate the START, INCLUDED_LEN and CURP pointers
202    an included and and skipped block like this:
203
204      [ start, start + included_len ) [ start + included_len, curp )
205             INCLUDED                        EXCLUDED
206
207    When the routine flips from skipping to including, the last
208    included block has to be flushed to the output buffer.
209  */
210
211  /* Going from including to skipping; only schedules the current
212     included section for flushing.
213     Also, simply chop off the character if it's the first in the buffer,
214     so we can possibly just return the remainder of the buffer */
215#define SKIP             \
216  do {                   \
217    if (start == curp)   \
218       ++start;          \
219    last_skipped = TRUE; \
220  } while (0)
221
222#define INCLUDE                \
223  do {                         \
224    if (last_skipped)          \
225      COPY_INCLUDED_SECTION;   \
226    ++include_len;             \
227    last_skipped = FALSE;      \
228  } while (0)
229
230#define COPY_INCLUDED_SECTION                     \
231  do {                                            \
232    if (include_len > 0)                          \
233      {                                           \
234         memmove(tgt_newend, start, include_len); \
235         tgt_newend += include_len;               \
236         include_len = 0;                         \
237      }                                           \
238    start = curp;                                 \
239  } while (0)
240
241  /* Include the current character as character X.
242     If the current character already *is* X, add it to the
243     currently included region, increasing chances for consecutive
244     fully normalized blocks. */
245#define INCLUDE_AS(x)          \
246  do {                         \
247    if (*curp == (x))          \
248      INCLUDE;                 \
249    else                       \
250      {                        \
251        INSERT((x));           \
252        SKIP;                  \
253      }                        \
254  } while (0)
255
256  /* Insert character X in the output buffer */
257#define INSERT(x)              \
258  do {                         \
259    COPY_INCLUDED_SECTION;     \
260    *tgt_newend++ = (x);       \
261  } while (0)
262
263  for (curp = buf, endp = buf + *lengthp; curp != endp; ++curp)
264    {
265      switch (*curp)
266        {
267        case '\r':
268          if (opts->ignore_eol_style)
269            INCLUDE_AS('\n');
270          else
271            INCLUDE;
272          state = svn_diff__normalize_state_cr;
273          break;
274
275        case '\n':
276          if (state == svn_diff__normalize_state_cr
277              && opts->ignore_eol_style)
278            SKIP;
279          else
280            INCLUDE;
281          state = svn_diff__normalize_state_normal;
282          break;
283
284        default:
285          if (svn_ctype_isspace(*curp)
286              && opts->ignore_space != svn_diff_file_ignore_space_none)
287            {
288              /* Whitespace but not '\r' or '\n' */
289              if (state != svn_diff__normalize_state_whitespace
290                  && opts->ignore_space
291                     == svn_diff_file_ignore_space_change)
292                /*### If we can postpone insertion of the space
293                  until the next non-whitespace character,
294                  we have a potential of reducing the number of copies:
295                  If this space is followed by more spaces,
296                  this will cause a block-copy.
297                  If the next non-space block is considered normalized
298                  *and* preceded by a space, we can take advantage of that. */
299                /* Note, the above optimization applies to 90% of the source
300                   lines in our own code, since it (generally) doesn't use
301                   more than one space per blank section, except for the
302                   beginning of a line. */
303                INCLUDE_AS(' ');
304              else
305                SKIP;
306              state = svn_diff__normalize_state_whitespace;
307            }
308          else
309            {
310              /* Non-whitespace character, or whitespace character in
311                 svn_diff_file_ignore_space_none mode. */
312              INCLUDE;
313              state = svn_diff__normalize_state_normal;
314            }
315        }
316    }
317
318  /* If we're not in whitespace, flush the last chunk of data.
319   * Note that this will work correctly when this is the last chunk of the
320   * file:
321   * * If there is an eol, it will either have been output when we entered
322   *   the state_cr, or it will be output now.
323   * * If there is no eol and we're not in whitespace, then we just output
324   *   everything below.
325   * * If there's no eol and we are in whitespace, we want to ignore
326   *   whitespace unconditionally. */
327
328  if (*tgt == tgt_newend)
329    {
330      /* we haven't copied any data in to *tgt and our chunk consists
331         only of one block of (already normalized) data.
332         Just return the block. */
333      *tgt = (char *)start;
334      *lengthp = include_len;
335    }
336  else
337    {
338      COPY_INCLUDED_SECTION;
339      *lengthp = tgt_newend - *tgt;
340    }
341
342  *statep = state;
343
344#undef SKIP
345#undef INCLUDE
346#undef INCLUDE_AS
347#undef INSERT
348#undef COPY_INCLUDED_SECTION
349}
350
351svn_error_t *
352svn_diff__unified_append_no_newline_msg(svn_stringbuf_t *stringbuf,
353                                        const char *header_encoding,
354                                        apr_pool_t *scratch_pool)
355{
356  const char *out_str;
357
358  SVN_ERR(svn_utf_cstring_from_utf8_ex2(
359            &out_str,
360            APR_EOL_STR
361            SVN_DIFF__NO_NEWLINE_AT_END_OF_FILE APR_EOL_STR,
362            header_encoding, scratch_pool));
363  svn_stringbuf_appendcstr(stringbuf, out_str);
364  return SVN_NO_ERROR;
365}
366
367svn_error_t *
368svn_diff__unified_write_hunk_header(svn_stream_t *output_stream,
369                                    const char *header_encoding,
370                                    const char *hunk_delimiter,
371                                    apr_off_t old_start,
372                                    apr_off_t old_length,
373                                    apr_off_t new_start,
374                                    apr_off_t new_length,
375                                    const char *hunk_extra_context,
376                                    apr_pool_t *scratch_pool)
377{
378  SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
379                                      scratch_pool,
380                                      "%s -%" APR_OFF_T_FMT,
381                                      hunk_delimiter, old_start));
382  /* If the hunk length is 1, suppress the number of lines in the hunk
383   * (it is 1 implicitly) */
384  if (old_length != 1)
385    {
386      SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
387                                          scratch_pool,
388                                          ",%" APR_OFF_T_FMT, old_length));
389    }
390
391  SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
392                                      scratch_pool,
393                                      " +%" APR_OFF_T_FMT, new_start));
394  if (new_length != 1)
395    {
396      SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
397                                          scratch_pool,
398                                          ",%" APR_OFF_T_FMT, new_length));
399    }
400
401  if (hunk_extra_context == NULL)
402      hunk_extra_context = "";
403  SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
404                                      scratch_pool,
405                                      " %s%s%s" APR_EOL_STR,
406                                      hunk_delimiter,
407                                      hunk_extra_context[0] ? " " : "",
408                                      hunk_extra_context));
409  return SVN_NO_ERROR;
410}
411
412svn_error_t *
413svn_diff__unidiff_write_header(svn_stream_t *output_stream,
414                               const char *header_encoding,
415                               const char *old_header,
416                               const char *new_header,
417                               apr_pool_t *scratch_pool)
418{
419  SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
420                                      scratch_pool,
421                                      "--- %s" APR_EOL_STR
422                                      "+++ %s" APR_EOL_STR,
423                                      old_header,
424                                      new_header));
425  return SVN_NO_ERROR;
426}
427
428/* A helper function for display_prop_diffs.  Output the differences between
429   the mergeinfo stored in ORIG_MERGEINFO_VAL and NEW_MERGEINFO_VAL in a
430   human-readable form to OUTSTREAM, using ENCODING.  Use POOL for temporary
431   allocations. */
432static svn_error_t *
433display_mergeinfo_diff(const char *old_mergeinfo_val,
434                       const char *new_mergeinfo_val,
435                       const char *encoding,
436                       svn_stream_t *outstream,
437                       apr_pool_t *pool)
438{
439  apr_hash_t *old_mergeinfo_hash, *new_mergeinfo_hash, *added, *deleted;
440  apr_pool_t *iterpool = svn_pool_create(pool);
441  apr_hash_index_t *hi;
442
443  if (old_mergeinfo_val)
444    SVN_ERR(svn_mergeinfo_parse(&old_mergeinfo_hash, old_mergeinfo_val, pool));
445  else
446    old_mergeinfo_hash = NULL;
447
448  if (new_mergeinfo_val)
449    SVN_ERR(svn_mergeinfo_parse(&new_mergeinfo_hash, new_mergeinfo_val, pool));
450  else
451    new_mergeinfo_hash = NULL;
452
453  SVN_ERR(svn_mergeinfo_diff2(&deleted, &added, old_mergeinfo_hash,
454                              new_mergeinfo_hash,
455                              TRUE, pool, pool));
456
457  /* Print a hint for 'svn patch' or smilar tools, indicating the
458   * number of reverse-merges and forward-merges. */
459  SVN_ERR(svn_stream_printf_from_utf8(outstream, encoding, pool,
460                                      "## -0,%u +0,%u ##%s",
461                                      apr_hash_count(deleted),
462                                      apr_hash_count(added),
463                                      APR_EOL_STR));
464
465  for (hi = apr_hash_first(pool, deleted);
466       hi; hi = apr_hash_next(hi))
467    {
468      const char *from_path = apr_hash_this_key(hi);
469      svn_rangelist_t *merge_revarray = apr_hash_this_val(hi);
470      svn_string_t *merge_revstr;
471
472      svn_pool_clear(iterpool);
473      SVN_ERR(svn_rangelist_to_string(&merge_revstr, merge_revarray,
474                                      iterpool));
475
476      SVN_ERR(svn_stream_printf_from_utf8(outstream, encoding, iterpool,
477                                          _("   Reverse-merged %s:r%s%s"),
478                                          from_path, merge_revstr->data,
479                                          APR_EOL_STR));
480    }
481
482  for (hi = apr_hash_first(pool, added);
483       hi; hi = apr_hash_next(hi))
484    {
485      const char *from_path = apr_hash_this_key(hi);
486      svn_rangelist_t *merge_revarray = apr_hash_this_val(hi);
487      svn_string_t *merge_revstr;
488
489      svn_pool_clear(iterpool);
490      SVN_ERR(svn_rangelist_to_string(&merge_revstr, merge_revarray,
491                                      iterpool));
492
493      SVN_ERR(svn_stream_printf_from_utf8(outstream, encoding, iterpool,
494                                          _("   Merged %s:r%s%s"),
495                                          from_path, merge_revstr->data,
496                                          APR_EOL_STR));
497    }
498
499  svn_pool_destroy(iterpool);
500  return SVN_NO_ERROR;
501}
502
503/* svn_sort__array callback handling svn_prop_t by name */
504static int
505propchange_sort(const void *k1, const void *k2)
506{
507  const svn_prop_t *propchange1 = k1;
508  const svn_prop_t *propchange2 = k2;
509
510  return strcmp(propchange1->name, propchange2->name);
511}
512
513svn_error_t *
514svn_diff__display_prop_diffs(svn_stream_t *outstream,
515                             const char *encoding,
516                             const apr_array_header_t *propchanges,
517                             apr_hash_t *original_props,
518                             svn_boolean_t pretty_print_mergeinfo,
519                             int context_size,
520                             svn_cancel_func_t cancel_func,
521                             void *cancel_baton,
522                             apr_pool_t *scratch_pool)
523{
524  apr_pool_t *pool = scratch_pool;
525  apr_pool_t *iterpool = svn_pool_create(pool);
526  apr_array_header_t *changes = apr_array_copy(scratch_pool, propchanges);
527  int i;
528
529  svn_sort__array(changes, propchange_sort);
530
531  for (i = 0; i < changes->nelts; i++)
532    {
533      const char *action;
534      const svn_string_t *original_value;
535      const svn_prop_t *propchange
536        = &APR_ARRAY_IDX(changes, i, svn_prop_t);
537
538      if (original_props)
539        original_value = svn_hash_gets(original_props, propchange->name);
540      else
541        original_value = NULL;
542
543      /* If the property doesn't exist on either side, or if it exists
544         with the same value, skip it.  This can happen if the client is
545         hitting an old mod_dav_svn server that doesn't understand the
546         "send-all" REPORT style. */
547      if ((! (original_value || propchange->value))
548          || (original_value && propchange->value
549              && svn_string_compare(original_value, propchange->value)))
550        continue;
551
552      svn_pool_clear(iterpool);
553
554      if (! original_value)
555        action = "Added";
556      else if (! propchange->value)
557        action = "Deleted";
558      else
559        action = "Modified";
560      SVN_ERR(svn_stream_printf_from_utf8(outstream, encoding, iterpool,
561                                          "%s: %s%s", action,
562                                          propchange->name, APR_EOL_STR));
563
564      if (pretty_print_mergeinfo
565          && strcmp(propchange->name, SVN_PROP_MERGEINFO) == 0)
566        {
567          const char *orig = original_value ? original_value->data : NULL;
568          const char *val = propchange->value ? propchange->value->data : NULL;
569          svn_error_t *err = display_mergeinfo_diff(orig, val, encoding,
570                                                    outstream, iterpool);
571
572          /* Issue #3896: If we can't pretty-print mergeinfo differences
573             because invalid mergeinfo is present, then don't let the diff
574             fail, just print the diff as any other property. */
575          if (err && err->apr_err == SVN_ERR_MERGEINFO_PARSE_ERROR)
576            {
577              svn_error_clear(err);
578            }
579          else
580            {
581              SVN_ERR(err);
582              continue;
583            }
584        }
585
586      {
587        svn_diff_t *diff;
588        svn_diff_file_options_t options = { 0 };
589        const svn_string_t *orig
590          = original_value ? original_value
591                           : svn_string_create_empty(iterpool);
592        const svn_string_t *val
593          = propchange->value ? propchange->value
594                              : svn_string_create_empty(iterpool);
595
596        SVN_ERR(svn_diff_mem_string_diff(&diff, orig, val, &options,
597                                         iterpool));
598
599        /* UNIX patch will try to apply a diff even if the diff header
600         * is missing. It tries to be helpful by asking the user for a
601         * target filename when it can't determine the target filename
602         * from the diff header. But there usually are no files which
603         * UNIX patch could apply the property diff to, so we use "##"
604         * instead of "@@" as the default hunk delimiter for property diffs.
605         * We also suppress the diff header. */
606        SVN_ERR(svn_diff_mem_string_output_unified3(
607                  outstream, diff, FALSE /* no header */, "##", NULL, NULL,
608                  encoding, orig, val, context_size,
609                  cancel_func, cancel_baton, iterpool));
610      }
611    }
612  svn_pool_destroy(iterpool);
613
614  return SVN_NO_ERROR;
615}
616
617
/* Return the library version number.

   The whole function body is supplied by the SVN_VERSION_BODY macro.
   NOTE(review): that macro is presumably defined in svn_version.h and
   presumably returns a pointer to a static svn_version_t -- confirm
   there. */
const svn_version_t *
svn_diff_version(void)
{
  SVN_VERSION_BODY;
}
624