1/*
2 * diff3.c :  routines for doing diffs
3 *
4 * ====================================================================
5 *    Licensed to the Apache Software Foundation (ASF) under one
6 *    or more contributor license agreements.  See the NOTICE file
7 *    distributed with this work for additional information
8 *    regarding copyright ownership.  The ASF licenses this file
9 *    to you under the Apache License, Version 2.0 (the
10 *    "License"); you may not use this file except in compliance
11 *    with the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 *    Unless required by applicable law or agreed to in writing,
16 *    software distributed under the License is distributed on an
17 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 *    KIND, either express or implied.  See the License for the
19 *    specific language governing permissions and limitations
20 *    under the License.
21 * ====================================================================
22 */
23
24
25#include <apr.h>
26#include <apr_pools.h>
27#include <apr_general.h>
28
29#include "svn_pools.h"
30#include "svn_error.h"
31#include "svn_diff.h"
32#include "svn_sorts.h"
33#include "svn_types.h"
34
35#include "diff.h"
36
37
38void
39svn_diff__resolve_conflict(svn_diff_t *hunk,
40                           svn_diff__position_t **position_list1,
41                           svn_diff__position_t **position_list2,
42                           svn_diff__token_index_t num_tokens,
43                           apr_pool_t *pool)
44{
45  apr_off_t modified_start = hunk->modified_start + 1;
46  apr_off_t latest_start = hunk->latest_start + 1;
47  apr_off_t common_length;
48  apr_off_t modified_length = hunk->modified_length;
49  apr_off_t latest_length = hunk->latest_length;
50  svn_diff__position_t *start_position[2];
51  svn_diff__position_t *position[2];
52  svn_diff__token_index_t *token_counts[2];
53  svn_diff__lcs_t *lcs = NULL;
54  svn_diff__lcs_t **lcs_ref = &lcs;
55  svn_diff_t **diff_ref = &hunk->resolved_diff;
56  apr_pool_t *subpool;
57
58  /* First find the starting positions for the
59   * comparison
60   */
61
62  start_position[0] = *position_list1;
63  start_position[1] = *position_list2;
64
65  while (start_position[0]->offset < modified_start)
66    start_position[0] = start_position[0]->next;
67
68  while (start_position[1]->offset < latest_start)
69    start_position[1] = start_position[1]->next;
70
71  position[0] = start_position[0];
72  position[1] = start_position[1];
73
74  common_length = modified_length < latest_length
75                ? modified_length : latest_length;
76
77  while (common_length > 0
78         && position[0]->token_index == position[1]->token_index)
79    {
80      position[0] = position[0]->next;
81      position[1] = position[1]->next;
82
83      common_length--;
84    }
85
86  if (common_length == 0
87      && modified_length == latest_length)
88    {
89      hunk->type = svn_diff__type_diff_common;
90      hunk->resolved_diff = NULL;
91
92      *position_list1 = position[0];
93      *position_list2 = position[1];
94
95      return;
96    }
97
98  hunk->type = svn_diff__type_conflict;
99
100  /* ### If we have a conflict we can try to find the
101   * ### common parts in it by getting an lcs between
102   * ### modified (start to start + length) and
103   * ### latest (start to start + length).
104   * ### We use this lcs to create a simple diff.  Only
105   * ### where there is a diff between the two, we have
106   * ### a conflict.
107   * ### This raises a problem; several common diffs and
108   * ### conflicts can occur within the same original
109   * ### block.  This needs some thought.
110   * ###
111   * ### NB: We can use the node _pointers_ to identify
112   * ###     different tokens
113   */
114
115  subpool = svn_pool_create(pool);
116
117  /* Calculate how much of the two sequences was
118   * actually the same.
119   */
120  common_length = (modified_length < latest_length
121                  ? modified_length : latest_length)
122                - common_length;
123
124  /* If there were matching symbols at the start of
125   * both sequences, record that fact.
126   */
127  if (common_length > 0)
128    {
129      lcs = apr_palloc(subpool, sizeof(*lcs));
130      lcs->next = NULL;
131      lcs->position[0] = start_position[0];
132      lcs->position[1] = start_position[1];
133      lcs->length = common_length;
134
135      lcs_ref = &lcs->next;
136    }
137
138  modified_length -= common_length;
139  latest_length -= common_length;
140
141  modified_start = start_position[0]->offset;
142  latest_start = start_position[1]->offset;
143
144  start_position[0] = position[0];
145  start_position[1] = position[1];
146
147  /* Create a new ring for svn_diff__lcs to grok.
148   * We can safely do this given we don't need the
149   * positions we processed anymore.
150   */
151  if (modified_length == 0)
152    {
153      *position_list1 = position[0];
154      position[0] = NULL;
155    }
156  else
157    {
158      while (--modified_length)
159        position[0] = position[0]->next;
160
161      *position_list1 = position[0]->next;
162      position[0]->next = start_position[0];
163    }
164
165  if (latest_length == 0)
166    {
167      *position_list2 = position[1];
168      position[1] = NULL;
169    }
170  else
171    {
172      while (--latest_length)
173        position[1] = position[1]->next;
174
175      *position_list2 = position[1]->next;
176      position[1]->next = start_position[1];
177    }
178
179  token_counts[0] = svn_diff__get_token_counts(position[0], num_tokens,
180                                               subpool);
181  token_counts[1] = svn_diff__get_token_counts(position[1], num_tokens,
182                                               subpool);
183
184  *lcs_ref = svn_diff__lcs(position[0], position[1], token_counts[0],
185                           token_counts[1], num_tokens, 0, 0, subpool);
186
187  /* Fix up the EOF lcs element in case one of
188   * the two sequences was NULL.
189   */
190  if ((*lcs_ref)->position[0]->offset == 1)
191    (*lcs_ref)->position[0] = *position_list1;
192
193  if ((*lcs_ref)->position[1]->offset == 1)
194    (*lcs_ref)->position[1] = *position_list2;
195
196  /* Produce the resolved diff */
197  while (1)
198    {
199      if (modified_start < lcs->position[0]->offset
200          || latest_start < lcs->position[1]->offset)
201        {
202          (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref));
203
204          (*diff_ref)->type = svn_diff__type_conflict;
205          (*diff_ref)->original_start = hunk->original_start;
206          (*diff_ref)->original_length = hunk->original_length;
207          (*diff_ref)->modified_start = modified_start - 1;
208          (*diff_ref)->modified_length = lcs->position[0]->offset
209                                         - modified_start;
210          (*diff_ref)->latest_start = latest_start - 1;
211          (*diff_ref)->latest_length = lcs->position[1]->offset
212                                       - latest_start;
213          (*diff_ref)->resolved_diff = NULL;
214
215          diff_ref = &(*diff_ref)->next;
216        }
217
218      /* Detect the EOF */
219      if (lcs->length == 0)
220        break;
221
222      modified_start = lcs->position[0]->offset;
223      latest_start = lcs->position[1]->offset;
224
225      (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref));
226
227      (*diff_ref)->type = svn_diff__type_diff_common;
228      (*diff_ref)->original_start = hunk->original_start;
229      (*diff_ref)->original_length = hunk->original_length;
230      (*diff_ref)->modified_start = modified_start - 1;
231      (*diff_ref)->modified_length = lcs->length;
232      (*diff_ref)->latest_start = latest_start - 1;
233      (*diff_ref)->latest_length = lcs->length;
234      (*diff_ref)->resolved_diff = NULL;
235
236      diff_ref = &(*diff_ref)->next;
237
238      modified_start += lcs->length;
239      latest_start += lcs->length;
240
241      lcs = lcs->next;
242    }
243
244  *diff_ref = NULL;
245
246  svn_pool_destroy(subpool);
247}
248
249
250svn_error_t *
251svn_diff_diff3_2(svn_diff_t **diff,
252                 void *diff_baton,
253                 const svn_diff_fns2_t *vtable,
254                 apr_pool_t *pool)
255{
256  svn_diff__tree_t *tree;
257  svn_diff__position_t *position_list[3];
258  svn_diff__token_index_t num_tokens;
259  svn_diff__token_index_t *token_counts[3];
260  svn_diff_datasource_e datasource[] = {svn_diff_datasource_original,
261                                        svn_diff_datasource_modified,
262                                        svn_diff_datasource_latest};
263  svn_diff__lcs_t *lcs_om;
264  svn_diff__lcs_t *lcs_ol;
265  apr_pool_t *subpool;
266  apr_pool_t *treepool;
267  apr_off_t prefix_lines = 0;
268  apr_off_t suffix_lines = 0;
269
270  *diff = NULL;
271
272  subpool = svn_pool_create(pool);
273  treepool = svn_pool_create(pool);
274
275  svn_diff__tree_create(&tree, treepool);
276
277  SVN_ERR(vtable->datasources_open(diff_baton, &prefix_lines, &suffix_lines,
278                                   datasource, 3));
279
280  SVN_ERR(svn_diff__get_tokens(&position_list[0],
281                               tree,
282                               diff_baton, vtable,
283                               svn_diff_datasource_original,
284                               prefix_lines,
285                               subpool));
286
287  SVN_ERR(svn_diff__get_tokens(&position_list[1],
288                               tree,
289                               diff_baton, vtable,
290                               svn_diff_datasource_modified,
291                               prefix_lines,
292                               subpool));
293
294  SVN_ERR(svn_diff__get_tokens(&position_list[2],
295                               tree,
296                               diff_baton, vtable,
297                               svn_diff_datasource_latest,
298                               prefix_lines,
299                               subpool));
300
301  num_tokens = svn_diff__get_node_count(tree);
302
303  /* Get rid of the tokens, we don't need them to calc the diff */
304  if (vtable->token_discard_all != NULL)
305    vtable->token_discard_all(diff_baton);
306
307  /* We don't need the nodes in the tree either anymore, nor the tree itself */
308  svn_pool_destroy(treepool);
309
310  token_counts[0] = svn_diff__get_token_counts(position_list[0], num_tokens,
311                                               subpool);
312  token_counts[1] = svn_diff__get_token_counts(position_list[1], num_tokens,
313                                               subpool);
314  token_counts[2] = svn_diff__get_token_counts(position_list[2], num_tokens,
315                                               subpool);
316
317  /* Get the lcs for original-modified and original-latest */
318  lcs_om = svn_diff__lcs(position_list[0], position_list[1], token_counts[0],
319                         token_counts[1], num_tokens, prefix_lines,
320                         suffix_lines, subpool);
321  lcs_ol = svn_diff__lcs(position_list[0], position_list[2], token_counts[0],
322                         token_counts[2], num_tokens, prefix_lines,
323                         suffix_lines, subpool);
324
325  /* Produce a merged diff */
326  {
327    svn_diff_t **diff_ref = diff;
328
329    apr_off_t original_start = 1;
330    apr_off_t modified_start = 1;
331    apr_off_t latest_start = 1;
332    apr_off_t original_sync;
333    apr_off_t modified_sync;
334    apr_off_t latest_sync;
335    apr_off_t common_length;
336    apr_off_t modified_length;
337    apr_off_t latest_length;
338    svn_boolean_t is_modified;
339    svn_boolean_t is_latest;
340    svn_diff__position_t sentinel_position[2];
341
342    /* Point the position lists to the start of the list
343     * so that common_diff/conflict detection actually is
344     * able to work.
345     */
346    if (position_list[1])
347      {
348        sentinel_position[0].next = position_list[1]->next;
349        sentinel_position[0].offset = position_list[1]->offset + 1;
350        position_list[1]->next = &sentinel_position[0];
351        position_list[1] = sentinel_position[0].next;
352      }
353    else
354      {
355        sentinel_position[0].offset = prefix_lines + 1;
356        sentinel_position[0].next = NULL;
357        position_list[1] = &sentinel_position[0];
358      }
359
360    if (position_list[2])
361      {
362        sentinel_position[1].next = position_list[2]->next;
363        sentinel_position[1].offset = position_list[2]->offset + 1;
364        position_list[2]->next = &sentinel_position[1];
365        position_list[2] = sentinel_position[1].next;
366      }
367    else
368      {
369        sentinel_position[1].offset = prefix_lines + 1;
370        sentinel_position[1].next = NULL;
371        position_list[2] = &sentinel_position[1];
372      }
373
374    while (1)
375      {
376        /* Find the sync points */
377        while (1)
378          {
379            if (lcs_om->position[0]->offset > lcs_ol->position[0]->offset)
380              {
381                original_sync = lcs_om->position[0]->offset;
382
383                while (lcs_ol->position[0]->offset + lcs_ol->length
384                       < original_sync)
385                  lcs_ol = lcs_ol->next;
386
387                /* If the sync point is the EOF, and our current lcs segment
388                 * doesn't reach as far as EOF, we need to skip this segment.
389                 */
390                if (lcs_om->length == 0 && lcs_ol->length > 0
391                    && lcs_ol->position[0]->offset + lcs_ol->length
392                       == original_sync
393                    && lcs_ol->position[1]->offset + lcs_ol->length
394                       != lcs_ol->next->position[1]->offset)
395                  lcs_ol = lcs_ol->next;
396
397                if (lcs_ol->position[0]->offset <= original_sync)
398                    break;
399              }
400            else
401              {
402                original_sync = lcs_ol->position[0]->offset;
403
404                while (lcs_om->position[0]->offset + lcs_om->length
405                       < original_sync)
406                  lcs_om = lcs_om->next;
407
408                /* If the sync point is the EOF, and our current lcs segment
409                 * doesn't reach as far as EOF, we need to skip this segment.
410                 */
411                if (lcs_ol->length == 0 && lcs_om->length > 0
412                    && lcs_om->position[0]->offset + lcs_om->length
413                       == original_sync
414                    && lcs_om->position[1]->offset + lcs_om->length
415                       != lcs_om->next->position[1]->offset)
416                  lcs_om = lcs_om->next;
417
418                if (lcs_om->position[0]->offset <= original_sync)
419                    break;
420              }
421          }
422
423        modified_sync = lcs_om->position[1]->offset
424                      + (original_sync - lcs_om->position[0]->offset);
425        latest_sync = lcs_ol->position[1]->offset
426                    + (original_sync - lcs_ol->position[0]->offset);
427
428        /* Determine what is modified, if anything */
429        is_modified = lcs_om->position[0]->offset - original_start > 0
430                      || lcs_om->position[1]->offset - modified_start > 0;
431
432        is_latest = lcs_ol->position[0]->offset - original_start > 0
433                    || lcs_ol->position[1]->offset - latest_start > 0;
434
435        if (is_modified || is_latest)
436          {
437            modified_length = modified_sync - modified_start;
438            latest_length = latest_sync - latest_start;
439
440            (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref));
441
442            (*diff_ref)->original_start = original_start - 1;
443            (*diff_ref)->original_length = original_sync - original_start;
444            (*diff_ref)->modified_start = modified_start - 1;
445            (*diff_ref)->modified_length = modified_length;
446            (*diff_ref)->latest_start = latest_start - 1;
447            (*diff_ref)->latest_length = latest_length;
448            (*diff_ref)->resolved_diff = NULL;
449
450            if (is_modified && is_latest)
451              {
452                svn_diff__resolve_conflict(*diff_ref,
453                                           &position_list[1],
454                                           &position_list[2],
455                                           num_tokens,
456                                           pool);
457              }
458            else if (is_modified)
459              {
460                (*diff_ref)->type = svn_diff__type_diff_modified;
461              }
462            else
463              {
464                (*diff_ref)->type = svn_diff__type_diff_latest;
465              }
466
467            diff_ref = &(*diff_ref)->next;
468          }
469
470        /* Detect EOF */
471        if (lcs_om->length == 0 || lcs_ol->length == 0)
472            break;
473
474        modified_length = lcs_om->length
475                          - (original_sync - lcs_om->position[0]->offset);
476        latest_length = lcs_ol->length
477                        - (original_sync - lcs_ol->position[0]->offset);
478        common_length = MIN(modified_length, latest_length);
479
480        if (common_length > 0)
481          {
482            (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref));
483
484            (*diff_ref)->type = svn_diff__type_common;
485            (*diff_ref)->original_start = original_sync - 1;
486            (*diff_ref)->original_length = common_length;
487            (*diff_ref)->modified_start = modified_sync - 1;
488            (*diff_ref)->modified_length = common_length;
489            (*diff_ref)->latest_start = latest_sync - 1;
490            (*diff_ref)->latest_length = common_length;
491            (*diff_ref)->resolved_diff = NULL;
492
493            diff_ref = &(*diff_ref)->next;
494          }
495
496        /* Set the new offsets */
497        original_start = original_sync + common_length;
498        modified_start = modified_sync + common_length;
499        latest_start = latest_sync + common_length;
500
501        /* Make it easier for diff_common/conflict detection
502           by recording last lcs start positions
503         */
504        if (position_list[1]->offset < lcs_om->position[1]->offset)
505          position_list[1] = lcs_om->position[1];
506
507        if (position_list[2]->offset < lcs_ol->position[1]->offset)
508          position_list[2] = lcs_ol->position[1];
509
510        /* Make sure we are pointing to lcs entries beyond
511         * the range we just processed
512         */
513        while (original_start >= lcs_om->position[0]->offset + lcs_om->length
514               && lcs_om->length > 0)
515          {
516            lcs_om = lcs_om->next;
517          }
518
519        while (original_start >= lcs_ol->position[0]->offset + lcs_ol->length
520               && lcs_ol->length > 0)
521          {
522            lcs_ol = lcs_ol->next;
523          }
524      }
525
526    *diff_ref = NULL;
527  }
528
529  svn_pool_destroy(subpool);
530
531  return SVN_NO_ERROR;
532}
533