diff3.c revision 251881
1/*
2 * diff3.c :  routines for doing diffs
3 *
4 * ====================================================================
5 *    Licensed to the Apache Software Foundation (ASF) under one
6 *    or more contributor license agreements.  See the NOTICE file
7 *    distributed with this work for additional information
8 *    regarding copyright ownership.  The ASF licenses this file
9 *    to you under the Apache License, Version 2.0 (the
10 *    "License"); you may not use this file except in compliance
11 *    with the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 *    Unless required by applicable law or agreed to in writing,
16 *    software distributed under the License is distributed on an
17 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 *    KIND, either express or implied.  See the License for the
19 *    specific language governing permissions and limitations
20 *    under the License.
21 * ====================================================================
22 */
23
24
25#include <apr.h>
26#include <apr_pools.h>
27#include <apr_general.h>
28
29#include "svn_pools.h"
30#include "svn_error.h"
31#include "svn_diff.h"
32#include "svn_types.h"
33
34#include "diff.h"
35
36
37void
38svn_diff__resolve_conflict(svn_diff_t *hunk,
39                           svn_diff__position_t **position_list1,
40                           svn_diff__position_t **position_list2,
41                           svn_diff__token_index_t num_tokens,
42                           apr_pool_t *pool)
43{
44  apr_off_t modified_start = hunk->modified_start + 1;
45  apr_off_t latest_start = hunk->latest_start + 1;
46  apr_off_t common_length;
47  apr_off_t modified_length = hunk->modified_length;
48  apr_off_t latest_length = hunk->latest_length;
49  svn_diff__position_t *start_position[2];
50  svn_diff__position_t *position[2];
51  svn_diff__token_index_t *token_counts[2];
52  svn_diff__lcs_t *lcs = NULL;
53  svn_diff__lcs_t **lcs_ref = &lcs;
54  svn_diff_t **diff_ref = &hunk->resolved_diff;
55  apr_pool_t *subpool;
56
57  /* First find the starting positions for the
58   * comparison
59   */
60
61  start_position[0] = *position_list1;
62  start_position[1] = *position_list2;
63
64  while (start_position[0]->offset < modified_start)
65    start_position[0] = start_position[0]->next;
66
67  while (start_position[1]->offset < latest_start)
68    start_position[1] = start_position[1]->next;
69
70  position[0] = start_position[0];
71  position[1] = start_position[1];
72
73  common_length = modified_length < latest_length
74                ? modified_length : latest_length;
75
76  while (common_length > 0
77         && position[0]->token_index == position[1]->token_index)
78    {
79      position[0] = position[0]->next;
80      position[1] = position[1]->next;
81
82      common_length--;
83    }
84
85  if (common_length == 0
86      && modified_length == latest_length)
87    {
88      hunk->type = svn_diff__type_diff_common;
89      hunk->resolved_diff = NULL;
90
91      *position_list1 = position[0];
92      *position_list2 = position[1];
93
94      return;
95    }
96
97  hunk->type = svn_diff__type_conflict;
98
99  /* ### If we have a conflict we can try to find the
100   * ### common parts in it by getting an lcs between
101   * ### modified (start to start + length) and
102   * ### latest (start to start + length).
103   * ### We use this lcs to create a simple diff.  Only
104   * ### where there is a diff between the two, we have
105   * ### a conflict.
106   * ### This raises a problem; several common diffs and
107   * ### conflicts can occur within the same original
108   * ### block.  This needs some thought.
109   * ###
110   * ### NB: We can use the node _pointers_ to identify
111   * ###     different tokens
112   */
113
114  subpool = svn_pool_create(pool);
115
116  /* Calculate how much of the two sequences was
117   * actually the same.
118   */
119  common_length = (modified_length < latest_length
120                  ? modified_length : latest_length)
121                - common_length;
122
123  /* If there were matching symbols at the start of
124   * both sequences, record that fact.
125   */
126  if (common_length > 0)
127    {
128      lcs = apr_palloc(subpool, sizeof(*lcs));
129      lcs->next = NULL;
130      lcs->position[0] = start_position[0];
131      lcs->position[1] = start_position[1];
132      lcs->length = common_length;
133
134      lcs_ref = &lcs->next;
135    }
136
137  modified_length -= common_length;
138  latest_length -= common_length;
139
140  modified_start = start_position[0]->offset;
141  latest_start = start_position[1]->offset;
142
143  start_position[0] = position[0];
144  start_position[1] = position[1];
145
146  /* Create a new ring for svn_diff__lcs to grok.
147   * We can safely do this given we don't need the
148   * positions we processed anymore.
149   */
150  if (modified_length == 0)
151    {
152      *position_list1 = position[0];
153      position[0] = NULL;
154    }
155  else
156    {
157      while (--modified_length)
158        position[0] = position[0]->next;
159
160      *position_list1 = position[0]->next;
161      position[0]->next = start_position[0];
162    }
163
164  if (latest_length == 0)
165    {
166      *position_list2 = position[1];
167      position[1] = NULL;
168    }
169  else
170    {
171      while (--latest_length)
172        position[1] = position[1]->next;
173
174      *position_list2 = position[1]->next;
175      position[1]->next = start_position[1];
176    }
177
178  token_counts[0] = svn_diff__get_token_counts(position[0], num_tokens,
179                                               subpool);
180  token_counts[1] = svn_diff__get_token_counts(position[1], num_tokens,
181                                               subpool);
182
183  *lcs_ref = svn_diff__lcs(position[0], position[1], token_counts[0],
184                           token_counts[1], num_tokens, 0, 0, subpool);
185
186  /* Fix up the EOF lcs element in case one of
187   * the two sequences was NULL.
188   */
189  if ((*lcs_ref)->position[0]->offset == 1)
190    (*lcs_ref)->position[0] = *position_list1;
191
192  if ((*lcs_ref)->position[1]->offset == 1)
193    (*lcs_ref)->position[1] = *position_list2;
194
195  /* Produce the resolved diff */
196  while (1)
197    {
198      if (modified_start < lcs->position[0]->offset
199          || latest_start < lcs->position[1]->offset)
200        {
201          (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref));
202
203          (*diff_ref)->type = svn_diff__type_conflict;
204          (*diff_ref)->original_start = hunk->original_start;
205          (*diff_ref)->original_length = hunk->original_length;
206          (*diff_ref)->modified_start = modified_start - 1;
207          (*diff_ref)->modified_length = lcs->position[0]->offset
208                                         - modified_start;
209          (*diff_ref)->latest_start = latest_start - 1;
210          (*diff_ref)->latest_length = lcs->position[1]->offset
211                                       - latest_start;
212          (*diff_ref)->resolved_diff = NULL;
213
214          diff_ref = &(*diff_ref)->next;
215        }
216
217      /* Detect the EOF */
218      if (lcs->length == 0)
219        break;
220
221      modified_start = lcs->position[0]->offset;
222      latest_start = lcs->position[1]->offset;
223
224      (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref));
225
226      (*diff_ref)->type = svn_diff__type_diff_common;
227      (*diff_ref)->original_start = hunk->original_start;
228      (*diff_ref)->original_length = hunk->original_length;
229      (*diff_ref)->modified_start = modified_start - 1;
230      (*diff_ref)->modified_length = lcs->length;
231      (*diff_ref)->latest_start = latest_start - 1;
232      (*diff_ref)->latest_length = lcs->length;
233      (*diff_ref)->resolved_diff = NULL;
234
235      diff_ref = &(*diff_ref)->next;
236
237      modified_start += lcs->length;
238      latest_start += lcs->length;
239
240      lcs = lcs->next;
241    }
242
243  *diff_ref = NULL;
244
245  svn_pool_destroy(subpool);
246}
247
248
249svn_error_t *
250svn_diff_diff3_2(svn_diff_t **diff,
251                 void *diff_baton,
252                 const svn_diff_fns2_t *vtable,
253                 apr_pool_t *pool)
254{
255  svn_diff__tree_t *tree;
256  svn_diff__position_t *position_list[3];
257  svn_diff__token_index_t num_tokens;
258  svn_diff__token_index_t *token_counts[3];
259  svn_diff_datasource_e datasource[] = {svn_diff_datasource_original,
260                                        svn_diff_datasource_modified,
261                                        svn_diff_datasource_latest};
262  svn_diff__lcs_t *lcs_om;
263  svn_diff__lcs_t *lcs_ol;
264  apr_pool_t *subpool;
265  apr_pool_t *treepool;
266  apr_off_t prefix_lines = 0;
267  apr_off_t suffix_lines = 0;
268
269  *diff = NULL;
270
271  subpool = svn_pool_create(pool);
272  treepool = svn_pool_create(pool);
273
274  svn_diff__tree_create(&tree, treepool);
275
276  SVN_ERR(vtable->datasources_open(diff_baton, &prefix_lines, &suffix_lines,
277                                   datasource, 3));
278
279  SVN_ERR(svn_diff__get_tokens(&position_list[0],
280                               tree,
281                               diff_baton, vtable,
282                               svn_diff_datasource_original,
283                               prefix_lines,
284                               subpool));
285
286  SVN_ERR(svn_diff__get_tokens(&position_list[1],
287                               tree,
288                               diff_baton, vtable,
289                               svn_diff_datasource_modified,
290                               prefix_lines,
291                               subpool));
292
293  SVN_ERR(svn_diff__get_tokens(&position_list[2],
294                               tree,
295                               diff_baton, vtable,
296                               svn_diff_datasource_latest,
297                               prefix_lines,
298                               subpool));
299
300  num_tokens = svn_diff__get_node_count(tree);
301
302  /* Get rid of the tokens, we don't need them to calc the diff */
303  if (vtable->token_discard_all != NULL)
304    vtable->token_discard_all(diff_baton);
305
306  /* We don't need the nodes in the tree either anymore, nor the tree itself */
307  svn_pool_destroy(treepool);
308
309  token_counts[0] = svn_diff__get_token_counts(position_list[0], num_tokens,
310                                               subpool);
311  token_counts[1] = svn_diff__get_token_counts(position_list[1], num_tokens,
312                                               subpool);
313  token_counts[2] = svn_diff__get_token_counts(position_list[2], num_tokens,
314                                               subpool);
315
316  /* Get the lcs for original-modified and original-latest */
317  lcs_om = svn_diff__lcs(position_list[0], position_list[1], token_counts[0],
318                         token_counts[1], num_tokens, prefix_lines,
319                         suffix_lines, subpool);
320  lcs_ol = svn_diff__lcs(position_list[0], position_list[2], token_counts[0],
321                         token_counts[2], num_tokens, prefix_lines,
322                         suffix_lines, subpool);
323
324  /* Produce a merged diff */
325  {
326    svn_diff_t **diff_ref = diff;
327
328    apr_off_t original_start = 1;
329    apr_off_t modified_start = 1;
330    apr_off_t latest_start = 1;
331    apr_off_t original_sync;
332    apr_off_t modified_sync;
333    apr_off_t latest_sync;
334    apr_off_t common_length;
335    apr_off_t modified_length;
336    apr_off_t latest_length;
337    svn_boolean_t is_modified;
338    svn_boolean_t is_latest;
339    svn_diff__position_t sentinel_position[2];
340
341    /* Point the position lists to the start of the list
342     * so that common_diff/conflict detection actually is
343     * able to work.
344     */
345    if (position_list[1])
346      {
347        sentinel_position[0].next = position_list[1]->next;
348        sentinel_position[0].offset = position_list[1]->offset + 1;
349        position_list[1]->next = &sentinel_position[0];
350        position_list[1] = sentinel_position[0].next;
351      }
352    else
353      {
354        sentinel_position[0].offset = prefix_lines + 1;
355        sentinel_position[0].next = NULL;
356        position_list[1] = &sentinel_position[0];
357      }
358
359    if (position_list[2])
360      {
361        sentinel_position[1].next = position_list[2]->next;
362        sentinel_position[1].offset = position_list[2]->offset + 1;
363        position_list[2]->next = &sentinel_position[1];
364        position_list[2] = sentinel_position[1].next;
365      }
366    else
367      {
368        sentinel_position[1].offset = prefix_lines + 1;
369        sentinel_position[1].next = NULL;
370        position_list[2] = &sentinel_position[1];
371      }
372
373    while (1)
374      {
375        /* Find the sync points */
376        while (1)
377          {
378            if (lcs_om->position[0]->offset > lcs_ol->position[0]->offset)
379              {
380                original_sync = lcs_om->position[0]->offset;
381
382                while (lcs_ol->position[0]->offset + lcs_ol->length
383                       < original_sync)
384                  lcs_ol = lcs_ol->next;
385
386                /* If the sync point is the EOF, and our current lcs segment
387                 * doesn't reach as far as EOF, we need to skip this segment.
388                 */
389                if (lcs_om->length == 0 && lcs_ol->length > 0
390                    && lcs_ol->position[0]->offset + lcs_ol->length
391                       == original_sync
392                    && lcs_ol->position[1]->offset + lcs_ol->length
393                       != lcs_ol->next->position[1]->offset)
394                  lcs_ol = lcs_ol->next;
395
396                if (lcs_ol->position[0]->offset <= original_sync)
397                    break;
398              }
399            else
400              {
401                original_sync = lcs_ol->position[0]->offset;
402
403                while (lcs_om->position[0]->offset + lcs_om->length
404                       < original_sync)
405                  lcs_om = lcs_om->next;
406
407                /* If the sync point is the EOF, and our current lcs segment
408                 * doesn't reach as far as EOF, we need to skip this segment.
409                 */
410                if (lcs_ol->length == 0 && lcs_om->length > 0
411                    && lcs_om->position[0]->offset + lcs_om->length
412                       == original_sync
413                    && lcs_om->position[1]->offset + lcs_om->length
414                       != lcs_om->next->position[1]->offset)
415                  lcs_om = lcs_om->next;
416
417                if (lcs_om->position[0]->offset <= original_sync)
418                    break;
419              }
420          }
421
422        modified_sync = lcs_om->position[1]->offset
423                      + (original_sync - lcs_om->position[0]->offset);
424        latest_sync = lcs_ol->position[1]->offset
425                    + (original_sync - lcs_ol->position[0]->offset);
426
427        /* Determine what is modified, if anything */
428        is_modified = lcs_om->position[0]->offset - original_start > 0
429                      || lcs_om->position[1]->offset - modified_start > 0;
430
431        is_latest = lcs_ol->position[0]->offset - original_start > 0
432                    || lcs_ol->position[1]->offset - latest_start > 0;
433
434        if (is_modified || is_latest)
435          {
436            modified_length = modified_sync - modified_start;
437            latest_length = latest_sync - latest_start;
438
439            (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref));
440
441            (*diff_ref)->original_start = original_start - 1;
442            (*diff_ref)->original_length = original_sync - original_start;
443            (*diff_ref)->modified_start = modified_start - 1;
444            (*diff_ref)->modified_length = modified_length;
445            (*diff_ref)->latest_start = latest_start - 1;
446            (*diff_ref)->latest_length = latest_length;
447            (*diff_ref)->resolved_diff = NULL;
448
449            if (is_modified && is_latest)
450              {
451                svn_diff__resolve_conflict(*diff_ref,
452                                           &position_list[1],
453                                           &position_list[2],
454                                           num_tokens,
455                                           pool);
456              }
457            else if (is_modified)
458              {
459                (*diff_ref)->type = svn_diff__type_diff_modified;
460              }
461            else
462              {
463                (*diff_ref)->type = svn_diff__type_diff_latest;
464              }
465
466            diff_ref = &(*diff_ref)->next;
467          }
468
469        /* Detect EOF */
470        if (lcs_om->length == 0 || lcs_ol->length == 0)
471            break;
472
473        modified_length = lcs_om->length
474                          - (original_sync - lcs_om->position[0]->offset);
475        latest_length = lcs_ol->length
476                        - (original_sync - lcs_ol->position[0]->offset);
477        common_length = modified_length < latest_length
478                        ? modified_length : latest_length;
479
480        (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref));
481
482        (*diff_ref)->type = svn_diff__type_common;
483        (*diff_ref)->original_start = original_sync - 1;
484        (*diff_ref)->original_length = common_length;
485        (*diff_ref)->modified_start = modified_sync - 1;
486        (*diff_ref)->modified_length = common_length;
487        (*diff_ref)->latest_start = latest_sync - 1;
488        (*diff_ref)->latest_length = common_length;
489        (*diff_ref)->resolved_diff = NULL;
490
491        diff_ref = &(*diff_ref)->next;
492
493        /* Set the new offsets */
494        original_start = original_sync + common_length;
495        modified_start = modified_sync + common_length;
496        latest_start = latest_sync + common_length;
497
498        /* Make it easier for diff_common/conflict detection
499           by recording last lcs start positions
500         */
501        if (position_list[1]->offset < lcs_om->position[1]->offset)
502          position_list[1] = lcs_om->position[1];
503
504        if (position_list[2]->offset < lcs_ol->position[1]->offset)
505          position_list[2] = lcs_ol->position[1];
506
507        /* Make sure we are pointing to lcs entries beyond
508         * the range we just processed
509         */
510        while (original_start >= lcs_om->position[0]->offset + lcs_om->length
511               && lcs_om->length > 0)
512          {
513            lcs_om = lcs_om->next;
514          }
515
516        while (original_start >= lcs_ol->position[0]->offset + lcs_ol->length
517               && lcs_ol->length > 0)
518          {
519            lcs_ol = lcs_ol->next;
520          }
521      }
522
523    *diff_ref = NULL;
524  }
525
526  svn_pool_destroy(subpool);
527
528  return SVN_NO_ERROR;
529}
530