diff3.c revision 251886
133965Sjdp/*
233965Sjdp * diff3.c :  routines for doing diffs
333965Sjdp *
433965Sjdp * ====================================================================
533965Sjdp *    Licensed to the Apache Software Foundation (ASF) under one
633965Sjdp *    or more contributor license agreements.  See the NOTICE file
733965Sjdp *    distributed with this work for additional information
833965Sjdp *    regarding copyright ownership.  The ASF licenses this file
933965Sjdp *    to you under the Apache License, Version 2.0 (the
1033965Sjdp *    "License"); you may not use this file except in compliance
1133965Sjdp *    with the License.  You may obtain a copy of the License at
1233965Sjdp *
1333965Sjdp *      http://www.apache.org/licenses/LICENSE-2.0
1433965Sjdp *
1533965Sjdp *    Unless required by applicable law or agreed to in writing,
1633965Sjdp *    software distributed under the License is distributed on an
17218822Sdim *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18218822Sdim *    KIND, either express or implied.  See the License for the
1933965Sjdp *    specific language governing permissions and limitations
20218822Sdim *    under the License.
2133965Sjdp * ====================================================================
2277298Sobrien */
2333965Sjdp
2433965Sjdp
2533965Sjdp#include <apr.h>
2633965Sjdp#include <apr_pools.h>
2733965Sjdp#include <apr_general.h>
2833965Sjdp
2933965Sjdp#include "svn_pools.h"
3033965Sjdp#include "svn_error.h"
3133965Sjdp#include "svn_diff.h"
3233965Sjdp#include "svn_types.h"
3333965Sjdp
3433965Sjdp#include "diff.h"
3533965Sjdp
3677298Sobrien
3777298Sobrienvoid
3877298Sobriensvn_diff__resolve_conflict(svn_diff_t *hunk,
3933965Sjdp                           svn_diff__position_t **position_list1,
40218822Sdim                           svn_diff__position_t **position_list2,
41218822Sdim                           svn_diff__token_index_t num_tokens,
4233965Sjdp                           apr_pool_t *pool)
4333965Sjdp{
4477298Sobrien  apr_off_t modified_start = hunk->modified_start + 1;
4577298Sobrien  apr_off_t latest_start = hunk->latest_start + 1;
4633965Sjdp  apr_off_t common_length;
4733965Sjdp  apr_off_t modified_length = hunk->modified_length;
4833965Sjdp  apr_off_t latest_length = hunk->latest_length;
4933965Sjdp  svn_diff__position_t *start_position[2];
5033965Sjdp  svn_diff__position_t *position[2];
5133965Sjdp  svn_diff__token_index_t *token_counts[2];
5233965Sjdp  svn_diff__lcs_t *lcs = NULL;
5333965Sjdp  svn_diff__lcs_t **lcs_ref = &lcs;
5433965Sjdp  svn_diff_t **diff_ref = &hunk->resolved_diff;
5533965Sjdp  apr_pool_t *subpool;
5633965Sjdp
5733965Sjdp  /* First find the starting positions for the
5833965Sjdp   * comparison
5933965Sjdp   */
6033965Sjdp
6133965Sjdp  start_position[0] = *position_list1;
6233965Sjdp  start_position[1] = *position_list2;
6333965Sjdp
6433965Sjdp  while (start_position[0]->offset < modified_start)
6533965Sjdp    start_position[0] = start_position[0]->next;
6633965Sjdp
6733965Sjdp  while (start_position[1]->offset < latest_start)
6833965Sjdp    start_position[1] = start_position[1]->next;
6933965Sjdp
7033965Sjdp  position[0] = start_position[0];
7133965Sjdp  position[1] = start_position[1];
7233965Sjdp
7333965Sjdp  common_length = modified_length < latest_length
7433965Sjdp                ? modified_length : latest_length;
7533965Sjdp
7633965Sjdp  while (common_length > 0
7733965Sjdp         && position[0]->token_index == position[1]->token_index)
7833965Sjdp    {
7933965Sjdp      position[0] = position[0]->next;
8033965Sjdp      position[1] = position[1]->next;
8133965Sjdp
8233965Sjdp      common_length--;
8333965Sjdp    }
8433965Sjdp
8533965Sjdp  if (common_length == 0
86218822Sdim      && modified_length == latest_length)
8733965Sjdp    {
8833965Sjdp      hunk->type = svn_diff__type_diff_common;
8933965Sjdp      hunk->resolved_diff = NULL;
9033965Sjdp
9133965Sjdp      *position_list1 = position[0];
9233965Sjdp      *position_list2 = position[1];
9333965Sjdp
9433965Sjdp      return;
9533965Sjdp    }
9633965Sjdp
9733965Sjdp  hunk->type = svn_diff__type_conflict;
9833965Sjdp
9933965Sjdp  /* ### If we have a conflict we can try to find the
10033965Sjdp   * ### common parts in it by getting an lcs between
10133965Sjdp   * ### modified (start to start + length) and
10233965Sjdp   * ### latest (start to start + length).
10333965Sjdp   * ### We use this lcs to create a simple diff.  Only
10433965Sjdp   * ### where there is a diff between the two, we have
10533965Sjdp   * ### a conflict.
10633965Sjdp   * ### This raises a problem; several common diffs and
10733965Sjdp   * ### conflicts can occur within the same original
10833965Sjdp   * ### block.  This needs some thought.
10933965Sjdp   * ###
11033965Sjdp   * ### NB: We can use the node _pointers_ to identify
11133965Sjdp   * ###     different tokens
11233965Sjdp   */
11333965Sjdp
11433965Sjdp  subpool = svn_pool_create(pool);
115218822Sdim
11633965Sjdp  /* Calculate how much of the two sequences was
11733965Sjdp   * actually the same.
11833965Sjdp   */
11933965Sjdp  common_length = (modified_length < latest_length
12033965Sjdp                  ? modified_length : latest_length)
12133965Sjdp                - common_length;
12233965Sjdp
12333965Sjdp  /* If there were matching symbols at the start of
12433965Sjdp   * both sequences, record that fact.
12533965Sjdp   */
12633965Sjdp  if (common_length > 0)
12733965Sjdp    {
12833965Sjdp      lcs = apr_palloc(subpool, sizeof(*lcs));
12933965Sjdp      lcs->next = NULL;
13033965Sjdp      lcs->position[0] = start_position[0];
13133965Sjdp      lcs->position[1] = start_position[1];
13233965Sjdp      lcs->length = common_length;
13333965Sjdp
13433965Sjdp      lcs_ref = &lcs->next;
13533965Sjdp    }
13633965Sjdp
13733965Sjdp  modified_length -= common_length;
13833965Sjdp  latest_length -= common_length;
13933965Sjdp
14033965Sjdp  modified_start = start_position[0]->offset;
14133965Sjdp  latest_start = start_position[1]->offset;
14233965Sjdp
14333965Sjdp  start_position[0] = position[0];
14433965Sjdp  start_position[1] = position[1];
14533965Sjdp
14633965Sjdp  /* Create a new ring for svn_diff__lcs to grok.
14733965Sjdp   * We can safely do this given we don't need the
14833965Sjdp   * positions we processed anymore.
14933965Sjdp   */
15033965Sjdp  if (modified_length == 0)
15133965Sjdp    {
15233965Sjdp      *position_list1 = position[0];
15333965Sjdp      position[0] = NULL;
15433965Sjdp    }
15533965Sjdp  else
15633965Sjdp    {
15733965Sjdp      while (--modified_length)
15833965Sjdp        position[0] = position[0]->next;
15933965Sjdp
16033965Sjdp      *position_list1 = position[0]->next;
16133965Sjdp      position[0]->next = start_position[0];
16233965Sjdp    }
16333965Sjdp
16433965Sjdp  if (latest_length == 0)
16533965Sjdp    {
16633965Sjdp      *position_list2 = position[1];
16733965Sjdp      position[1] = NULL;
16833965Sjdp    }
16933965Sjdp  else
170218822Sdim    {
17133965Sjdp      while (--latest_length)
17233965Sjdp        position[1] = position[1]->next;
17333965Sjdp
17433965Sjdp      *position_list2 = position[1]->next;
17533965Sjdp      position[1]->next = start_position[1];
17633965Sjdp    }
17733965Sjdp
17833965Sjdp  token_counts[0] = svn_diff__get_token_counts(position[0], num_tokens,
17933965Sjdp                                               subpool);
18033965Sjdp  token_counts[1] = svn_diff__get_token_counts(position[1], num_tokens,
18133965Sjdp                                               subpool);
18233965Sjdp
18333965Sjdp  *lcs_ref = svn_diff__lcs(position[0], position[1], token_counts[0],
18433965Sjdp                           token_counts[1], num_tokens, 0, 0, subpool);
18533965Sjdp
18633965Sjdp  /* Fix up the EOF lcs element in case one of
18733965Sjdp   * the two sequences was NULL.
18833965Sjdp   */
18933965Sjdp  if ((*lcs_ref)->position[0]->offset == 1)
19033965Sjdp    (*lcs_ref)->position[0] = *position_list1;
191218822Sdim
19233965Sjdp  if ((*lcs_ref)->position[1]->offset == 1)
19333965Sjdp    (*lcs_ref)->position[1] = *position_list2;
19433965Sjdp
19533965Sjdp  /* Produce the resolved diff */
19633965Sjdp  while (1)
19733965Sjdp    {
19833965Sjdp      if (modified_start < lcs->position[0]->offset
19933965Sjdp          || latest_start < lcs->position[1]->offset)
20033965Sjdp        {
20133965Sjdp          (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref));
20233965Sjdp
20333965Sjdp          (*diff_ref)->type = svn_diff__type_conflict;
20433965Sjdp          (*diff_ref)->original_start = hunk->original_start;
20533965Sjdp          (*diff_ref)->original_length = hunk->original_length;
20633965Sjdp          (*diff_ref)->modified_start = modified_start - 1;
20733965Sjdp          (*diff_ref)->modified_length = lcs->position[0]->offset
20833965Sjdp                                         - modified_start;
20933965Sjdp          (*diff_ref)->latest_start = latest_start - 1;
21033965Sjdp          (*diff_ref)->latest_length = lcs->position[1]->offset
21133965Sjdp                                       - latest_start;
21233965Sjdp          (*diff_ref)->resolved_diff = NULL;
21333965Sjdp
21433965Sjdp          diff_ref = &(*diff_ref)->next;
21533965Sjdp        }
21633965Sjdp
21733965Sjdp      /* Detect the EOF */
21833965Sjdp      if (lcs->length == 0)
21933965Sjdp        break;
22033965Sjdp
22133965Sjdp      modified_start = lcs->position[0]->offset;
22233965Sjdp      latest_start = lcs->position[1]->offset;
22333965Sjdp
22433965Sjdp      (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref));
22533965Sjdp
22633965Sjdp      (*diff_ref)->type = svn_diff__type_diff_common;
22733965Sjdp      (*diff_ref)->original_start = hunk->original_start;
22833965Sjdp      (*diff_ref)->original_length = hunk->original_length;
22933965Sjdp      (*diff_ref)->modified_start = modified_start - 1;
23033965Sjdp      (*diff_ref)->modified_length = lcs->length;
23133965Sjdp      (*diff_ref)->latest_start = latest_start - 1;
23233965Sjdp      (*diff_ref)->latest_length = lcs->length;
23333965Sjdp      (*diff_ref)->resolved_diff = NULL;
23433965Sjdp
23533965Sjdp      diff_ref = &(*diff_ref)->next;
23633965Sjdp
23733965Sjdp      modified_start += lcs->length;
23833965Sjdp      latest_start += lcs->length;
23933965Sjdp
24033965Sjdp      lcs = lcs->next;
24133965Sjdp    }
24233965Sjdp
24333965Sjdp  *diff_ref = NULL;
24433965Sjdp
24533965Sjdp  svn_pool_destroy(subpool);
24633965Sjdp}
24733965Sjdp
24833965Sjdp
24933965Sjdpsvn_error_t *
25033965Sjdpsvn_diff_diff3_2(svn_diff_t **diff,
25133965Sjdp                 void *diff_baton,
25233965Sjdp                 const svn_diff_fns2_t *vtable,
25333965Sjdp                 apr_pool_t *pool)
25433965Sjdp{
25533965Sjdp  svn_diff__tree_t *tree;
25633965Sjdp  svn_diff__position_t *position_list[3];
25733965Sjdp  svn_diff__token_index_t num_tokens;
25833965Sjdp  svn_diff__token_index_t *token_counts[3];
25933965Sjdp  svn_diff_datasource_e datasource[] = {svn_diff_datasource_original,
26033965Sjdp                                        svn_diff_datasource_modified,
26133965Sjdp                                        svn_diff_datasource_latest};
26233965Sjdp  svn_diff__lcs_t *lcs_om;
26333965Sjdp  svn_diff__lcs_t *lcs_ol;
26433965Sjdp  apr_pool_t *subpool;
26533965Sjdp  apr_pool_t *treepool;
26633965Sjdp  apr_off_t prefix_lines = 0;
26733965Sjdp  apr_off_t suffix_lines = 0;
26833965Sjdp
26933965Sjdp  *diff = NULL;
27033965Sjdp
27133965Sjdp  subpool = svn_pool_create(pool);
27233965Sjdp  treepool = svn_pool_create(pool);
27333965Sjdp
27433965Sjdp  svn_diff__tree_create(&tree, treepool);
27533965Sjdp
27633965Sjdp  SVN_ERR(vtable->datasources_open(diff_baton, &prefix_lines, &suffix_lines,
27733965Sjdp                                   datasource, 3));
27833965Sjdp
27933965Sjdp  SVN_ERR(svn_diff__get_tokens(&position_list[0],
28033965Sjdp                               tree,
28133965Sjdp                               diff_baton, vtable,
28233965Sjdp                               svn_diff_datasource_original,
28333965Sjdp                               prefix_lines,
28433965Sjdp                               subpool));
28533965Sjdp
28633965Sjdp  SVN_ERR(svn_diff__get_tokens(&position_list[1],
28733965Sjdp                               tree,
28833965Sjdp                               diff_baton, vtable,
28933965Sjdp                               svn_diff_datasource_modified,
29033965Sjdp                               prefix_lines,
29133965Sjdp                               subpool));
292
293  SVN_ERR(svn_diff__get_tokens(&position_list[2],
294                               tree,
295                               diff_baton, vtable,
296                               svn_diff_datasource_latest,
297                               prefix_lines,
298                               subpool));
299
300  num_tokens = svn_diff__get_node_count(tree);
301
302  /* Get rid of the tokens, we don't need them to calc the diff */
303  if (vtable->token_discard_all != NULL)
304    vtable->token_discard_all(diff_baton);
305
306  /* We don't need the nodes in the tree either anymore, nor the tree itself */
307  svn_pool_destroy(treepool);
308
309  token_counts[0] = svn_diff__get_token_counts(position_list[0], num_tokens,
310                                               subpool);
311  token_counts[1] = svn_diff__get_token_counts(position_list[1], num_tokens,
312                                               subpool);
313  token_counts[2] = svn_diff__get_token_counts(position_list[2], num_tokens,
314                                               subpool);
315
316  /* Get the lcs for original-modified and original-latest */
317  lcs_om = svn_diff__lcs(position_list[0], position_list[1], token_counts[0],
318                         token_counts[1], num_tokens, prefix_lines,
319                         suffix_lines, subpool);
320  lcs_ol = svn_diff__lcs(position_list[0], position_list[2], token_counts[0],
321                         token_counts[2], num_tokens, prefix_lines,
322                         suffix_lines, subpool);
323
324  /* Produce a merged diff */
325  {
326    svn_diff_t **diff_ref = diff;
327
328    apr_off_t original_start = 1;
329    apr_off_t modified_start = 1;
330    apr_off_t latest_start = 1;
331    apr_off_t original_sync;
332    apr_off_t modified_sync;
333    apr_off_t latest_sync;
334    apr_off_t common_length;
335    apr_off_t modified_length;
336    apr_off_t latest_length;
337    svn_boolean_t is_modified;
338    svn_boolean_t is_latest;
339    svn_diff__position_t sentinel_position[2];
340
341    /* Point the position lists to the start of the list
342     * so that common_diff/conflict detection actually is
343     * able to work.
344     */
345    if (position_list[1])
346      {
347        sentinel_position[0].next = position_list[1]->next;
348        sentinel_position[0].offset = position_list[1]->offset + 1;
349        position_list[1]->next = &sentinel_position[0];
350        position_list[1] = sentinel_position[0].next;
351      }
352    else
353      {
354        sentinel_position[0].offset = prefix_lines + 1;
355        sentinel_position[0].next = NULL;
356        position_list[1] = &sentinel_position[0];
357      }
358
359    if (position_list[2])
360      {
361        sentinel_position[1].next = position_list[2]->next;
362        sentinel_position[1].offset = position_list[2]->offset + 1;
363        position_list[2]->next = &sentinel_position[1];
364        position_list[2] = sentinel_position[1].next;
365      }
366    else
367      {
368        sentinel_position[1].offset = prefix_lines + 1;
369        sentinel_position[1].next = NULL;
370        position_list[2] = &sentinel_position[1];
371      }
372
373    while (1)
374      {
375        /* Find the sync points */
376        while (1)
377          {
378            if (lcs_om->position[0]->offset > lcs_ol->position[0]->offset)
379              {
380                original_sync = lcs_om->position[0]->offset;
381
382                while (lcs_ol->position[0]->offset + lcs_ol->length
383                       < original_sync)
384                  lcs_ol = lcs_ol->next;
385
386                /* If the sync point is the EOF, and our current lcs segment
387                 * doesn't reach as far as EOF, we need to skip this segment.
388                 */
389                if (lcs_om->length == 0 && lcs_ol->length > 0
390                    && lcs_ol->position[0]->offset + lcs_ol->length
391                       == original_sync
392                    && lcs_ol->position[1]->offset + lcs_ol->length
393                       != lcs_ol->next->position[1]->offset)
394                  lcs_ol = lcs_ol->next;
395
396                if (lcs_ol->position[0]->offset <= original_sync)
397                    break;
398              }
399            else
400              {
401                original_sync = lcs_ol->position[0]->offset;
402
403                while (lcs_om->position[0]->offset + lcs_om->length
404                       < original_sync)
405                  lcs_om = lcs_om->next;
406
407                /* If the sync point is the EOF, and our current lcs segment
408                 * doesn't reach as far as EOF, we need to skip this segment.
409                 */
410                if (lcs_ol->length == 0 && lcs_om->length > 0
411                    && lcs_om->position[0]->offset + lcs_om->length
412                       == original_sync
413                    && lcs_om->position[1]->offset + lcs_om->length
414                       != lcs_om->next->position[1]->offset)
415                  lcs_om = lcs_om->next;
416
417                if (lcs_om->position[0]->offset <= original_sync)
418                    break;
419              }
420          }
421
422        modified_sync = lcs_om->position[1]->offset
423                      + (original_sync - lcs_om->position[0]->offset);
424        latest_sync = lcs_ol->position[1]->offset
425                    + (original_sync - lcs_ol->position[0]->offset);
426
427        /* Determine what is modified, if anything */
428        is_modified = lcs_om->position[0]->offset - original_start > 0
429                      || lcs_om->position[1]->offset - modified_start > 0;
430
431        is_latest = lcs_ol->position[0]->offset - original_start > 0
432                    || lcs_ol->position[1]->offset - latest_start > 0;
433
434        if (is_modified || is_latest)
435          {
436            modified_length = modified_sync - modified_start;
437            latest_length = latest_sync - latest_start;
438
439            (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref));
440
441            (*diff_ref)->original_start = original_start - 1;
442            (*diff_ref)->original_length = original_sync - original_start;
443            (*diff_ref)->modified_start = modified_start - 1;
444            (*diff_ref)->modified_length = modified_length;
445            (*diff_ref)->latest_start = latest_start - 1;
446            (*diff_ref)->latest_length = latest_length;
447            (*diff_ref)->resolved_diff = NULL;
448
449            if (is_modified && is_latest)
450              {
451                svn_diff__resolve_conflict(*diff_ref,
452                                           &position_list[1],
453                                           &position_list[2],
454                                           num_tokens,
455                                           pool);
456              }
457            else if (is_modified)
458              {
459                (*diff_ref)->type = svn_diff__type_diff_modified;
460              }
461            else
462              {
463                (*diff_ref)->type = svn_diff__type_diff_latest;
464              }
465
466            diff_ref = &(*diff_ref)->next;
467          }
468
469        /* Detect EOF */
470        if (lcs_om->length == 0 || lcs_ol->length == 0)
471            break;
472
473        modified_length = lcs_om->length
474                          - (original_sync - lcs_om->position[0]->offset);
475        latest_length = lcs_ol->length
476                        - (original_sync - lcs_ol->position[0]->offset);
477        common_length = modified_length < latest_length
478                        ? modified_length : latest_length;
479
480        (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref));
481
482        (*diff_ref)->type = svn_diff__type_common;
483        (*diff_ref)->original_start = original_sync - 1;
484        (*diff_ref)->original_length = common_length;
485        (*diff_ref)->modified_start = modified_sync - 1;
486        (*diff_ref)->modified_length = common_length;
487        (*diff_ref)->latest_start = latest_sync - 1;
488        (*diff_ref)->latest_length = common_length;
489        (*diff_ref)->resolved_diff = NULL;
490
491        diff_ref = &(*diff_ref)->next;
492
493        /* Set the new offsets */
494        original_start = original_sync + common_length;
495        modified_start = modified_sync + common_length;
496        latest_start = latest_sync + common_length;
497
498        /* Make it easier for diff_common/conflict detection
499           by recording last lcs start positions
500         */
501        if (position_list[1]->offset < lcs_om->position[1]->offset)
502          position_list[1] = lcs_om->position[1];
503
504        if (position_list[2]->offset < lcs_ol->position[1]->offset)
505          position_list[2] = lcs_ol->position[1];
506
507        /* Make sure we are pointing to lcs entries beyond
508         * the range we just processed
509         */
510        while (original_start >= lcs_om->position[0]->offset + lcs_om->length
511               && lcs_om->length > 0)
512          {
513            lcs_om = lcs_om->next;
514          }
515
516        while (original_start >= lcs_ol->position[0]->offset + lcs_ol->length
517               && lcs_ol->length > 0)
518          {
519            lcs_ol = lcs_ol->next;
520          }
521      }
522
523    *diff_ref = NULL;
524  }
525
526  svn_pool_destroy(subpool);
527
528  return SVN_NO_ERROR;
529}
530