diff4.c revision 251881
1227825Stheraven/*
2227825Stheraven * diff.c :  routines for doing diffs
3227825Stheraven *
4227825Stheraven * ====================================================================
5227825Stheraven *    Licensed to the Apache Software Foundation (ASF) under one
6227825Stheraven *    or more contributor license agreements.  See the NOTICE file
7227825Stheraven *    distributed with this work for additional information
8227825Stheraven *    regarding copyright ownership.  The ASF licenses this file
9227825Stheraven *    to you under the Apache License, Version 2.0 (the
10227825Stheraven *    "License"); you may not use this file except in compliance
11227825Stheraven *    with the License.  You may obtain a copy of the License at
12227825Stheraven *
13227825Stheraven *      http://www.apache.org/licenses/LICENSE-2.0
14227825Stheraven *
15227825Stheraven *    Unless required by applicable law or agreed to in writing,
16227825Stheraven *    software distributed under the License is distributed on an
17232950Stheraven *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18232950Stheraven *    KIND, either express or implied.  See the License for the
19227825Stheraven *    specific language governing permissions and limitations
20227825Stheraven *    under the License.
21227825Stheraven * ====================================================================
22227825Stheraven */
23227825Stheraven
24227825Stheraven
25243376Sdim#include <apr.h>
26232950Stheraven#include <apr_pools.h>
27227825Stheraven#include <apr_general.h>
28227825Stheraven
29227825Stheraven#include "svn_pools.h"
30227825Stheraven#include "svn_error.h"
31227825Stheraven#include "svn_diff.h"
32227825Stheraven#include "svn_types.h"
33227825Stheraven
34232950Stheraven#include "diff.h"
35227825Stheraven
36227825Stheraven/*
37232950Stheraven * Variance adjustment rules:
38232950Stheraven *
39227825Stheraven * See notes/variance-adjusted-patching.html
40227825Stheraven *
41227825Stheraven * ###: Expand this comment to contain the full set of adjustment
42227825Stheraven * ###: rules instead of pointing to a webpage.
43262801Sdim */
44232950Stheraven
45227825Stheraven/*
46232950Stheraven * In the text below consider the following:
47227825Stheraven *
48232950Stheraven * O     = Original
49232950Stheraven * M     = Modified
50227825Stheraven * L     = Latest
51227825Stheraven * A     = Ancestor
52227825Stheraven * X:Y   = diff between X and Y
53227825Stheraven * X:Y:Z = 3-way diff between X, Y and Z
54227825Stheraven * P     = O:L, possibly adjusted
55227825Stheraven *
56227825Stheraven * diff4 -- Variance adjusted diff algorithm
57227825Stheraven *
58227825Stheraven * 1. Create a diff O:L and call that P.
59227825Stheraven *
60227825Stheraven * 2. Morph P into a 3-way diff by performing the following
61227825Stheraven *    transformation: O:L -> O:O:L.
62227825Stheraven *
63227825Stheraven * 3. Create a diff A:O.
64227825Stheraven *
65227825Stheraven * 4. Using A:O...
66227825Stheraven *
67227825Stheraven * #. Using M:A...
68227825Stheraven *
69227825Stheraven * #. Resolve conflicts...
70227825Stheraven *
71232950Stheraven
72232950Stheraven   1. Out-range added line: decrement the line numbers in every hunk in P
73227825Stheraven      that comes after the addition. This undoes the effect of the add, since
74227825Stheraven      the add never happened in D.
75227825Stheraven
76227825Stheraven   2. Out-range deleted line: increment the line numbers in every hunk in P
77227825Stheraven      that comes after the deletion. This undoes the effect of the deletion,
78227825Stheraven      since the deletion never happened in D.
79232950Stheraven
80232950Stheraven   3. Out-range edited line: do nothing. Out-range edits are irrelevant to P.
81227825Stheraven
82227825Stheraven   4. Added line in context range in P: remove the corresponding line from
83227825Stheraven      the context, optionally replacing it with new context based on that
84250514Sdim      region in M, and adjust line numbers and mappings appropriately.
85262801Sdim
86250514Sdim   5. Added line in affected text range in P: this is a dependency problem
87250514Sdim      -- part of the change T:18-T:19 depends on changes introduced to T after
88250514Sdim      B branched. There are several possible behaviors, depending on what the
89250514Sdim      user wants. One is to generate an informative error, stating that
90250514Sdim      T:18-T:19 depends on some other change (T:N-T:M, where N>=8, M<=18,
91250514Sdim      and M-N == 1); the exact revisions can be discovered automatically using
92250514Sdim      the same process as "cvs annotate", though it may take some time to do
93250514Sdim      so. Another option is to include the change in P, as an insertion of the
94232950Stheraven      "after" version of the text, and adjust line numbers and mappings
95262801Sdim      accordingly. (And if all this isn't sounding a lot like a directory
96227825Stheraven      merge algorithm, try drinking more of the Kool-Aid.) A third option is
97232950Stheraven      to include it as an insertion, but with metadata (such as CVS-style
98227825Stheraven      conflict markers) indicating that the line attempting to be patched
99227825Stheraven      does not exist in B.
100227825Stheraven
101227825Stheraven   6. Deleted line that is in-range in P: request another universe -- this
102227825Stheraven      situation can't happen in ours.
103227825Stheraven
104232950Stheraven   7. In-range edited line: reverse that edit in the "before" version of the
105262801Sdim      corresponding line in the appropriate hunk in P, to obtain the version of
106227825Stheraven      the line that will be found in B when P is applied.
107232950Stheraven*/
108227825Stheraven
109227825Stheraven
110227825Stheravenstatic void
111227825Stheravenadjust_diff(svn_diff_t *diff, svn_diff_t *adjust)
112227825Stheraven{
113227825Stheraven  svn_diff_t *hunk;
114232950Stheraven  apr_off_t range_start;
115262801Sdim  apr_off_t range_end;
116227825Stheraven  apr_off_t adjustment;
117232950Stheraven
118227825Stheraven  for (; adjust; adjust = adjust->next)
119227825Stheraven    {
120227825Stheraven      range_start = adjust->modified_start;
121227825Stheraven      range_end = range_start + adjust->modified_length;
122227825Stheraven      adjustment = adjust->original_length - adjust->modified_length;
123227825Stheraven
124232950Stheraven      /* No change in line count, so no modifications. [3, 7] */
125227825Stheraven      if (adjustment == 0)
126227825Stheraven        continue;
127232950Stheraven
128232950Stheraven      for (hunk = diff; hunk; hunk = hunk->next)
129227825Stheraven        {
130227825Stheraven          /* Changes are in the range before this hunk.  Adjust the start
131227825Stheraven           * of the hunk. [1, 2]
132227825Stheraven           */
133262801Sdim          if (hunk->modified_start >= range_end)
134232950Stheraven            {
135227825Stheraven              hunk->modified_start += adjustment;
136232950Stheraven              continue;
137227825Stheraven            }
138232950Stheraven
139227825Stheraven          /* Changes are in the range beyond this hunk.  No adjustments
140227825Stheraven           * needed. [1, 2]
141232950Stheraven           */
142227825Stheraven          if (hunk->modified_start + hunk->modified_length <= range_start)
143227825Stheraven            continue;
144243376Sdim
145227825Stheraven          /* From here on changes are in the range of this hunk. */
146227825Stheraven
147232950Stheraven          /* This is a context hunk.  Adjust the length. [4]
148232950Stheraven           */
149227825Stheraven          if (hunk->type == svn_diff__type_diff_modified)
150227825Stheraven            {
151243376Sdim              hunk->modified_length += adjustment;
152227825Stheraven              continue;
153227825Stheraven            }
154227825Stheraven
155227825Stheraven          /* Mark as conflicted. This happens in the reverse case when a line
156227825Stheraven           * is added in range and in the forward case when a line is deleted
157227825Stheraven           * in range. [5 (reverse), 6 (forward)]
158227825Stheraven           */
159227825Stheraven          if (adjustment < 0)
160243376Sdim              hunk->type = svn_diff__type_conflict;
161243376Sdim
162243376Sdim          /* Adjust the length of this hunk (reverse the change). [5, 6] */
163227825Stheraven          hunk->modified_length -= adjustment;
164243376Sdim        }
165227825Stheraven    }
166227825Stheraven}
167227825Stheraven
168227825Stheravensvn_error_t *
169227825Stheravensvn_diff_diff4_2(svn_diff_t **diff,
170227825Stheraven                 void *diff_baton,
171227825Stheraven                 const svn_diff_fns2_t *vtable,
172227825Stheraven                 apr_pool_t *pool)
173227825Stheraven{
174227825Stheraven  svn_diff__tree_t *tree;
175227825Stheraven  svn_diff__position_t *position_list[4];
176262801Sdim  svn_diff__token_index_t num_tokens;
177262801Sdim  svn_diff__token_index_t *token_counts[4];
178227825Stheraven  svn_diff_datasource_e datasource[] = {svn_diff_datasource_original,
179227825Stheraven                                        svn_diff_datasource_modified,
180227825Stheraven                                        svn_diff_datasource_latest,
181227825Stheraven                                        svn_diff_datasource_ancestor};
182227825Stheraven  svn_diff__lcs_t *lcs_ol;
183227825Stheraven  svn_diff__lcs_t *lcs_adjust;
184227825Stheraven  svn_diff_t *diff_ol;
185227825Stheraven  svn_diff_t *diff_adjust;
186227825Stheraven  svn_diff_t *hunk;
187227825Stheraven  apr_pool_t *subpool;
188227825Stheraven  apr_pool_t *subpool2;
189227825Stheraven  apr_pool_t *subpool3;
190227825Stheraven  apr_off_t prefix_lines = 0;
191227825Stheraven  apr_off_t suffix_lines = 0;
192227825Stheraven
193227825Stheraven  *diff = NULL;
194227825Stheraven
195227825Stheraven  subpool = svn_pool_create(pool);
196243376Sdim  subpool2 = svn_pool_create(subpool);
197243376Sdim  subpool3 = svn_pool_create(subpool2);
198243376Sdim
199227825Stheraven  svn_diff__tree_create(&tree, subpool3);
200243376Sdim
201227825Stheraven  SVN_ERR(vtable->datasources_open(diff_baton, &prefix_lines, &suffix_lines,
202227825Stheraven                                   datasource, 4));
203227825Stheraven
204227825Stheraven  SVN_ERR(svn_diff__get_tokens(&position_list[0],
205227825Stheraven                               tree,
206227825Stheraven                               diff_baton, vtable,
207227825Stheraven                               svn_diff_datasource_original,
208227825Stheraven                               prefix_lines,
209243376Sdim                               subpool2));
210227825Stheraven
211227825Stheraven  SVN_ERR(svn_diff__get_tokens(&position_list[1],
212262801Sdim                               tree,
213262801Sdim                               diff_baton, vtable,
214227825Stheraven                               svn_diff_datasource_modified,
215227825Stheraven                               prefix_lines,
216227825Stheraven                               subpool));
217227825Stheraven
218227825Stheraven  SVN_ERR(svn_diff__get_tokens(&position_list[2],
219227825Stheraven                               tree,
220227825Stheraven                               diff_baton, vtable,
221227825Stheraven                               svn_diff_datasource_latest,
222227825Stheraven                               prefix_lines,
223227825Stheraven                               subpool));
224227825Stheraven
225227825Stheraven  SVN_ERR(svn_diff__get_tokens(&position_list[3],
226227825Stheraven                               tree,
227227825Stheraven                               diff_baton, vtable,
228243376Sdim                               svn_diff_datasource_ancestor,
229227825Stheraven                               prefix_lines,
230227825Stheraven                               subpool2));
231227825Stheraven
232227825Stheraven  num_tokens = svn_diff__get_node_count(tree);
233227825Stheraven
234227825Stheraven  /* Get rid of the tokens, we don't need them to calc the diff */
235243376Sdim  if (vtable->token_discard_all != NULL)
236227825Stheraven    vtable->token_discard_all(diff_baton);
237243376Sdim
238243376Sdim  /* We don't need the nodes in the tree either anymore, nor the tree itself */
239227825Stheraven  svn_pool_clear(subpool3);
240227825Stheraven
241232950Stheraven  token_counts[0] = svn_diff__get_token_counts(position_list[0], num_tokens,
242232950Stheraven                                               subpool);
243227825Stheraven  token_counts[1] = svn_diff__get_token_counts(position_list[1], num_tokens,
244227825Stheraven                                               subpool);
245227825Stheraven  token_counts[2] = svn_diff__get_token_counts(position_list[2], num_tokens,
246227825Stheraven                                               subpool);
247243376Sdim  token_counts[3] = svn_diff__get_token_counts(position_list[3], num_tokens,
248243376Sdim                                               subpool);
249243376Sdim
250227825Stheraven  /* Get the lcs for original - latest */
251243376Sdim  lcs_ol = svn_diff__lcs(position_list[0], position_list[2],
252227825Stheraven                         token_counts[0], token_counts[2],
253227825Stheraven                         num_tokens, prefix_lines,
254227825Stheraven                         suffix_lines, subpool3);
255227825Stheraven  diff_ol = svn_diff__diff(lcs_ol, 1, 1, TRUE, pool);
256227825Stheraven
257227825Stheraven  svn_pool_clear(subpool3);
258227825Stheraven
259227825Stheraven  for (hunk = diff_ol; hunk; hunk = hunk->next)
260227825Stheraven    {
261227825Stheraven      hunk->latest_start = hunk->modified_start;
262227825Stheraven      hunk->latest_length = hunk->modified_length;
263227825Stheraven      hunk->modified_start = hunk->original_start;
264227825Stheraven      hunk->modified_length = hunk->original_length;
265227825Stheraven
266227825Stheraven      if (hunk->type == svn_diff__type_diff_modified)
267227825Stheraven          hunk->type = svn_diff__type_diff_latest;
268227825Stheraven      else
269227825Stheraven          hunk->type = svn_diff__type_diff_modified;
270227825Stheraven    }
271227825Stheraven
272227825Stheraven  /* Get the lcs for common ancestor - original
273227825Stheraven   * Do reverse adjustements
274227825Stheraven   */
275227825Stheraven  lcs_adjust = svn_diff__lcs(position_list[3], position_list[2],
276227825Stheraven                             token_counts[3], token_counts[2],
277227825Stheraven                             num_tokens, prefix_lines,
278243376Sdim                             suffix_lines, subpool3);
279243376Sdim  diff_adjust = svn_diff__diff(lcs_adjust, 1, 1, FALSE, subpool3);
280243376Sdim  adjust_diff(diff_ol, diff_adjust);
281227825Stheraven
282243376Sdim  svn_pool_clear(subpool3);
283227825Stheraven
284227825Stheraven  /* Get the lcs for modified - common ancestor
285227825Stheraven   * Do forward adjustments
286227825Stheraven   */
287227825Stheraven  lcs_adjust = svn_diff__lcs(position_list[1], position_list[3],
288227825Stheraven                             token_counts[1], token_counts[3],
289227825Stheraven                             num_tokens, prefix_lines,
290227825Stheraven                             suffix_lines, subpool3);
291227825Stheraven  diff_adjust = svn_diff__diff(lcs_adjust, 1, 1, FALSE, subpool3);
292227825Stheraven  adjust_diff(diff_ol, diff_adjust);
293243376Sdim
294227825Stheraven  /* Get rid of the position lists for original and ancestor, and delete
295227825Stheraven   * our scratchpool.
296227825Stheraven   */
297227825Stheraven  svn_pool_destroy(subpool2);
298227825Stheraven
299227825Stheraven  /* Now we try and resolve the conflicts we encountered */
300227825Stheraven  for (hunk = diff_ol; hunk; hunk = hunk->next)
301227825Stheraven    {
302227825Stheraven      if (hunk->type == svn_diff__type_conflict)
303227825Stheraven        {
304243376Sdim          svn_diff__resolve_conflict(hunk, &position_list[1],
305227825Stheraven                                     &position_list[2], num_tokens, pool);
306227825Stheraven        }
307227825Stheraven    }
308227825Stheraven
309243376Sdim  svn_pool_destroy(subpool);
310227825Stheraven
311243376Sdim  *diff = diff_ol;
312243376Sdim
313227825Stheraven  return SVN_NO_ERROR;
314227825Stheraven}
315232950Stheraven