diff4.c revision 251881
1227825Stheraven/* 2227825Stheraven * diff.c : routines for doing diffs 3227825Stheraven * 4227825Stheraven * ==================================================================== 5227825Stheraven * Licensed to the Apache Software Foundation (ASF) under one 6227825Stheraven * or more contributor license agreements. See the NOTICE file 7227825Stheraven * distributed with this work for additional information 8227825Stheraven * regarding copyright ownership. The ASF licenses this file 9227825Stheraven * to you under the Apache License, Version 2.0 (the 10227825Stheraven * "License"); you may not use this file except in compliance 11227825Stheraven * with the License. You may obtain a copy of the License at 12227825Stheraven * 13227825Stheraven * http://www.apache.org/licenses/LICENSE-2.0 14227825Stheraven * 15227825Stheraven * Unless required by applicable law or agreed to in writing, 16227825Stheraven * software distributed under the License is distributed on an 17232950Stheraven * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 18232950Stheraven * KIND, either express or implied. See the License for the 19227825Stheraven * specific language governing permissions and limitations 20227825Stheraven * under the License. 21227825Stheraven * ==================================================================== 22227825Stheraven */ 23227825Stheraven 24227825Stheraven 25243376Sdim#include <apr.h> 26232950Stheraven#include <apr_pools.h> 27227825Stheraven#include <apr_general.h> 28227825Stheraven 29227825Stheraven#include "svn_pools.h" 30227825Stheraven#include "svn_error.h" 31227825Stheraven#include "svn_diff.h" 32227825Stheraven#include "svn_types.h" 33227825Stheraven 34232950Stheraven#include "diff.h" 35227825Stheraven 36227825Stheraven/* 37232950Stheraven * Variance adjustment rules: 38232950Stheraven * 39227825Stheraven * See notes/variance-adjusted-patching.html 40227825Stheraven * 41227825Stheraven * ###: Expand this comment to contain the full set of adjustment 42227825Stheraven * ###: rules instead of pointing to a webpage. 43262801Sdim */ 44232950Stheraven 45227825Stheraven/* 46232950Stheraven * In the text below consider the following: 47227825Stheraven * 48232950Stheraven * O = Original 49232950Stheraven * M = Modified 50227825Stheraven * L = Latest 51227825Stheraven * A = Ancestor 52227825Stheraven * X:Y = diff between X and Y 53227825Stheraven * X:Y:Z = 3-way diff between X, Y and Z 54227825Stheraven * P = O:L, possibly adjusted 55227825Stheraven * 56227825Stheraven * diff4 -- Variance adjusted diff algorithm 57227825Stheraven * 58227825Stheraven * 1. Create a diff O:L and call that P. 59227825Stheraven * 60227825Stheraven * 2. Morph P into a 3-way diff by performing the following 61227825Stheraven * transformation: O:L -> O:O:L. 62227825Stheraven * 63227825Stheraven * 3. Create a diff A:O. 64227825Stheraven * 65227825Stheraven * 4. Using A:O... 66227825Stheraven * 67227825Stheraven * #. Using M:A... 68227825Stheraven * 69227825Stheraven * #. Resolve conflicts... 70227825Stheraven * 71232950Stheraven 72232950Stheraven 1. Out-range added line: decrement the line numbers in every hunk in P 73227825Stheraven that comes after the addition. This undoes the effect of the add, since 74227825Stheraven the add never happened in D. 75227825Stheraven 76227825Stheraven 2. Out-range deleted line: increment the line numbers in every hunk in P 77227825Stheraven that comes after the deletion. This undoes the effect of the deletion, 78227825Stheraven since the deletion never happened in D. 79232950Stheraven 80232950Stheraven 3. Out-range edited line: do nothing. Out-range edits are irrelevant to P. 81227825Stheraven 82227825Stheraven 4. Added line in context range in P: remove the corresponding line from 83227825Stheraven the context, optionally replacing it with new context based on that 84250514Sdim region in M, and adjust line numbers and mappings appropriately. 85262801Sdim 86250514Sdim 5. Added line in affected text range in P: this is a dependency problem 87250514Sdim -- part of the change T:18-T:19 depends on changes introduced to T after 88250514Sdim B branched. There are several possible behaviors, depending on what the 89250514Sdim user wants. One is to generate an informative error, stating that 90250514Sdim T:18-T:19 depends on some other change (T:N-T:M, where N>=8, M<=18, 91250514Sdim and M-N == 1); the exact revisions can be discovered automatically using 92250514Sdim the same process as "cvs annotate", though it may take some time to do 93250514Sdim so. Another option is to include the change in P, as an insertion of the 94232950Stheraven "after" version of the text, and adjust line numbers and mappings 95262801Sdim accordingly. (And if all this isn't sounding a lot like a directory 96227825Stheraven merge algorithm, try drinking more of the Kool-Aid.) A third option is 97232950Stheraven to include it as an insertion, but with metadata (such as CVS-style 98227825Stheraven conflict markers) indicating that the line attempting to be patched 99227825Stheraven does not exist in B. 100227825Stheraven 101227825Stheraven 6. Deleted line that is in-range in P: request another universe -- this 102227825Stheraven situation can't happen in ours. 103227825Stheraven 104232950Stheraven 7. In-range edited line: reverse that edit in the "before" version of the 105262801Sdim corresponding line in the appropriate hunk in P, to obtain the version of 106227825Stheraven the line that will be found in B when P is applied. 107232950Stheraven*/ 108227825Stheraven 109227825Stheraven 110227825Stheravenstatic void 111227825Stheravenadjust_diff(svn_diff_t *diff, svn_diff_t *adjust) 112227825Stheraven{ 113227825Stheraven svn_diff_t *hunk; 114232950Stheraven apr_off_t range_start; 115262801Sdim apr_off_t range_end; 116227825Stheraven apr_off_t adjustment; 117232950Stheraven 118227825Stheraven for (; adjust; adjust = adjust->next) 119227825Stheraven { 120227825Stheraven range_start = adjust->modified_start; 121227825Stheraven range_end = range_start + adjust->modified_length; 122227825Stheraven adjustment = adjust->original_length - adjust->modified_length; 123227825Stheraven 124232950Stheraven /* No change in line count, so no modifications. [3, 7] */ 125227825Stheraven if (adjustment == 0) 126227825Stheraven continue; 127232950Stheraven 128232950Stheraven for (hunk = diff; hunk; hunk = hunk->next) 129227825Stheraven { 130227825Stheraven /* Changes are in the range before this hunk. Adjust the start 131227825Stheraven * of the hunk. [1, 2] 132227825Stheraven */ 133262801Sdim if (hunk->modified_start >= range_end) 134232950Stheraven { 135227825Stheraven hunk->modified_start += adjustment; 136232950Stheraven continue; 137227825Stheraven } 138232950Stheraven 139227825Stheraven /* Changes are in the range beyond this hunk. No adjustments 140227825Stheraven * needed. [1, 2] 141232950Stheraven */ 142227825Stheraven if (hunk->modified_start + hunk->modified_length <= range_start) 143227825Stheraven continue; 144243376Sdim 145227825Stheraven /* From here on changes are in the range of this hunk. */ 146227825Stheraven 147232950Stheraven /* This is a context hunk. Adjust the length. [4] 148232950Stheraven */ 149227825Stheraven if (hunk->type == svn_diff__type_diff_modified) 150227825Stheraven { 151243376Sdim hunk->modified_length += adjustment; 152227825Stheraven continue; 153227825Stheraven } 154227825Stheraven 155227825Stheraven /* Mark as conflicted. This happens in the reverse case when a line 156227825Stheraven * is added in range and in the forward case when a line is deleted 157227825Stheraven * in range. [5 (reverse), 6 (forward)] 158227825Stheraven */ 159227825Stheraven if (adjustment < 0) 160243376Sdim hunk->type = svn_diff__type_conflict; 161243376Sdim 162243376Sdim /* Adjust the length of this hunk (reverse the change). [5, 6] */ 163227825Stheraven hunk->modified_length -= adjustment; 164243376Sdim } 165227825Stheraven } 166227825Stheraven} 167227825Stheraven 168227825Stheravensvn_error_t * 169227825Stheravensvn_diff_diff4_2(svn_diff_t **diff, 170227825Stheraven void *diff_baton, 171227825Stheraven const svn_diff_fns2_t *vtable, 172227825Stheraven apr_pool_t *pool) 173227825Stheraven{ 174227825Stheraven svn_diff__tree_t *tree; 175227825Stheraven svn_diff__position_t *position_list[4]; 176262801Sdim svn_diff__token_index_t num_tokens; 177262801Sdim svn_diff__token_index_t *token_counts[4]; 178227825Stheraven svn_diff_datasource_e datasource[] = {svn_diff_datasource_original, 179227825Stheraven svn_diff_datasource_modified, 180227825Stheraven svn_diff_datasource_latest, 181227825Stheraven svn_diff_datasource_ancestor}; 182227825Stheraven svn_diff__lcs_t *lcs_ol; 183227825Stheraven svn_diff__lcs_t *lcs_adjust; 184227825Stheraven svn_diff_t *diff_ol; 185227825Stheraven svn_diff_t *diff_adjust; 186227825Stheraven svn_diff_t *hunk; 187227825Stheraven apr_pool_t *subpool; 188227825Stheraven apr_pool_t *subpool2; 189227825Stheraven apr_pool_t *subpool3; 190227825Stheraven apr_off_t prefix_lines = 0; 191227825Stheraven apr_off_t suffix_lines = 0; 192227825Stheraven 193227825Stheraven *diff = NULL; 194227825Stheraven 195227825Stheraven subpool = svn_pool_create(pool); 196243376Sdim subpool2 = svn_pool_create(subpool); 197243376Sdim subpool3 = svn_pool_create(subpool2); 198243376Sdim 199227825Stheraven svn_diff__tree_create(&tree, subpool3); 200243376Sdim 201227825Stheraven SVN_ERR(vtable->datasources_open(diff_baton, &prefix_lines, &suffix_lines, 202227825Stheraven datasource, 4)); 203227825Stheraven 204227825Stheraven SVN_ERR(svn_diff__get_tokens(&position_list[0], 205227825Stheraven tree, 206227825Stheraven diff_baton, vtable, 207227825Stheraven svn_diff_datasource_original, 208227825Stheraven prefix_lines, 209243376Sdim subpool2)); 210227825Stheraven 211227825Stheraven SVN_ERR(svn_diff__get_tokens(&position_list[1], 212262801Sdim tree, 213262801Sdim diff_baton, vtable, 214227825Stheraven svn_diff_datasource_modified, 215227825Stheraven prefix_lines, 216227825Stheraven subpool)); 217227825Stheraven 218227825Stheraven SVN_ERR(svn_diff__get_tokens(&position_list[2], 219227825Stheraven tree, 220227825Stheraven diff_baton, vtable, 221227825Stheraven svn_diff_datasource_latest, 222227825Stheraven prefix_lines, 223227825Stheraven subpool)); 224227825Stheraven 225227825Stheraven SVN_ERR(svn_diff__get_tokens(&position_list[3], 226227825Stheraven tree, 227227825Stheraven diff_baton, vtable, 228243376Sdim svn_diff_datasource_ancestor, 229227825Stheraven prefix_lines, 230227825Stheraven subpool2)); 231227825Stheraven 232227825Stheraven num_tokens = svn_diff__get_node_count(tree); 233227825Stheraven 234227825Stheraven /* Get rid of the tokens, we don't need them to calc the diff */ 235243376Sdim if (vtable->token_discard_all != NULL) 236227825Stheraven vtable->token_discard_all(diff_baton); 237243376Sdim 238243376Sdim /* We don't need the nodes in the tree either anymore, nor the tree itself */ 239227825Stheraven svn_pool_clear(subpool3); 240227825Stheraven 241232950Stheraven token_counts[0] = svn_diff__get_token_counts(position_list[0], num_tokens, 242232950Stheraven subpool); 243227825Stheraven token_counts[1] = svn_diff__get_token_counts(position_list[1], num_tokens, 244227825Stheraven subpool); 245227825Stheraven token_counts[2] = svn_diff__get_token_counts(position_list[2], num_tokens, 246227825Stheraven subpool); 247243376Sdim token_counts[3] = svn_diff__get_token_counts(position_list[3], num_tokens, 248243376Sdim subpool); 249243376Sdim 250227825Stheraven /* Get the lcs for original - latest */ 251243376Sdim lcs_ol = svn_diff__lcs(position_list[0], position_list[2], 252227825Stheraven token_counts[0], token_counts[2], 253227825Stheraven num_tokens, prefix_lines, 254227825Stheraven suffix_lines, subpool3); 255227825Stheraven diff_ol = svn_diff__diff(lcs_ol, 1, 1, TRUE, pool); 256227825Stheraven 257227825Stheraven svn_pool_clear(subpool3); 258227825Stheraven 259227825Stheraven for (hunk = diff_ol; hunk; hunk = hunk->next) 260227825Stheraven { 261227825Stheraven hunk->latest_start = hunk->modified_start; 262227825Stheraven hunk->latest_length = hunk->modified_length; 263227825Stheraven hunk->modified_start = hunk->original_start; 264227825Stheraven hunk->modified_length = hunk->original_length; 265227825Stheraven 266227825Stheraven if (hunk->type == svn_diff__type_diff_modified) 267227825Stheraven hunk->type = svn_diff__type_diff_latest; 268227825Stheraven else 269227825Stheraven hunk->type = svn_diff__type_diff_modified; 270227825Stheraven } 271227825Stheraven 272227825Stheraven /* Get the lcs for common ancestor - original 273227825Stheraven * Do reverse adjustements 274227825Stheraven */ 275227825Stheraven lcs_adjust = svn_diff__lcs(position_list[3], position_list[2], 276227825Stheraven token_counts[3], token_counts[2], 277227825Stheraven num_tokens, prefix_lines, 278243376Sdim suffix_lines, subpool3); 279243376Sdim diff_adjust = svn_diff__diff(lcs_adjust, 1, 1, FALSE, subpool3); 280243376Sdim adjust_diff(diff_ol, diff_adjust); 281227825Stheraven 282243376Sdim svn_pool_clear(subpool3); 283227825Stheraven 284227825Stheraven /* Get the lcs for modified - common ancestor 285227825Stheraven * Do forward adjustments 286227825Stheraven */ 287227825Stheraven lcs_adjust = svn_diff__lcs(position_list[1], position_list[3], 288227825Stheraven token_counts[1], token_counts[3], 289227825Stheraven num_tokens, prefix_lines, 290227825Stheraven suffix_lines, subpool3); 291227825Stheraven diff_adjust = svn_diff__diff(lcs_adjust, 1, 1, FALSE, subpool3); 292227825Stheraven adjust_diff(diff_ol, diff_adjust); 293243376Sdim 294227825Stheraven /* Get rid of the position lists for original and ancestor, and delete 295227825Stheraven * our scratchpool. 296227825Stheraven */ 297227825Stheraven svn_pool_destroy(subpool2); 298227825Stheraven 299227825Stheraven /* Now we try and resolve the conflicts we encountered */ 300227825Stheraven for (hunk = diff_ol; hunk; hunk = hunk->next) 301227825Stheraven { 302227825Stheraven if (hunk->type == svn_diff__type_conflict) 303227825Stheraven { 304243376Sdim svn_diff__resolve_conflict(hunk, &position_list[1], 305227825Stheraven &position_list[2], num_tokens, pool); 306227825Stheraven } 307227825Stheraven } 308227825Stheraven 309243376Sdim svn_pool_destroy(subpool); 310227825Stheraven 311243376Sdim *diff = diff_ol; 312243376Sdim 313227825Stheraven return SVN_NO_ERROR; 314227825Stheraven} 315232950Stheraven