diff3.c revision 251886
133965Sjdp/* 233965Sjdp * diff3.c : routines for doing diffs 333965Sjdp * 433965Sjdp * ==================================================================== 533965Sjdp * Licensed to the Apache Software Foundation (ASF) under one 633965Sjdp * or more contributor license agreements. See the NOTICE file 733965Sjdp * distributed with this work for additional information 833965Sjdp * regarding copyright ownership. The ASF licenses this file 933965Sjdp * to you under the Apache License, Version 2.0 (the 1033965Sjdp * "License"); you may not use this file except in compliance 1133965Sjdp * with the License. You may obtain a copy of the License at 1233965Sjdp * 1333965Sjdp * http://www.apache.org/licenses/LICENSE-2.0 1433965Sjdp * 1533965Sjdp * Unless required by applicable law or agreed to in writing, 1633965Sjdp * software distributed under the License is distributed on an 17218822Sdim * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 18218822Sdim * KIND, either express or implied. See the License for the 1933965Sjdp * specific language governing permissions and limitations 20218822Sdim * under the License. 2133965Sjdp * ==================================================================== 2277298Sobrien */ 2333965Sjdp 2433965Sjdp 2533965Sjdp#include <apr.h> 2633965Sjdp#include <apr_pools.h> 2733965Sjdp#include <apr_general.h> 2833965Sjdp 2933965Sjdp#include "svn_pools.h" 3033965Sjdp#include "svn_error.h" 3133965Sjdp#include "svn_diff.h" 3233965Sjdp#include "svn_types.h" 3333965Sjdp 3433965Sjdp#include "diff.h" 3533965Sjdp 3677298Sobrien 3777298Sobrienvoid 3877298Sobriensvn_diff__resolve_conflict(svn_diff_t *hunk, 3933965Sjdp svn_diff__position_t **position_list1, 40218822Sdim svn_diff__position_t **position_list2, 41218822Sdim svn_diff__token_index_t num_tokens, 4233965Sjdp apr_pool_t *pool) 4333965Sjdp{ 4477298Sobrien apr_off_t modified_start = hunk->modified_start + 1; 4577298Sobrien apr_off_t latest_start = hunk->latest_start + 1; 4633965Sjdp apr_off_t common_length; 4733965Sjdp apr_off_t modified_length = hunk->modified_length; 4833965Sjdp apr_off_t latest_length = hunk->latest_length; 4933965Sjdp svn_diff__position_t *start_position[2]; 5033965Sjdp svn_diff__position_t *position[2]; 5133965Sjdp svn_diff__token_index_t *token_counts[2]; 5233965Sjdp svn_diff__lcs_t *lcs = NULL; 5333965Sjdp svn_diff__lcs_t **lcs_ref = &lcs; 5433965Sjdp svn_diff_t **diff_ref = &hunk->resolved_diff; 5533965Sjdp apr_pool_t *subpool; 5633965Sjdp 5733965Sjdp /* First find the starting positions for the 5833965Sjdp * comparison 5933965Sjdp */ 6033965Sjdp 6133965Sjdp start_position[0] = *position_list1; 6233965Sjdp start_position[1] = *position_list2; 6333965Sjdp 6433965Sjdp while (start_position[0]->offset < modified_start) 6533965Sjdp start_position[0] = start_position[0]->next; 6633965Sjdp 6733965Sjdp while (start_position[1]->offset < latest_start) 6833965Sjdp start_position[1] = start_position[1]->next; 6933965Sjdp 7033965Sjdp position[0] = start_position[0]; 7133965Sjdp position[1] = start_position[1]; 7233965Sjdp 7333965Sjdp common_length = modified_length < latest_length 7433965Sjdp ? modified_length : latest_length; 7533965Sjdp 7633965Sjdp while (common_length > 0 7733965Sjdp && position[0]->token_index == position[1]->token_index) 7833965Sjdp { 7933965Sjdp position[0] = position[0]->next; 8033965Sjdp position[1] = position[1]->next; 8133965Sjdp 8233965Sjdp common_length--; 8333965Sjdp } 8433965Sjdp 8533965Sjdp if (common_length == 0 86218822Sdim && modified_length == latest_length) 8733965Sjdp { 8833965Sjdp hunk->type = svn_diff__type_diff_common; 8933965Sjdp hunk->resolved_diff = NULL; 9033965Sjdp 9133965Sjdp *position_list1 = position[0]; 9233965Sjdp *position_list2 = position[1]; 9333965Sjdp 9433965Sjdp return; 9533965Sjdp } 9633965Sjdp 9733965Sjdp hunk->type = svn_diff__type_conflict; 9833965Sjdp 9933965Sjdp /* ### If we have a conflict we can try to find the 10033965Sjdp * ### common parts in it by getting an lcs between 10133965Sjdp * ### modified (start to start + length) and 10233965Sjdp * ### latest (start to start + length). 10333965Sjdp * ### We use this lcs to create a simple diff. Only 10433965Sjdp * ### where there is a diff between the two, we have 10533965Sjdp * ### a conflict. 10633965Sjdp * ### This raises a problem; several common diffs and 10733965Sjdp * ### conflicts can occur within the same original 10833965Sjdp * ### block. This needs some thought. 10933965Sjdp * ### 11033965Sjdp * ### NB: We can use the node _pointers_ to identify 11133965Sjdp * ### different tokens 11233965Sjdp */ 11333965Sjdp 11433965Sjdp subpool = svn_pool_create(pool); 115218822Sdim 11633965Sjdp /* Calculate how much of the two sequences was 11733965Sjdp * actually the same. 11833965Sjdp */ 11933965Sjdp common_length = (modified_length < latest_length 12033965Sjdp ? modified_length : latest_length) 12133965Sjdp - common_length; 12233965Sjdp 12333965Sjdp /* If there were matching symbols at the start of 12433965Sjdp * both sequences, record that fact. 12533965Sjdp */ 12633965Sjdp if (common_length > 0) 12733965Sjdp { 12833965Sjdp lcs = apr_palloc(subpool, sizeof(*lcs)); 12933965Sjdp lcs->next = NULL; 13033965Sjdp lcs->position[0] = start_position[0]; 13133965Sjdp lcs->position[1] = start_position[1]; 13233965Sjdp lcs->length = common_length; 13333965Sjdp 13433965Sjdp lcs_ref = &lcs->next; 13533965Sjdp } 13633965Sjdp 13733965Sjdp modified_length -= common_length; 13833965Sjdp latest_length -= common_length; 13933965Sjdp 14033965Sjdp modified_start = start_position[0]->offset; 14133965Sjdp latest_start = start_position[1]->offset; 14233965Sjdp 14333965Sjdp start_position[0] = position[0]; 14433965Sjdp start_position[1] = position[1]; 14533965Sjdp 14633965Sjdp /* Create a new ring for svn_diff__lcs to grok. 14733965Sjdp * We can safely do this given we don't need the 14833965Sjdp * positions we processed anymore. 14933965Sjdp */ 15033965Sjdp if (modified_length == 0) 15133965Sjdp { 15233965Sjdp *position_list1 = position[0]; 15333965Sjdp position[0] = NULL; 15433965Sjdp } 15533965Sjdp else 15633965Sjdp { 15733965Sjdp while (--modified_length) 15833965Sjdp position[0] = position[0]->next; 15933965Sjdp 16033965Sjdp *position_list1 = position[0]->next; 16133965Sjdp position[0]->next = start_position[0]; 16233965Sjdp } 16333965Sjdp 16433965Sjdp if (latest_length == 0) 16533965Sjdp { 16633965Sjdp *position_list2 = position[1]; 16733965Sjdp position[1] = NULL; 16833965Sjdp } 16933965Sjdp else 170218822Sdim { 17133965Sjdp while (--latest_length) 17233965Sjdp position[1] = position[1]->next; 17333965Sjdp 17433965Sjdp *position_list2 = position[1]->next; 17533965Sjdp position[1]->next = start_position[1]; 17633965Sjdp } 17733965Sjdp 17833965Sjdp token_counts[0] = svn_diff__get_token_counts(position[0], num_tokens, 17933965Sjdp subpool); 18033965Sjdp token_counts[1] = svn_diff__get_token_counts(position[1], num_tokens, 18133965Sjdp subpool); 18233965Sjdp 18333965Sjdp *lcs_ref = svn_diff__lcs(position[0], position[1], token_counts[0], 18433965Sjdp token_counts[1], num_tokens, 0, 0, subpool); 18533965Sjdp 18633965Sjdp /* Fix up the EOF lcs element in case one of 18733965Sjdp * the two sequences was NULL. 18833965Sjdp */ 18933965Sjdp if ((*lcs_ref)->position[0]->offset == 1) 19033965Sjdp (*lcs_ref)->position[0] = *position_list1; 191218822Sdim 19233965Sjdp if ((*lcs_ref)->position[1]->offset == 1) 19333965Sjdp (*lcs_ref)->position[1] = *position_list2; 19433965Sjdp 19533965Sjdp /* Produce the resolved diff */ 19633965Sjdp while (1) 19733965Sjdp { 19833965Sjdp if (modified_start < lcs->position[0]->offset 19933965Sjdp || latest_start < lcs->position[1]->offset) 20033965Sjdp { 20133965Sjdp (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref)); 20233965Sjdp 20333965Sjdp (*diff_ref)->type = svn_diff__type_conflict; 20433965Sjdp (*diff_ref)->original_start = hunk->original_start; 20533965Sjdp (*diff_ref)->original_length = hunk->original_length; 20633965Sjdp (*diff_ref)->modified_start = modified_start - 1; 20733965Sjdp (*diff_ref)->modified_length = lcs->position[0]->offset 20833965Sjdp - modified_start; 20933965Sjdp (*diff_ref)->latest_start = latest_start - 1; 21033965Sjdp (*diff_ref)->latest_length = lcs->position[1]->offset 21133965Sjdp - latest_start; 21233965Sjdp (*diff_ref)->resolved_diff = NULL; 21333965Sjdp 21433965Sjdp diff_ref = &(*diff_ref)->next; 21533965Sjdp } 21633965Sjdp 21733965Sjdp /* Detect the EOF */ 21833965Sjdp if (lcs->length == 0) 21933965Sjdp break; 22033965Sjdp 22133965Sjdp modified_start = lcs->position[0]->offset; 22233965Sjdp latest_start = lcs->position[1]->offset; 22333965Sjdp 22433965Sjdp (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref)); 22533965Sjdp 22633965Sjdp (*diff_ref)->type = svn_diff__type_diff_common; 22733965Sjdp (*diff_ref)->original_start = hunk->original_start; 22833965Sjdp (*diff_ref)->original_length = hunk->original_length; 22933965Sjdp (*diff_ref)->modified_start = modified_start - 1; 23033965Sjdp (*diff_ref)->modified_length = lcs->length; 23133965Sjdp (*diff_ref)->latest_start = latest_start - 1; 23233965Sjdp (*diff_ref)->latest_length = lcs->length; 23333965Sjdp (*diff_ref)->resolved_diff = NULL; 23433965Sjdp 23533965Sjdp diff_ref = &(*diff_ref)->next; 23633965Sjdp 23733965Sjdp modified_start += lcs->length; 23833965Sjdp latest_start += lcs->length; 23933965Sjdp 24033965Sjdp lcs = lcs->next; 24133965Sjdp } 24233965Sjdp 24333965Sjdp *diff_ref = NULL; 24433965Sjdp 24533965Sjdp svn_pool_destroy(subpool); 24633965Sjdp} 24733965Sjdp 24833965Sjdp 24933965Sjdpsvn_error_t * 25033965Sjdpsvn_diff_diff3_2(svn_diff_t **diff, 25133965Sjdp void *diff_baton, 25233965Sjdp const svn_diff_fns2_t *vtable, 25333965Sjdp apr_pool_t *pool) 25433965Sjdp{ 25533965Sjdp svn_diff__tree_t *tree; 25633965Sjdp svn_diff__position_t *position_list[3]; 25733965Sjdp svn_diff__token_index_t num_tokens; 25833965Sjdp svn_diff__token_index_t *token_counts[3]; 25933965Sjdp svn_diff_datasource_e datasource[] = {svn_diff_datasource_original, 26033965Sjdp svn_diff_datasource_modified, 26133965Sjdp svn_diff_datasource_latest}; 26233965Sjdp svn_diff__lcs_t *lcs_om; 26333965Sjdp svn_diff__lcs_t *lcs_ol; 26433965Sjdp apr_pool_t *subpool; 26533965Sjdp apr_pool_t *treepool; 26633965Sjdp apr_off_t prefix_lines = 0; 26733965Sjdp apr_off_t suffix_lines = 0; 26833965Sjdp 26933965Sjdp *diff = NULL; 27033965Sjdp 27133965Sjdp subpool = svn_pool_create(pool); 27233965Sjdp treepool = svn_pool_create(pool); 27333965Sjdp 27433965Sjdp svn_diff__tree_create(&tree, treepool); 27533965Sjdp 27633965Sjdp SVN_ERR(vtable->datasources_open(diff_baton, &prefix_lines, &suffix_lines, 27733965Sjdp datasource, 3)); 27833965Sjdp 27933965Sjdp SVN_ERR(svn_diff__get_tokens(&position_list[0], 28033965Sjdp tree, 28133965Sjdp diff_baton, vtable, 28233965Sjdp svn_diff_datasource_original, 28333965Sjdp prefix_lines, 28433965Sjdp subpool)); 28533965Sjdp 28633965Sjdp SVN_ERR(svn_diff__get_tokens(&position_list[1], 28733965Sjdp tree, 28833965Sjdp diff_baton, vtable, 28933965Sjdp svn_diff_datasource_modified, 29033965Sjdp prefix_lines, 29133965Sjdp subpool)); 292 293 SVN_ERR(svn_diff__get_tokens(&position_list[2], 294 tree, 295 diff_baton, vtable, 296 svn_diff_datasource_latest, 297 prefix_lines, 298 subpool)); 299 300 num_tokens = svn_diff__get_node_count(tree); 301 302 /* Get rid of the tokens, we don't need them to calc the diff */ 303 if (vtable->token_discard_all != NULL) 304 vtable->token_discard_all(diff_baton); 305 306 /* We don't need the nodes in the tree either anymore, nor the tree itself */ 307 svn_pool_destroy(treepool); 308 309 token_counts[0] = svn_diff__get_token_counts(position_list[0], num_tokens, 310 subpool); 311 token_counts[1] = svn_diff__get_token_counts(position_list[1], num_tokens, 312 subpool); 313 token_counts[2] = svn_diff__get_token_counts(position_list[2], num_tokens, 314 subpool); 315 316 /* Get the lcs for original-modified and original-latest */ 317 lcs_om = svn_diff__lcs(position_list[0], position_list[1], token_counts[0], 318 token_counts[1], num_tokens, prefix_lines, 319 suffix_lines, subpool); 320 lcs_ol = svn_diff__lcs(position_list[0], position_list[2], token_counts[0], 321 token_counts[2], num_tokens, prefix_lines, 322 suffix_lines, subpool); 323 324 /* Produce a merged diff */ 325 { 326 svn_diff_t **diff_ref = diff; 327 328 apr_off_t original_start = 1; 329 apr_off_t modified_start = 1; 330 apr_off_t latest_start = 1; 331 apr_off_t original_sync; 332 apr_off_t modified_sync; 333 apr_off_t latest_sync; 334 apr_off_t common_length; 335 apr_off_t modified_length; 336 apr_off_t latest_length; 337 svn_boolean_t is_modified; 338 svn_boolean_t is_latest; 339 svn_diff__position_t sentinel_position[2]; 340 341 /* Point the position lists to the start of the list 342 * so that common_diff/conflict detection actually is 343 * able to work. 344 */ 345 if (position_list[1]) 346 { 347 sentinel_position[0].next = position_list[1]->next; 348 sentinel_position[0].offset = position_list[1]->offset + 1; 349 position_list[1]->next = &sentinel_position[0]; 350 position_list[1] = sentinel_position[0].next; 351 } 352 else 353 { 354 sentinel_position[0].offset = prefix_lines + 1; 355 sentinel_position[0].next = NULL; 356 position_list[1] = &sentinel_position[0]; 357 } 358 359 if (position_list[2]) 360 { 361 sentinel_position[1].next = position_list[2]->next; 362 sentinel_position[1].offset = position_list[2]->offset + 1; 363 position_list[2]->next = &sentinel_position[1]; 364 position_list[2] = sentinel_position[1].next; 365 } 366 else 367 { 368 sentinel_position[1].offset = prefix_lines + 1; 369 sentinel_position[1].next = NULL; 370 position_list[2] = &sentinel_position[1]; 371 } 372 373 while (1) 374 { 375 /* Find the sync points */ 376 while (1) 377 { 378 if (lcs_om->position[0]->offset > lcs_ol->position[0]->offset) 379 { 380 original_sync = lcs_om->position[0]->offset; 381 382 while (lcs_ol->position[0]->offset + lcs_ol->length 383 < original_sync) 384 lcs_ol = lcs_ol->next; 385 386 /* If the sync point is the EOF, and our current lcs segment 387 * doesn't reach as far as EOF, we need to skip this segment. 388 */ 389 if (lcs_om->length == 0 && lcs_ol->length > 0 390 && lcs_ol->position[0]->offset + lcs_ol->length 391 == original_sync 392 && lcs_ol->position[1]->offset + lcs_ol->length 393 != lcs_ol->next->position[1]->offset) 394 lcs_ol = lcs_ol->next; 395 396 if (lcs_ol->position[0]->offset <= original_sync) 397 break; 398 } 399 else 400 { 401 original_sync = lcs_ol->position[0]->offset; 402 403 while (lcs_om->position[0]->offset + lcs_om->length 404 < original_sync) 405 lcs_om = lcs_om->next; 406 407 /* If the sync point is the EOF, and our current lcs segment 408 * doesn't reach as far as EOF, we need to skip this segment. 409 */ 410 if (lcs_ol->length == 0 && lcs_om->length > 0 411 && lcs_om->position[0]->offset + lcs_om->length 412 == original_sync 413 && lcs_om->position[1]->offset + lcs_om->length 414 != lcs_om->next->position[1]->offset) 415 lcs_om = lcs_om->next; 416 417 if (lcs_om->position[0]->offset <= original_sync) 418 break; 419 } 420 } 421 422 modified_sync = lcs_om->position[1]->offset 423 + (original_sync - lcs_om->position[0]->offset); 424 latest_sync = lcs_ol->position[1]->offset 425 + (original_sync - lcs_ol->position[0]->offset); 426 427 /* Determine what is modified, if anything */ 428 is_modified = lcs_om->position[0]->offset - original_start > 0 429 || lcs_om->position[1]->offset - modified_start > 0; 430 431 is_latest = lcs_ol->position[0]->offset - original_start > 0 432 || lcs_ol->position[1]->offset - latest_start > 0; 433 434 if (is_modified || is_latest) 435 { 436 modified_length = modified_sync - modified_start; 437 latest_length = latest_sync - latest_start; 438 439 (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref)); 440 441 (*diff_ref)->original_start = original_start - 1; 442 (*diff_ref)->original_length = original_sync - original_start; 443 (*diff_ref)->modified_start = modified_start - 1; 444 (*diff_ref)->modified_length = modified_length; 445 (*diff_ref)->latest_start = latest_start - 1; 446 (*diff_ref)->latest_length = latest_length; 447 (*diff_ref)->resolved_diff = NULL; 448 449 if (is_modified && is_latest) 450 { 451 svn_diff__resolve_conflict(*diff_ref, 452 &position_list[1], 453 &position_list[2], 454 num_tokens, 455 pool); 456 } 457 else if (is_modified) 458 { 459 (*diff_ref)->type = svn_diff__type_diff_modified; 460 } 461 else 462 { 463 (*diff_ref)->type = svn_diff__type_diff_latest; 464 } 465 466 diff_ref = &(*diff_ref)->next; 467 } 468 469 /* Detect EOF */ 470 if (lcs_om->length == 0 || lcs_ol->length == 0) 471 break; 472 473 modified_length = lcs_om->length 474 - (original_sync - lcs_om->position[0]->offset); 475 latest_length = lcs_ol->length 476 - (original_sync - lcs_ol->position[0]->offset); 477 common_length = modified_length < latest_length 478 ? modified_length : latest_length; 479 480 (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref)); 481 482 (*diff_ref)->type = svn_diff__type_common; 483 (*diff_ref)->original_start = original_sync - 1; 484 (*diff_ref)->original_length = common_length; 485 (*diff_ref)->modified_start = modified_sync - 1; 486 (*diff_ref)->modified_length = common_length; 487 (*diff_ref)->latest_start = latest_sync - 1; 488 (*diff_ref)->latest_length = common_length; 489 (*diff_ref)->resolved_diff = NULL; 490 491 diff_ref = &(*diff_ref)->next; 492 493 /* Set the new offsets */ 494 original_start = original_sync + common_length; 495 modified_start = modified_sync + common_length; 496 latest_start = latest_sync + common_length; 497 498 /* Make it easier for diff_common/conflict detection 499 by recording last lcs start positions 500 */ 501 if (position_list[1]->offset < lcs_om->position[1]->offset) 502 position_list[1] = lcs_om->position[1]; 503 504 if (position_list[2]->offset < lcs_ol->position[1]->offset) 505 position_list[2] = lcs_ol->position[1]; 506 507 /* Make sure we are pointing to lcs entries beyond 508 * the range we just processed 509 */ 510 while (original_start >= lcs_om->position[0]->offset + lcs_om->length 511 && lcs_om->length > 0) 512 { 513 lcs_om = lcs_om->next; 514 } 515 516 while (original_start >= lcs_ol->position[0]->offset + lcs_ol->length 517 && lcs_ol->length > 0) 518 { 519 lcs_ol = lcs_ol->next; 520 } 521 } 522 523 *diff_ref = NULL; 524 } 525 526 svn_pool_destroy(subpool); 527 528 return SVN_NO_ERROR; 529} 530