util.c revision 289166
1/* 2 * util.c : routines for doing diffs 3 * 4 * ==================================================================== 5 * Licensed to the Apache Software Foundation (ASF) under one 6 * or more contributor license agreements. See the NOTICE file 7 * distributed with this work for additional information 8 * regarding copyright ownership. The ASF licenses this file 9 * to you under the Apache License, Version 2.0 (the 10 * "License"); you may not use this file except in compliance 11 * with the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, 16 * software distributed under the License is distributed on an 17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 18 * KIND, either express or implied. See the License for the 19 * specific language governing permissions and limitations 20 * under the License. 21 * ==================================================================== 22 */ 23 24 25#include <apr.h> 26#include <apr_general.h> 27 28#include "svn_hash.h" 29#include "svn_pools.h" 30#include "svn_dirent_uri.h" 31#include "svn_props.h" 32#include "svn_mergeinfo.h" 33#include "svn_error.h" 34#include "svn_diff.h" 35#include "svn_types.h" 36#include "svn_ctype.h" 37#include "svn_sorts.h" 38#include "svn_utf.h" 39#include "svn_version.h" 40 41#include "private/svn_diff_private.h" 42#include "diff.h" 43 44#include "svn_private_config.h" 45 46 47svn_boolean_t 48svn_diff_contains_conflicts(svn_diff_t *diff) 49{ 50 while (diff != NULL) 51 { 52 if (diff->type == svn_diff__type_conflict) 53 { 54 return TRUE; 55 } 56 57 diff = diff->next; 58 } 59 60 return FALSE; 61} 62 63svn_boolean_t 64svn_diff_contains_diffs(svn_diff_t *diff) 65{ 66 while (diff != NULL) 67 { 68 if (diff->type != svn_diff__type_common) 69 { 70 return TRUE; 71 } 72 73 diff = diff->next; 74 } 75 76 return FALSE; 77} 78 79svn_error_t * 80svn_diff_output(svn_diff_t *diff, 81 void *output_baton, 82 const svn_diff_output_fns_t *vtable) 83{ 84 svn_error_t *(*output_fn)(void *, 85 apr_off_t, apr_off_t, 86 apr_off_t, apr_off_t, 87 apr_off_t, apr_off_t); 88 89 while (diff != NULL) 90 { 91 switch (diff->type) 92 { 93 case svn_diff__type_common: 94 output_fn = vtable->output_common; 95 break; 96 97 case svn_diff__type_diff_common: 98 output_fn = vtable->output_diff_common; 99 break; 100 101 case svn_diff__type_diff_modified: 102 output_fn = vtable->output_diff_modified; 103 break; 104 105 case svn_diff__type_diff_latest: 106 output_fn = vtable->output_diff_latest; 107 break; 108 109 case svn_diff__type_conflict: 110 output_fn = NULL; 111 if (vtable->output_conflict != NULL) 112 { 113 SVN_ERR(vtable->output_conflict(output_baton, 114 diff->original_start, diff->original_length, 115 diff->modified_start, diff->modified_length, 116 diff->latest_start, diff->latest_length, 117 diff->resolved_diff)); 118 } 119 break; 120 121 default: 122 output_fn = NULL; 123 break; 124 } 125 126 if (output_fn != NULL) 127 { 128 SVN_ERR(output_fn(output_baton, 129 diff->original_start, diff->original_length, 130 diff->modified_start, diff->modified_length, 131 diff->latest_start, diff->latest_length)); 132 } 133 134 diff = diff->next; 135 } 136 137 return SVN_NO_ERROR; 138} 139 140 141void 142svn_diff__normalize_buffer(char **tgt, 143 apr_off_t *lengthp, 144 svn_diff__normalize_state_t *statep, 145 const char *buf, 146 const svn_diff_file_options_t *opts) 147{ 148 /* Variables for looping through BUF */ 149 const char *curp, *endp; 150 151 /* Variable to record normalizing state */ 152 svn_diff__normalize_state_t state = *statep; 153 154 /* Variables to track what needs copying into the target buffer */ 155 const char *start = buf; 156 apr_size_t include_len = 0; 157 svn_boolean_t last_skipped = FALSE; /* makes sure we set 'start' */ 158 159 /* Variable to record the state of the target buffer */ 160 char *tgt_newend = *tgt; 161 162 /* If this is a noop, then just get out of here. */ 163 if (! opts->ignore_space && ! opts->ignore_eol_style) 164 { 165 *tgt = (char *)buf; 166 return; 167 } 168 169 170 /* It only took me forever to get this routine right, 171 so here my thoughts go: 172 173 Below, we loop through the data, doing 2 things: 174 175 - Normalizing 176 - Copying other data 177 178 The routine tries its hardest *not* to copy data, but instead 179 returning a pointer into already normalized existing data. 180 181 To this end, a block 'other data' shouldn't be copied when found, 182 but only as soon as it can't be returned in-place. 183 184 On a character level, there are 3 possible operations: 185 186 - Skip the character (don't include in the normalized data) 187 - Include the character (do include in the normalizad data) 188 - Include as another character 189 This is essentially the same as skipping the current character 190 and inserting a given character in the output data. 191 192 The macros below (SKIP, INCLUDE and INCLUDE_AS) are defined to 193 handle the character based operations. The macros themselves 194 collect character level data into blocks. 195 196 At all times designate the START, INCLUDED_LEN and CURP pointers 197 an included and and skipped block like this: 198 199 [ start, start + included_len ) [ start + included_len, curp ) 200 INCLUDED EXCLUDED 201 202 When the routine flips from skipping to including, the last 203 included block has to be flushed to the output buffer. 204 */ 205 206 /* Going from including to skipping; only schedules the current 207 included section for flushing. 208 Also, simply chop off the character if it's the first in the buffer, 209 so we can possibly just return the remainder of the buffer */ 210#define SKIP \ 211 do { \ 212 if (start == curp) \ 213 ++start; \ 214 last_skipped = TRUE; \ 215 } while (0) 216 217#define INCLUDE \ 218 do { \ 219 if (last_skipped) \ 220 COPY_INCLUDED_SECTION; \ 221 ++include_len; \ 222 last_skipped = FALSE; \ 223 } while (0) 224 225#define COPY_INCLUDED_SECTION \ 226 do { \ 227 if (include_len > 0) \ 228 { \ 229 memmove(tgt_newend, start, include_len); \ 230 tgt_newend += include_len; \ 231 include_len = 0; \ 232 } \ 233 start = curp; \ 234 } while (0) 235 236 /* Include the current character as character X. 237 If the current character already *is* X, add it to the 238 currently included region, increasing chances for consecutive 239 fully normalized blocks. */ 240#define INCLUDE_AS(x) \ 241 do { \ 242 if (*curp == (x)) \ 243 INCLUDE; \ 244 else \ 245 { \ 246 INSERT((x)); \ 247 SKIP; \ 248 } \ 249 } while (0) 250 251 /* Insert character X in the output buffer */ 252#define INSERT(x) \ 253 do { \ 254 COPY_INCLUDED_SECTION; \ 255 *tgt_newend++ = (x); \ 256 } while (0) 257 258 for (curp = buf, endp = buf + *lengthp; curp != endp; ++curp) 259 { 260 switch (*curp) 261 { 262 case '\r': 263 if (opts->ignore_eol_style) 264 INCLUDE_AS('\n'); 265 else 266 INCLUDE; 267 state = svn_diff__normalize_state_cr; 268 break; 269 270 case '\n': 271 if (state == svn_diff__normalize_state_cr 272 && opts->ignore_eol_style) 273 SKIP; 274 else 275 INCLUDE; 276 state = svn_diff__normalize_state_normal; 277 break; 278 279 default: 280 if (svn_ctype_isspace(*curp) 281 && opts->ignore_space != svn_diff_file_ignore_space_none) 282 { 283 /* Whitespace but not '\r' or '\n' */ 284 if (state != svn_diff__normalize_state_whitespace 285 && opts->ignore_space 286 == svn_diff_file_ignore_space_change) 287 /*### If we can postpone insertion of the space 288 until the next non-whitespace character, 289 we have a potential of reducing the number of copies: 290 If this space is followed by more spaces, 291 this will cause a block-copy. 292 If the next non-space block is considered normalized 293 *and* preceded by a space, we can take advantage of that. */ 294 /* Note, the above optimization applies to 90% of the source 295 lines in our own code, since it (generally) doesn't use 296 more than one space per blank section, except for the 297 beginning of a line. */ 298 INCLUDE_AS(' '); 299 else 300 SKIP; 301 state = svn_diff__normalize_state_whitespace; 302 } 303 else 304 { 305 /* Non-whitespace character, or whitespace character in 306 svn_diff_file_ignore_space_none mode. */ 307 INCLUDE; 308 state = svn_diff__normalize_state_normal; 309 } 310 } 311 } 312 313 /* If we're not in whitespace, flush the last chunk of data. 314 * Note that this will work correctly when this is the last chunk of the 315 * file: 316 * * If there is an eol, it will either have been output when we entered 317 * the state_cr, or it will be output now. 318 * * If there is no eol and we're not in whitespace, then we just output 319 * everything below. 320 * * If there's no eol and we are in whitespace, we want to ignore 321 * whitespace unconditionally. */ 322 323 if (*tgt == tgt_newend) 324 { 325 /* we haven't copied any data in to *tgt and our chunk consists 326 only of one block of (already normalized) data. 327 Just return the block. */ 328 *tgt = (char *)start; 329 *lengthp = include_len; 330 } 331 else 332 { 333 COPY_INCLUDED_SECTION; 334 *lengthp = tgt_newend - *tgt; 335 } 336 337 *statep = state; 338 339#undef SKIP 340#undef INCLUDE 341#undef INCLUDE_AS 342#undef INSERT 343#undef COPY_INCLUDED_SECTION 344} 345 346svn_error_t * 347svn_diff__unified_append_no_newline_msg(svn_stringbuf_t *stringbuf, 348 const char *header_encoding, 349 apr_pool_t *scratch_pool) 350{ 351 const char *out_str; 352 353 SVN_ERR(svn_utf_cstring_from_utf8_ex2( 354 &out_str, 355 APR_EOL_STR 356 SVN_DIFF__NO_NEWLINE_AT_END_OF_FILE APR_EOL_STR, 357 header_encoding, scratch_pool)); 358 svn_stringbuf_appendcstr(stringbuf, out_str); 359 return SVN_NO_ERROR; 360} 361 362svn_error_t * 363svn_diff__unified_write_hunk_header(svn_stream_t *output_stream, 364 const char *header_encoding, 365 const char *hunk_delimiter, 366 apr_off_t old_start, 367 apr_off_t old_length, 368 apr_off_t new_start, 369 apr_off_t new_length, 370 const char *hunk_extra_context, 371 apr_pool_t *scratch_pool) 372{ 373 SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding, 374 scratch_pool, 375 "%s -%" APR_OFF_T_FMT, 376 hunk_delimiter, old_start)); 377 /* If the hunk length is 1, suppress the number of lines in the hunk 378 * (it is 1 implicitly) */ 379 if (old_length != 1) 380 { 381 SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding, 382 scratch_pool, 383 ",%" APR_OFF_T_FMT, old_length)); 384 } 385 386 SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding, 387 scratch_pool, 388 " +%" APR_OFF_T_FMT, new_start)); 389 if (new_length != 1) 390 { 391 SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding, 392 scratch_pool, 393 ",%" APR_OFF_T_FMT, new_length)); 394 } 395 396 if (hunk_extra_context == NULL) 397 hunk_extra_context = ""; 398 SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding, 399 scratch_pool, 400 " %s%s%s" APR_EOL_STR, 401 hunk_delimiter, 402 hunk_extra_context[0] ? " " : "", 403 hunk_extra_context)); 404 return SVN_NO_ERROR; 405} 406 407svn_error_t * 408svn_diff__unidiff_write_header(svn_stream_t *output_stream, 409 const char *header_encoding, 410 const char *old_header, 411 const char *new_header, 412 apr_pool_t *scratch_pool) 413{ 414 SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding, 415 scratch_pool, 416 "--- %s" APR_EOL_STR 417 "+++ %s" APR_EOL_STR, 418 old_header, 419 new_header)); 420 return SVN_NO_ERROR; 421} 422 423/* A helper function for display_prop_diffs. Output the differences between 424 the mergeinfo stored in ORIG_MERGEINFO_VAL and NEW_MERGEINFO_VAL in a 425 human-readable form to OUTSTREAM, using ENCODING. Use POOL for temporary 426 allocations. */ 427static svn_error_t * 428display_mergeinfo_diff(const char *old_mergeinfo_val, 429 const char *new_mergeinfo_val, 430 const char *encoding, 431 svn_stream_t *outstream, 432 apr_pool_t *pool) 433{ 434 apr_hash_t *old_mergeinfo_hash, *new_mergeinfo_hash, *added, *deleted; 435 apr_pool_t *iterpool = svn_pool_create(pool); 436 apr_hash_index_t *hi; 437 438 if (old_mergeinfo_val) 439 SVN_ERR(svn_mergeinfo_parse(&old_mergeinfo_hash, old_mergeinfo_val, pool)); 440 else 441 old_mergeinfo_hash = NULL; 442 443 if (new_mergeinfo_val) 444 SVN_ERR(svn_mergeinfo_parse(&new_mergeinfo_hash, new_mergeinfo_val, pool)); 445 else 446 new_mergeinfo_hash = NULL; 447 448 SVN_ERR(svn_mergeinfo_diff2(&deleted, &added, old_mergeinfo_hash, 449 new_mergeinfo_hash, 450 TRUE, pool, pool)); 451 452 for (hi = apr_hash_first(pool, deleted); 453 hi; hi = apr_hash_next(hi)) 454 { 455 const char *from_path = svn__apr_hash_index_key(hi); 456 svn_rangelist_t *merge_revarray = svn__apr_hash_index_val(hi); 457 svn_string_t *merge_revstr; 458 459 svn_pool_clear(iterpool); 460 SVN_ERR(svn_rangelist_to_string(&merge_revstr, merge_revarray, 461 iterpool)); 462 463 SVN_ERR(svn_stream_printf_from_utf8(outstream, encoding, iterpool, 464 _(" Reverse-merged %s:r%s%s"), 465 from_path, merge_revstr->data, 466 APR_EOL_STR)); 467 } 468 469 for (hi = apr_hash_first(pool, added); 470 hi; hi = apr_hash_next(hi)) 471 { 472 const char *from_path = svn__apr_hash_index_key(hi); 473 svn_rangelist_t *merge_revarray = svn__apr_hash_index_val(hi); 474 svn_string_t *merge_revstr; 475 476 svn_pool_clear(iterpool); 477 SVN_ERR(svn_rangelist_to_string(&merge_revstr, merge_revarray, 478 iterpool)); 479 480 SVN_ERR(svn_stream_printf_from_utf8(outstream, encoding, iterpool, 481 _(" Merged %s:r%s%s"), 482 from_path, merge_revstr->data, 483 APR_EOL_STR)); 484 } 485 486 svn_pool_destroy(iterpool); 487 return SVN_NO_ERROR; 488} 489 490/* qsort callback handling svn_prop_t by name */ 491static int 492propchange_sort(const void *k1, const void *k2) 493{ 494 const svn_prop_t *propchange1 = k1; 495 const svn_prop_t *propchange2 = k2; 496 497 return strcmp(propchange1->name, propchange2->name); 498} 499 500svn_error_t * 501svn_diff__display_prop_diffs(svn_stream_t *outstream, 502 const char *encoding, 503 const apr_array_header_t *propchanges, 504 apr_hash_t *original_props, 505 svn_boolean_t pretty_print_mergeinfo, 506 apr_pool_t *scratch_pool) 507{ 508 apr_pool_t *pool = scratch_pool; 509 apr_pool_t *iterpool = svn_pool_create(pool); 510 apr_array_header_t *changes = apr_array_copy(scratch_pool, propchanges); 511 int i; 512 513 qsort(changes->elts, changes->nelts, changes->elt_size, propchange_sort); 514 515 for (i = 0; i < changes->nelts; i++) 516 { 517 const char *action; 518 const svn_string_t *original_value; 519 const svn_prop_t *propchange 520 = &APR_ARRAY_IDX(changes, i, svn_prop_t); 521 522 if (original_props) 523 original_value = svn_hash_gets(original_props, propchange->name); 524 else 525 original_value = NULL; 526 527 /* If the property doesn't exist on either side, or if it exists 528 with the same value, skip it. This can happen if the client is 529 hitting an old mod_dav_svn server that doesn't understand the 530 "send-all" REPORT style. */ 531 if ((! (original_value || propchange->value)) 532 || (original_value && propchange->value 533 && svn_string_compare(original_value, propchange->value))) 534 continue; 535 536 svn_pool_clear(iterpool); 537 538 if (! original_value) 539 action = "Added"; 540 else if (! propchange->value) 541 action = "Deleted"; 542 else 543 action = "Modified"; 544 SVN_ERR(svn_stream_printf_from_utf8(outstream, encoding, iterpool, 545 "%s: %s%s", action, 546 propchange->name, APR_EOL_STR)); 547 548 if (pretty_print_mergeinfo 549 && strcmp(propchange->name, SVN_PROP_MERGEINFO) == 0) 550 { 551 const char *orig = original_value ? original_value->data : NULL; 552 const char *val = propchange->value ? propchange->value->data : NULL; 553 svn_error_t *err = display_mergeinfo_diff(orig, val, encoding, 554 outstream, iterpool); 555 556 /* Issue #3896: If we can't pretty-print mergeinfo differences 557 because invalid mergeinfo is present, then don't let the diff 558 fail, just print the diff as any other property. */ 559 if (err && err->apr_err == SVN_ERR_MERGEINFO_PARSE_ERROR) 560 { 561 svn_error_clear(err); 562 } 563 else 564 { 565 SVN_ERR(err); 566 continue; 567 } 568 } 569 570 { 571 svn_diff_t *diff; 572 svn_diff_file_options_t options = { 0 }; 573 const svn_string_t *orig 574 = original_value ? original_value 575 : svn_string_create_empty(iterpool); 576 const svn_string_t *val 577 = propchange->value ? propchange->value 578 : svn_string_create_empty(iterpool); 579 580 SVN_ERR(svn_diff_mem_string_diff(&diff, orig, val, &options, 581 iterpool)); 582 583 /* UNIX patch will try to apply a diff even if the diff header 584 * is missing. It tries to be helpful by asking the user for a 585 * target filename when it can't determine the target filename 586 * from the diff header. But there usually are no files which 587 * UNIX patch could apply the property diff to, so we use "##" 588 * instead of "@@" as the default hunk delimiter for property diffs. 589 * We also supress the diff header. */ 590 SVN_ERR(svn_diff_mem_string_output_unified2( 591 outstream, diff, FALSE /* no header */, "##", NULL, NULL, 592 encoding, orig, val, iterpool)); 593 } 594 } 595 svn_pool_destroy(iterpool); 596 597 return SVN_NO_ERROR; 598} 599 600 601/* Return the library version number. */ 602const svn_version_t * 603svn_diff_version(void) 604{ 605 SVN_VERSION_BODY; 606} 607