1251881Speter/* 2251881Speter * parse-diff.c: functions for parsing diff files 3251881Speter * 4251881Speter * ==================================================================== 5251881Speter * Licensed to the Apache Software Foundation (ASF) under one 6251881Speter * or more contributor license agreements. See the NOTICE file 7251881Speter * distributed with this work for additional information 8251881Speter * regarding copyright ownership. The ASF licenses this file 9251881Speter * to you under the Apache License, Version 2.0 (the 10251881Speter * "License"); you may not use this file except in compliance 11251881Speter * with the License. You may obtain a copy of the License at 12251881Speter * 13251881Speter * http://www.apache.org/licenses/LICENSE-2.0 14251881Speter * 15251881Speter * Unless required by applicable law or agreed to in writing, 16251881Speter * software distributed under the License is distributed on an 17251881Speter * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 18251881Speter * KIND, either express or implied. See the License for the 19251881Speter * specific language governing permissions and limitations 20251881Speter * under the License. 21251881Speter * ==================================================================== 22251881Speter */ 23251881Speter 24251881Speter#include <stdlib.h> 25251881Speter#include <stddef.h> 26251881Speter#include <string.h> 27251881Speter 28251881Speter#include "svn_hash.h" 29251881Speter#include "svn_types.h" 30251881Speter#include "svn_error.h" 31251881Speter#include "svn_io.h" 32251881Speter#include "svn_pools.h" 33251881Speter#include "svn_props.h" 34251881Speter#include "svn_string.h" 35251881Speter#include "svn_utf.h" 36251881Speter#include "svn_dirent_uri.h" 37251881Speter#include "svn_diff.h" 38299742Sdim#include "svn_ctype.h" 39299742Sdim#include "svn_mergeinfo.h" 40251881Speter 41251881Speter#include "private/svn_eol_private.h" 42251881Speter#include "private/svn_dep_compat.h" 43299742Sdim#include "private/svn_sorts_private.h" 44251881Speter 45251881Speter/* Helper macro for readability */ 46251881Speter#define starts_with(str, start) \ 47251881Speter (strncmp((str), (start), strlen(start)) == 0) 48251881Speter 49251881Speter/* Like strlen() but for string literals. */ 50251881Speter#define STRLEN_LITERAL(str) (sizeof(str) - 1) 51251881Speter 52251881Speter/* This struct describes a range within a file, as well as the 53251881Speter * current cursor position within the range. All numbers are in bytes. */ 54251881Speterstruct svn_diff__hunk_range { 55251881Speter apr_off_t start; 56251881Speter apr_off_t end; 57251881Speter apr_off_t current; 58251881Speter}; 59251881Speter 60251881Speterstruct svn_diff_hunk_t { 61251881Speter /* The patch this hunk belongs to. */ 62251881Speter svn_patch_t *patch; 63251881Speter 64251881Speter /* APR file handle to the patch file this hunk came from. */ 65251881Speter apr_file_t *apr_file; 66251881Speter 67251881Speter /* Ranges used to keep track of this hunk's texts positions within 68251881Speter * the patch file. */ 69251881Speter struct svn_diff__hunk_range diff_text_range; 70251881Speter struct svn_diff__hunk_range original_text_range; 71251881Speter struct svn_diff__hunk_range modified_text_range; 72251881Speter 73251881Speter /* Hunk ranges as they appeared in the patch file. 74251881Speter * All numbers are lines, not bytes. */ 75251881Speter svn_linenum_t original_start; 76251881Speter svn_linenum_t original_length; 77251881Speter svn_linenum_t modified_start; 78251881Speter svn_linenum_t modified_length; 79251881Speter 80251881Speter /* Number of lines of leading and trailing hunk context. */ 81251881Speter svn_linenum_t leading_context; 82251881Speter svn_linenum_t trailing_context; 83251881Speter}; 84251881Speter 85251881Spetervoid 86251881Spetersvn_diff_hunk_reset_diff_text(svn_diff_hunk_t *hunk) 87251881Speter{ 88251881Speter hunk->diff_text_range.current = hunk->diff_text_range.start; 89251881Speter} 90251881Speter 91251881Spetervoid 92251881Spetersvn_diff_hunk_reset_original_text(svn_diff_hunk_t *hunk) 93251881Speter{ 94251881Speter if (hunk->patch->reverse) 95251881Speter hunk->modified_text_range.current = hunk->modified_text_range.start; 96251881Speter else 97251881Speter hunk->original_text_range.current = hunk->original_text_range.start; 98251881Speter} 99251881Speter 100251881Spetervoid 101251881Spetersvn_diff_hunk_reset_modified_text(svn_diff_hunk_t *hunk) 102251881Speter{ 103251881Speter if (hunk->patch->reverse) 104251881Speter hunk->original_text_range.current = hunk->original_text_range.start; 105251881Speter else 106251881Speter hunk->modified_text_range.current = hunk->modified_text_range.start; 107251881Speter} 108251881Speter 109251881Spetersvn_linenum_t 110251881Spetersvn_diff_hunk_get_original_start(const svn_diff_hunk_t *hunk) 111251881Speter{ 112251881Speter return hunk->patch->reverse ? hunk->modified_start : hunk->original_start; 113251881Speter} 114251881Speter 115251881Spetersvn_linenum_t 116251881Spetersvn_diff_hunk_get_original_length(const svn_diff_hunk_t *hunk) 117251881Speter{ 118251881Speter return hunk->patch->reverse ? hunk->modified_length : hunk->original_length; 119251881Speter} 120251881Speter 121251881Spetersvn_linenum_t 122251881Spetersvn_diff_hunk_get_modified_start(const svn_diff_hunk_t *hunk) 123251881Speter{ 124251881Speter return hunk->patch->reverse ? hunk->original_start : hunk->modified_start; 125251881Speter} 126251881Speter 127251881Spetersvn_linenum_t 128251881Spetersvn_diff_hunk_get_modified_length(const svn_diff_hunk_t *hunk) 129251881Speter{ 130251881Speter return hunk->patch->reverse ? hunk->original_length : hunk->modified_length; 131251881Speter} 132251881Speter 133251881Spetersvn_linenum_t 134251881Spetersvn_diff_hunk_get_leading_context(const svn_diff_hunk_t *hunk) 135251881Speter{ 136251881Speter return hunk->leading_context; 137251881Speter} 138251881Speter 139251881Spetersvn_linenum_t 140251881Spetersvn_diff_hunk_get_trailing_context(const svn_diff_hunk_t *hunk) 141251881Speter{ 142251881Speter return hunk->trailing_context; 143251881Speter} 144251881Speter 145251881Speter/* Try to parse a positive number from a decimal number encoded 146251881Speter * in the string NUMBER. Return parsed number in OFFSET, and return 147251881Speter * TRUE if parsing was successful. */ 148251881Speterstatic svn_boolean_t 149251881Speterparse_offset(svn_linenum_t *offset, const char *number) 150251881Speter{ 151251881Speter svn_error_t *err; 152251881Speter apr_uint64_t val; 153251881Speter 154251881Speter err = svn_cstring_strtoui64(&val, number, 0, SVN_LINENUM_MAX_VALUE, 10); 155251881Speter if (err) 156251881Speter { 157251881Speter svn_error_clear(err); 158251881Speter return FALSE; 159251881Speter } 160251881Speter 161251881Speter *offset = (svn_linenum_t)val; 162251881Speter 163251881Speter return TRUE; 164251881Speter} 165251881Speter 166251881Speter/* Try to parse a hunk range specification from the string RANGE. 167251881Speter * Return parsed information in *START and *LENGTH, and return TRUE 168251881Speter * if the range parsed correctly. Note: This function may modify the 169251881Speter * input value RANGE. */ 170251881Speterstatic svn_boolean_t 171251881Speterparse_range(svn_linenum_t *start, svn_linenum_t *length, char *range) 172251881Speter{ 173251881Speter char *comma; 174251881Speter 175251881Speter if (*range == 0) 176251881Speter return FALSE; 177251881Speter 178251881Speter comma = strstr(range, ","); 179251881Speter if (comma) 180251881Speter { 181251881Speter if (strlen(comma + 1) > 0) 182251881Speter { 183251881Speter /* Try to parse the length. */ 184251881Speter if (! parse_offset(length, comma + 1)) 185251881Speter return FALSE; 186251881Speter 187251881Speter /* Snip off the end of the string, 188251881Speter * so we can comfortably parse the line 189251881Speter * number the hunk starts at. */ 190251881Speter *comma = '\0'; 191251881Speter } 192251881Speter else 193251881Speter /* A comma but no length? */ 194251881Speter return FALSE; 195251881Speter } 196251881Speter else 197251881Speter { 198251881Speter *length = 1; 199251881Speter } 200251881Speter 201251881Speter /* Try to parse the line number the hunk starts at. */ 202251881Speter return parse_offset(start, range); 203251881Speter} 204251881Speter 205251881Speter/* Try to parse a hunk header in string HEADER, putting parsed information 206251881Speter * into HUNK. Return TRUE if the header parsed correctly. ATAT is the 207251881Speter * character string used to delimit the hunk header. 208251881Speter * Do all allocations in POOL. */ 209251881Speterstatic svn_boolean_t 210251881Speterparse_hunk_header(const char *header, svn_diff_hunk_t *hunk, 211251881Speter const char *atat, apr_pool_t *pool) 212251881Speter{ 213251881Speter const char *p; 214251881Speter const char *start; 215251881Speter svn_stringbuf_t *range; 216251881Speter 217251881Speter p = header + strlen(atat); 218251881Speter if (*p != ' ') 219251881Speter /* No. */ 220251881Speter return FALSE; 221251881Speter p++; 222251881Speter if (*p != '-') 223251881Speter /* Nah... */ 224251881Speter return FALSE; 225251881Speter /* OK, this may be worth allocating some memory for... */ 226251881Speter range = svn_stringbuf_create_ensure(31, pool); 227251881Speter start = ++p; 228251881Speter while (*p && *p != ' ') 229251881Speter { 230251881Speter p++; 231251881Speter } 232251881Speter 233251881Speter if (*p != ' ') 234251881Speter /* No no no... */ 235251881Speter return FALSE; 236251881Speter 237251881Speter svn_stringbuf_appendbytes(range, start, p - start); 238251881Speter 239251881Speter /* Try to parse the first range. */ 240251881Speter if (! parse_range(&hunk->original_start, &hunk->original_length, range->data)) 241251881Speter return FALSE; 242251881Speter 243251881Speter /* Clear the stringbuf so we can reuse it for the second range. */ 244251881Speter svn_stringbuf_setempty(range); 245251881Speter p++; 246251881Speter if (*p != '+') 247251881Speter /* Eeek! */ 248251881Speter return FALSE; 249251881Speter /* OK, this may be worth copying... */ 250251881Speter start = ++p; 251251881Speter while (*p && *p != ' ') 252251881Speter { 253251881Speter p++; 254251881Speter } 255251881Speter if (*p != ' ') 256251881Speter /* No no no... */ 257251881Speter return FALSE; 258251881Speter 259251881Speter svn_stringbuf_appendbytes(range, start, p - start); 260251881Speter 261251881Speter /* Check for trailing @@ */ 262251881Speter p++; 263251881Speter if (! starts_with(p, atat)) 264251881Speter return FALSE; 265251881Speter 266251881Speter /* There may be stuff like C-function names after the trailing @@, 267251881Speter * but we ignore that. */ 268251881Speter 269251881Speter /* Try to parse the second range. */ 270251881Speter if (! parse_range(&hunk->modified_start, &hunk->modified_length, range->data)) 271251881Speter return FALSE; 272251881Speter 273251881Speter /* Hunk header is good. */ 274251881Speter return TRUE; 275251881Speter} 276251881Speter 277251881Speter/* Read a line of original or modified hunk text from the specified 278251881Speter * RANGE within FILE. FILE is expected to contain unidiff text. 279251881Speter * Leading unidiff symbols ('+', '-', and ' ') are removed from the line, 280251881Speter * Any lines commencing with the VERBOTEN character are discarded. 281251881Speter * VERBOTEN should be '+' or '-', depending on which form of hunk text 282251881Speter * is being read. 283251881Speter * 284251881Speter * All other parameters are as in svn_diff_hunk_readline_original_text() 285251881Speter * and svn_diff_hunk_readline_modified_text(). 286251881Speter */ 287251881Speterstatic svn_error_t * 288251881Speterhunk_readline_original_or_modified(apr_file_t *file, 289251881Speter struct svn_diff__hunk_range *range, 290251881Speter svn_stringbuf_t **stringbuf, 291251881Speter const char **eol, 292251881Speter svn_boolean_t *eof, 293251881Speter char verboten, 294251881Speter apr_pool_t *result_pool, 295251881Speter apr_pool_t *scratch_pool) 296251881Speter{ 297251881Speter apr_size_t max_len; 298251881Speter svn_boolean_t filtered; 299251881Speter apr_off_t pos; 300251881Speter svn_stringbuf_t *str; 301251881Speter 302251881Speter if (range->current >= range->end) 303251881Speter { 304251881Speter /* We're past the range. Indicate that no bytes can be read. */ 305251881Speter *eof = TRUE; 306251881Speter if (eol) 307251881Speter *eol = NULL; 308251881Speter *stringbuf = svn_stringbuf_create_empty(result_pool); 309251881Speter return SVN_NO_ERROR; 310251881Speter } 311251881Speter 312251881Speter pos = 0; 313251881Speter SVN_ERR(svn_io_file_seek(file, APR_CUR, &pos, scratch_pool)); 314251881Speter SVN_ERR(svn_io_file_seek(file, APR_SET, &range->current, scratch_pool)); 315251881Speter do 316251881Speter { 317251881Speter max_len = range->end - range->current; 318251881Speter SVN_ERR(svn_io_file_readline(file, &str, eol, eof, max_len, 319251881Speter result_pool, scratch_pool)); 320251881Speter range->current = 0; 321251881Speter SVN_ERR(svn_io_file_seek(file, APR_CUR, &range->current, scratch_pool)); 322251881Speter filtered = (str->data[0] == verboten || str->data[0] == '\\'); 323251881Speter } 324251881Speter while (filtered && ! *eof); 325251881Speter 326251881Speter if (filtered) 327251881Speter { 328251881Speter /* EOF, return an empty string. */ 329251881Speter *stringbuf = svn_stringbuf_create_ensure(0, result_pool); 330251881Speter } 331251881Speter else if (str->data[0] == '+' || str->data[0] == '-' || str->data[0] == ' ') 332251881Speter { 333251881Speter /* Shave off leading unidiff symbols. */ 334251881Speter *stringbuf = svn_stringbuf_create(str->data + 1, result_pool); 335251881Speter } 336251881Speter else 337251881Speter { 338251881Speter /* Return the line as-is. */ 339251881Speter *stringbuf = svn_stringbuf_dup(str, result_pool); 340251881Speter } 341251881Speter 342251881Speter SVN_ERR(svn_io_file_seek(file, APR_SET, &pos, scratch_pool)); 343251881Speter 344251881Speter return SVN_NO_ERROR; 345251881Speter} 346251881Speter 347251881Spetersvn_error_t * 348251881Spetersvn_diff_hunk_readline_original_text(svn_diff_hunk_t *hunk, 349251881Speter svn_stringbuf_t **stringbuf, 350251881Speter const char **eol, 351251881Speter svn_boolean_t *eof, 352251881Speter apr_pool_t *result_pool, 353251881Speter apr_pool_t *scratch_pool) 354251881Speter{ 355251881Speter return svn_error_trace( 356251881Speter hunk_readline_original_or_modified(hunk->apr_file, 357251881Speter hunk->patch->reverse ? 358251881Speter &hunk->modified_text_range : 359251881Speter &hunk->original_text_range, 360251881Speter stringbuf, eol, eof, 361251881Speter hunk->patch->reverse ? '-' : '+', 362251881Speter result_pool, scratch_pool)); 363251881Speter} 364251881Speter 365251881Spetersvn_error_t * 366251881Spetersvn_diff_hunk_readline_modified_text(svn_diff_hunk_t *hunk, 367251881Speter svn_stringbuf_t **stringbuf, 368251881Speter const char **eol, 369251881Speter svn_boolean_t *eof, 370251881Speter apr_pool_t *result_pool, 371251881Speter apr_pool_t *scratch_pool) 372251881Speter{ 373251881Speter return svn_error_trace( 374251881Speter hunk_readline_original_or_modified(hunk->apr_file, 375251881Speter hunk->patch->reverse ? 376251881Speter &hunk->original_text_range : 377251881Speter &hunk->modified_text_range, 378251881Speter stringbuf, eol, eof, 379251881Speter hunk->patch->reverse ? '+' : '-', 380251881Speter result_pool, scratch_pool)); 381251881Speter} 382251881Speter 383251881Spetersvn_error_t * 384251881Spetersvn_diff_hunk_readline_diff_text(svn_diff_hunk_t *hunk, 385251881Speter svn_stringbuf_t **stringbuf, 386251881Speter const char **eol, 387251881Speter svn_boolean_t *eof, 388251881Speter apr_pool_t *result_pool, 389251881Speter apr_pool_t *scratch_pool) 390251881Speter{ 391251881Speter svn_stringbuf_t *line; 392251881Speter apr_size_t max_len; 393251881Speter apr_off_t pos; 394251881Speter 395251881Speter if (hunk->diff_text_range.current >= hunk->diff_text_range.end) 396251881Speter { 397251881Speter /* We're past the range. Indicate that no bytes can be read. */ 398251881Speter *eof = TRUE; 399251881Speter if (eol) 400251881Speter *eol = NULL; 401251881Speter *stringbuf = svn_stringbuf_create_empty(result_pool); 402251881Speter return SVN_NO_ERROR; 403251881Speter } 404251881Speter 405251881Speter pos = 0; 406251881Speter SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_CUR, &pos, scratch_pool)); 407251881Speter SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET, 408251881Speter &hunk->diff_text_range.current, scratch_pool)); 409251881Speter max_len = hunk->diff_text_range.end - hunk->diff_text_range.current; 410251881Speter SVN_ERR(svn_io_file_readline(hunk->apr_file, &line, eol, eof, max_len, 411251881Speter result_pool, 412251881Speter scratch_pool)); 413251881Speter hunk->diff_text_range.current = 0; 414251881Speter SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_CUR, 415251881Speter &hunk->diff_text_range.current, scratch_pool)); 416251881Speter SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET, &pos, scratch_pool)); 417251881Speter 418251881Speter if (hunk->patch->reverse) 419251881Speter { 420299742Sdim if (line->data[0] == '+') 421299742Sdim line->data[0] = '-'; 422299742Sdim else if (line->data[0] == '-') 423299742Sdim line->data[0] = '+'; 424251881Speter } 425251881Speter 426251881Speter *stringbuf = line; 427251881Speter 428251881Speter return SVN_NO_ERROR; 429251881Speter} 430251881Speter 431251881Speter/* Parse *PROP_NAME from HEADER as the part after the INDICATOR line. 432251881Speter * Allocate *PROP_NAME in RESULT_POOL. 433251881Speter * Set *PROP_NAME to NULL if no valid property name was found. */ 434251881Speterstatic svn_error_t * 435251881Speterparse_prop_name(const char **prop_name, const char *header, 436251881Speter const char *indicator, apr_pool_t *result_pool) 437251881Speter{ 438251881Speter SVN_ERR(svn_utf_cstring_to_utf8(prop_name, 439251881Speter header + strlen(indicator), 440251881Speter result_pool)); 441251881Speter if (**prop_name == '\0') 442251881Speter *prop_name = NULL; 443251881Speter else if (! svn_prop_name_is_valid(*prop_name)) 444251881Speter { 445251881Speter svn_stringbuf_t *buf = svn_stringbuf_create(*prop_name, result_pool); 446251881Speter svn_stringbuf_strip_whitespace(buf); 447251881Speter *prop_name = (svn_prop_name_is_valid(buf->data) ? buf->data : NULL); 448251881Speter } 449251881Speter 450251881Speter return SVN_NO_ERROR; 451251881Speter} 452251881Speter 453299742Sdim 454299742Sdim/* A helper function to parse svn:mergeinfo diffs. 455299742Sdim * 456299742Sdim * These diffs use a special pretty-print format, for instance: 457299742Sdim * 458299742Sdim * Added: svn:mergeinfo 459299742Sdim * ## -0,0 +0,1 ## 460299742Sdim * Merged /trunk:r2-3 461299742Sdim * 462299742Sdim * The hunk header has the following format: 463299742Sdim * ## -0,NUMBER_OF_REVERSE_MERGES +0,NUMBER_OF_FORWARD_MERGES ## 464299742Sdim * 465299742Sdim * At this point, the number of reverse merges has already been 466299742Sdim * parsed into HUNK->ORIGINAL_LENGTH, and the number of forward 467299742Sdim * merges has been parsed into HUNK->MODIFIED_LENGTH. 468299742Sdim * 469299742Sdim * The header is followed by a list of mergeinfo, one path per line. 470299742Sdim * This function parses such lines. Lines describing reverse merges 471299742Sdim * appear first, and then all lines describing forward merges appear. 472299742Sdim * 473299742Sdim * Parts of the line are affected by i18n. The words 'Merged' 474299742Sdim * and 'Reverse-merged' can appear in any language and at any 475299742Sdim * position within the line. We can only assume that a leading 476299742Sdim * '/' starts the merge source path, the path is followed by 477299742Sdim * ":r", which in turn is followed by a mergeinfo revision range, 478299742Sdim * which is terminated by whitespace or end-of-string. 479299742Sdim * 480299742Sdim * If the current line meets the above criteria and we're able 481299742Sdim * to parse valid mergeinfo from it, the resulting mergeinfo 482299742Sdim * is added to patch->mergeinfo or patch->reverse_mergeinfo, 483299742Sdim * and we proceed to the next line. 484299742Sdim */ 485299742Sdimstatic svn_error_t * 486299742Sdimparse_mergeinfo(svn_boolean_t *found_mergeinfo, 487299742Sdim svn_stringbuf_t *line, 488299742Sdim svn_diff_hunk_t *hunk, 489299742Sdim svn_patch_t *patch, 490299742Sdim apr_pool_t *result_pool, 491299742Sdim apr_pool_t *scratch_pool) 492299742Sdim{ 493299742Sdim char *slash = strchr(line->data, '/'); 494299742Sdim char *colon = strrchr(line->data, ':'); 495299742Sdim 496299742Sdim *found_mergeinfo = FALSE; 497299742Sdim 498299742Sdim if (slash && colon && colon[1] == 'r' && slash < colon) 499299742Sdim { 500299742Sdim svn_stringbuf_t *input; 501299742Sdim svn_mergeinfo_t mergeinfo = NULL; 502299742Sdim char *s; 503299742Sdim svn_error_t *err; 504299742Sdim 505299742Sdim input = svn_stringbuf_create_ensure(line->len, scratch_pool); 506299742Sdim 507299742Sdim /* Copy the merge source path + colon */ 508299742Sdim s = slash; 509299742Sdim while (s <= colon) 510299742Sdim { 511299742Sdim svn_stringbuf_appendbyte(input, *s); 512299742Sdim s++; 513299742Sdim } 514299742Sdim 515299742Sdim /* skip 'r' after colon */ 516299742Sdim s++; 517299742Sdim 518299742Sdim /* Copy the revision range. */ 519299742Sdim while (s < line->data + line->len) 520299742Sdim { 521299742Sdim if (svn_ctype_isspace(*s)) 522299742Sdim break; 523299742Sdim svn_stringbuf_appendbyte(input, *s); 524299742Sdim s++; 525299742Sdim } 526299742Sdim 527299742Sdim err = svn_mergeinfo_parse(&mergeinfo, input->data, result_pool); 528299742Sdim if (err && err->apr_err == SVN_ERR_MERGEINFO_PARSE_ERROR) 529299742Sdim { 530299742Sdim svn_error_clear(err); 531299742Sdim mergeinfo = NULL; 532299742Sdim } 533299742Sdim else 534299742Sdim SVN_ERR(err); 535299742Sdim 536299742Sdim if (mergeinfo) 537299742Sdim { 538299742Sdim if (hunk->original_length > 0) /* reverse merges */ 539299742Sdim { 540299742Sdim if (patch->reverse) 541299742Sdim { 542299742Sdim if (patch->mergeinfo == NULL) 543299742Sdim patch->mergeinfo = mergeinfo; 544299742Sdim else 545299742Sdim SVN_ERR(svn_mergeinfo_merge2(patch->mergeinfo, 546299742Sdim mergeinfo, 547299742Sdim result_pool, 548299742Sdim scratch_pool)); 549299742Sdim } 550299742Sdim else 551299742Sdim { 552299742Sdim if (patch->reverse_mergeinfo == NULL) 553299742Sdim patch->reverse_mergeinfo = mergeinfo; 554299742Sdim else 555299742Sdim SVN_ERR(svn_mergeinfo_merge2(patch->reverse_mergeinfo, 556299742Sdim mergeinfo, 557299742Sdim result_pool, 558299742Sdim scratch_pool)); 559299742Sdim } 560299742Sdim hunk->original_length--; 561299742Sdim } 562299742Sdim else if (hunk->modified_length > 0) /* forward merges */ 563299742Sdim { 564299742Sdim if (patch->reverse) 565299742Sdim { 566299742Sdim if (patch->reverse_mergeinfo == NULL) 567299742Sdim patch->reverse_mergeinfo = mergeinfo; 568299742Sdim else 569299742Sdim SVN_ERR(svn_mergeinfo_merge2(patch->reverse_mergeinfo, 570299742Sdim mergeinfo, 571299742Sdim result_pool, 572299742Sdim scratch_pool)); 573299742Sdim } 574299742Sdim else 575299742Sdim { 576299742Sdim if (patch->mergeinfo == NULL) 577299742Sdim patch->mergeinfo = mergeinfo; 578299742Sdim else 579299742Sdim SVN_ERR(svn_mergeinfo_merge2(patch->mergeinfo, 580299742Sdim mergeinfo, 581299742Sdim result_pool, 582299742Sdim scratch_pool)); 583299742Sdim } 584299742Sdim hunk->modified_length--; 585299742Sdim } 586299742Sdim 587299742Sdim *found_mergeinfo = TRUE; 588299742Sdim } 589299742Sdim } 590299742Sdim 591299742Sdim return SVN_NO_ERROR; 592299742Sdim} 593299742Sdim 594251881Speter/* Return the next *HUNK from a PATCH in APR_FILE. 595251881Speter * If no hunk can be found, set *HUNK to NULL. 596251881Speter * Set IS_PROPERTY to TRUE if we have a property hunk. If the returned HUNK 597251881Speter * is the first belonging to a certain property, then PROP_NAME and 598251881Speter * PROP_OPERATION will be set too. If we have a text hunk, PROP_NAME will be 599251881Speter * NULL. If IGNORE_WHITESPACE is TRUE, lines without leading spaces will be 600251881Speter * treated as context lines. Allocate results in RESULT_POOL. 601251881Speter * Use SCRATCH_POOL for all other allocations. */ 602251881Speterstatic svn_error_t * 603251881Speterparse_next_hunk(svn_diff_hunk_t **hunk, 604251881Speter svn_boolean_t *is_property, 605251881Speter const char **prop_name, 606251881Speter svn_diff_operation_kind_t *prop_operation, 607251881Speter svn_patch_t *patch, 608251881Speter apr_file_t *apr_file, 609251881Speter svn_boolean_t ignore_whitespace, 610251881Speter apr_pool_t *result_pool, 611251881Speter apr_pool_t *scratch_pool) 612251881Speter{ 613251881Speter static const char * const minus = "--- "; 614251881Speter static const char * const text_atat = "@@"; 615251881Speter static const char * const prop_atat = "##"; 616251881Speter svn_stringbuf_t *line; 617251881Speter svn_boolean_t eof, in_hunk, hunk_seen; 618251881Speter apr_off_t pos, last_line; 619251881Speter apr_off_t start, end; 620251881Speter apr_off_t original_end; 621251881Speter apr_off_t modified_end; 622251881Speter svn_linenum_t original_lines; 623251881Speter svn_linenum_t modified_lines; 624251881Speter svn_linenum_t leading_context; 625251881Speter svn_linenum_t trailing_context; 626251881Speter svn_boolean_t changed_line_seen; 627251881Speter enum { 628251881Speter noise_line, 629251881Speter original_line, 630251881Speter modified_line, 631251881Speter context_line 632251881Speter } last_line_type; 633251881Speter apr_pool_t *iterpool; 634251881Speter 635251881Speter *prop_operation = svn_diff_op_unchanged; 636251881Speter 637251881Speter /* We only set this if we have a property hunk header. */ 638251881Speter *prop_name = NULL; 639251881Speter *is_property = FALSE; 640251881Speter 641251881Speter if (apr_file_eof(apr_file) == APR_EOF) 642251881Speter { 643251881Speter /* No more hunks here. */ 644251881Speter *hunk = NULL; 645251881Speter return SVN_NO_ERROR; 646251881Speter } 647251881Speter 648251881Speter in_hunk = FALSE; 649251881Speter hunk_seen = FALSE; 650251881Speter leading_context = 0; 651251881Speter trailing_context = 0; 652251881Speter changed_line_seen = FALSE; 653251881Speter original_end = 0; 654251881Speter modified_end = 0; 655251881Speter *hunk = apr_pcalloc(result_pool, sizeof(**hunk)); 656251881Speter 657251881Speter /* Get current seek position -- APR has no ftell() :( */ 658251881Speter pos = 0; 659251881Speter SVN_ERR(svn_io_file_seek(apr_file, APR_CUR, &pos, scratch_pool)); 660251881Speter 661251881Speter /* Start out assuming noise. */ 662251881Speter last_line_type = noise_line; 663251881Speter 664251881Speter iterpool = svn_pool_create(scratch_pool); 665251881Speter do 666251881Speter { 667251881Speter 668251881Speter svn_pool_clear(iterpool); 669251881Speter 670251881Speter /* Remember the current line's offset, and read the line. */ 671251881Speter last_line = pos; 672251881Speter SVN_ERR(svn_io_file_readline(apr_file, &line, NULL, &eof, APR_SIZE_MAX, 673251881Speter iterpool, iterpool)); 674251881Speter 675251881Speter /* Update line offset for next iteration. */ 676251881Speter pos = 0; 677251881Speter SVN_ERR(svn_io_file_seek(apr_file, APR_CUR, &pos, iterpool)); 678251881Speter 679251881Speter /* Lines starting with a backslash indicate a missing EOL: 680251881Speter * "\ No newline at end of file" or "end of property". */ 681251881Speter if (line->data[0] == '\\') 682251881Speter { 683251881Speter if (in_hunk) 684251881Speter { 685251881Speter char eolbuf[2]; 686251881Speter apr_size_t len; 687251881Speter apr_off_t off; 688251881Speter apr_off_t hunk_text_end; 689251881Speter 690251881Speter /* Comment terminates the hunk text and says the hunk text 691251881Speter * has no trailing EOL. Snip off trailing EOL which is part 692251881Speter * of the patch file but not part of the hunk text. */ 693251881Speter off = last_line - 2; 694251881Speter SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &off, iterpool)); 695251881Speter len = sizeof(eolbuf); 696251881Speter SVN_ERR(svn_io_file_read_full2(apr_file, eolbuf, len, &len, 697251881Speter &eof, iterpool)); 698251881Speter if (eolbuf[0] == '\r' && eolbuf[1] == '\n') 699251881Speter hunk_text_end = last_line - 2; 700251881Speter else if (eolbuf[1] == '\n' || eolbuf[1] == '\r') 701251881Speter hunk_text_end = last_line - 1; 702251881Speter else 703251881Speter hunk_text_end = last_line; 704251881Speter 705251881Speter if (last_line_type == original_line && original_end == 0) 706251881Speter original_end = hunk_text_end; 707251881Speter else if (last_line_type == modified_line && modified_end == 0) 708251881Speter modified_end = hunk_text_end; 709251881Speter else if (last_line_type == context_line) 710251881Speter { 711251881Speter if (original_end == 0) 712251881Speter original_end = hunk_text_end; 713251881Speter if (modified_end == 0) 714251881Speter modified_end = hunk_text_end; 715251881Speter } 716251881Speter 717251881Speter SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &pos, iterpool)); 718251881Speter } 719251881Speter 720251881Speter continue; 721251881Speter } 722251881Speter 723299742Sdim if (in_hunk && *is_property && *prop_name && 724299742Sdim strcmp(*prop_name, SVN_PROP_MERGEINFO) == 0) 725299742Sdim { 726299742Sdim svn_boolean_t found_mergeinfo; 727299742Sdim 728299742Sdim SVN_ERR(parse_mergeinfo(&found_mergeinfo, line, *hunk, patch, 729299742Sdim result_pool, iterpool)); 730299742Sdim if (found_mergeinfo) 731299742Sdim continue; /* Proceed to the next line in the patch. */ 732299742Sdim } 733299742Sdim 734251881Speter if (in_hunk) 735251881Speter { 736251881Speter char c; 737251881Speter static const char add = '+'; 738251881Speter static const char del = '-'; 739251881Speter 740251881Speter if (! hunk_seen) 741251881Speter { 742251881Speter /* We're reading the first line of the hunk, so the start 743251881Speter * of the line just read is the hunk text's byte offset. */ 744251881Speter start = last_line; 745251881Speter } 746251881Speter 747251881Speter c = line->data[0]; 748251881Speter if (original_lines > 0 && modified_lines > 0 && 749251881Speter ((c == ' ') 750251881Speter /* Tolerate chopped leading spaces on empty lines. */ 751251881Speter || (! eof && line->len == 0) 752251881Speter /* Maybe tolerate chopped leading spaces on non-empty lines. */ 753251881Speter || (ignore_whitespace && c != del && c != add))) 754251881Speter { 755251881Speter /* It's a "context" line in the hunk. */ 756251881Speter hunk_seen = TRUE; 757251881Speter original_lines--; 758251881Speter modified_lines--; 759251881Speter if (changed_line_seen) 760251881Speter trailing_context++; 761251881Speter else 762251881Speter leading_context++; 763251881Speter last_line_type = context_line; 764251881Speter } 765251881Speter else if (original_lines > 0 && c == del) 766251881Speter { 767251881Speter /* It's a "deleted" line in the hunk. */ 768251881Speter hunk_seen = TRUE; 769251881Speter changed_line_seen = TRUE; 770251881Speter 771251881Speter /* A hunk may have context in the middle. We only want 772251881Speter trailing lines of context. */ 773251881Speter if (trailing_context > 0) 774251881Speter trailing_context = 0; 775251881Speter 776251881Speter original_lines--; 777251881Speter last_line_type = original_line; 778251881Speter } 779251881Speter else if (modified_lines > 0 && c == add) 780251881Speter { 781251881Speter /* It's an "added" line in the hunk. */ 782251881Speter hunk_seen = TRUE; 783251881Speter changed_line_seen = TRUE; 784251881Speter 785251881Speter /* A hunk may have context in the middle. We only want 786251881Speter trailing lines of context. */ 787251881Speter if (trailing_context > 0) 788251881Speter trailing_context = 0; 789251881Speter 790251881Speter modified_lines--; 791251881Speter last_line_type = modified_line; 792251881Speter } 793251881Speter else 794251881Speter { 795251881Speter if (eof) 796251881Speter { 797251881Speter /* The hunk ends at EOF. */ 798251881Speter end = pos; 799251881Speter } 800251881Speter else 801251881Speter { 802251881Speter /* The start of the current line marks the first byte 803251881Speter * after the hunk text. */ 804251881Speter end = last_line; 805251881Speter } 806251881Speter 807251881Speter if (original_end == 0) 808251881Speter original_end = end; 809251881Speter if (modified_end == 0) 810251881Speter modified_end = end; 811251881Speter break; /* Hunk was empty or has been read. */ 812251881Speter } 813251881Speter } 814251881Speter else 815251881Speter { 816251881Speter if (starts_with(line->data, text_atat)) 817251881Speter { 818251881Speter /* Looks like we have a hunk header, try to rip it apart. */ 819251881Speter in_hunk = parse_hunk_header(line->data, *hunk, text_atat, 820251881Speter iterpool); 821251881Speter if (in_hunk) 822251881Speter { 823251881Speter original_lines = (*hunk)->original_length; 824251881Speter modified_lines = (*hunk)->modified_length; 825251881Speter *is_property = FALSE; 826251881Speter } 827251881Speter } 828251881Speter else if (starts_with(line->data, prop_atat)) 829251881Speter { 830251881Speter /* Looks like we have a property hunk header, try to rip it 831251881Speter * apart. */ 832251881Speter in_hunk = parse_hunk_header(line->data, *hunk, prop_atat, 833251881Speter iterpool); 834251881Speter if (in_hunk) 835251881Speter { 836251881Speter original_lines = (*hunk)->original_length; 837251881Speter modified_lines = (*hunk)->modified_length; 838251881Speter *is_property = TRUE; 839251881Speter } 840251881Speter } 841251881Speter else if (starts_with(line->data, "Added: ")) 842251881Speter { 843251881Speter SVN_ERR(parse_prop_name(prop_name, line->data, "Added: ", 844251881Speter result_pool)); 845251881Speter if (*prop_name) 846251881Speter *prop_operation = svn_diff_op_added; 847251881Speter } 848251881Speter else if (starts_with(line->data, "Deleted: ")) 849251881Speter { 850251881Speter SVN_ERR(parse_prop_name(prop_name, line->data, "Deleted: ", 851251881Speter result_pool)); 852251881Speter if (*prop_name) 853251881Speter *prop_operation = svn_diff_op_deleted; 854251881Speter } 855251881Speter else if (starts_with(line->data, "Modified: ")) 856251881Speter { 857251881Speter SVN_ERR(parse_prop_name(prop_name, line->data, "Modified: ", 858251881Speter result_pool)); 859251881Speter if (*prop_name) 860251881Speter *prop_operation = svn_diff_op_modified; 861251881Speter } 862251881Speter else if (starts_with(line->data, minus) 863251881Speter || starts_with(line->data, "diff --git ")) 864251881Speter /* This could be a header of another patch. Bail out. */ 865251881Speter break; 866251881Speter } 867251881Speter } 868251881Speter /* Check for the line length since a file may not have a newline at the 869251881Speter * end and we depend upon the last line to be an empty one. */ 870251881Speter while (! eof || line->len > 0); 871251881Speter svn_pool_destroy(iterpool); 872251881Speter 873251881Speter if (! eof) 874251881Speter /* Rewind to the start of the line just read, so subsequent calls 875251881Speter * to this function or svn_diff_parse_next_patch() don't end 876251881Speter * up skipping the line -- it may contain a patch or hunk header. */ 877251881Speter SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &last_line, scratch_pool)); 878251881Speter 879251881Speter if (hunk_seen && start < end) 880251881Speter { 881251881Speter (*hunk)->patch = patch; 882251881Speter (*hunk)->apr_file = apr_file; 883251881Speter (*hunk)->leading_context = leading_context; 884251881Speter (*hunk)->trailing_context = trailing_context; 885251881Speter (*hunk)->diff_text_range.start = start; 886251881Speter (*hunk)->diff_text_range.current = start; 887251881Speter (*hunk)->diff_text_range.end = end; 888251881Speter (*hunk)->original_text_range.start = start; 889251881Speter (*hunk)->original_text_range.current = start; 890251881Speter (*hunk)->original_text_range.end = original_end; 891251881Speter (*hunk)->modified_text_range.start = start; 892251881Speter (*hunk)->modified_text_range.current = start; 893251881Speter (*hunk)->modified_text_range.end = modified_end; 894251881Speter } 895251881Speter else 896251881Speter /* Something went wrong, just discard the result. */ 897251881Speter *hunk = NULL; 898251881Speter 899251881Speter return SVN_NO_ERROR; 900251881Speter} 901251881Speter 902251881Speter/* Compare function for sorting hunks after parsing. 903251881Speter * We sort hunks by their original line offset. */ 904251881Speterstatic int 905251881Spetercompare_hunks(const void *a, const void *b) 906251881Speter{ 907251881Speter const svn_diff_hunk_t *ha = *((const svn_diff_hunk_t *const *)a); 908251881Speter const svn_diff_hunk_t *hb = *((const svn_diff_hunk_t *const *)b); 909251881Speter 910251881Speter if (ha->original_start < hb->original_start) 911251881Speter return -1; 912251881Speter if (ha->original_start > hb->original_start) 913251881Speter return 1; 914251881Speter return 0; 915251881Speter} 916251881Speter 917251881Speter/* Possible states of the diff header parser. */ 918251881Speterenum parse_state 919251881Speter{ 920251881Speter state_start, /* initial */ 921251881Speter state_git_diff_seen, /* diff --git */ 922251881Speter state_git_tree_seen, /* a tree operation, rather then content change */ 923251881Speter state_git_minus_seen, /* --- /dev/null; or --- a/ */ 924251881Speter state_git_plus_seen, /* +++ /dev/null; or +++ a/ */ 925251881Speter state_move_from_seen, /* rename from foo.c */ 926251881Speter state_copy_from_seen, /* copy from foo.c */ 927251881Speter state_minus_seen, /* --- foo.c */ 928251881Speter state_unidiff_found, /* valid start of a regular unidiff header */ 929251881Speter state_git_header_found /* valid start of a --git diff header */ 930251881Speter}; 931251881Speter 932251881Speter/* Data type describing a valid state transition of the parser. */ 933251881Speterstruct transition 934251881Speter{ 935251881Speter const char *expected_input; 936251881Speter enum parse_state required_state; 937251881Speter 938251881Speter /* A callback called upon each parser state transition. */ 939251881Speter svn_error_t *(*fn)(enum parse_state *new_state, char *input, 940251881Speter svn_patch_t *patch, apr_pool_t *result_pool, 941251881Speter apr_pool_t *scratch_pool); 942251881Speter}; 943251881Speter 944251881Speter/* UTF-8 encode and canonicalize the content of LINE as FILE_NAME. */ 945251881Speterstatic svn_error_t * 946251881Spetergrab_filename(const char **file_name, const char *line, apr_pool_t *result_pool, 947251881Speter apr_pool_t *scratch_pool) 948251881Speter{ 949251881Speter const char *utf8_path; 950251881Speter const char *canon_path; 951251881Speter 952251881Speter /* Grab the filename and encode it in UTF-8. */ 953251881Speter /* TODO: Allow specifying the patch file's encoding. 954251881Speter * For now, we assume its encoding is native. */ 955251881Speter /* ### This can fail if the filename cannot be represented in the current 956251881Speter * ### locale's encoding. */ 957251881Speter SVN_ERR(svn_utf_cstring_to_utf8(&utf8_path, 958251881Speter line, 959251881Speter scratch_pool)); 960251881Speter 961251881Speter /* Canonicalize the path name. */ 962251881Speter canon_path = svn_dirent_canonicalize(utf8_path, scratch_pool); 963251881Speter 964251881Speter *file_name = apr_pstrdup(result_pool, canon_path); 965251881Speter 966251881Speter return SVN_NO_ERROR; 967251881Speter} 968251881Speter 969251881Speter/* Parse the '--- ' line of a regular unidiff. */ 970251881Speterstatic svn_error_t * 971251881Speterdiff_minus(enum parse_state *new_state, char *line, svn_patch_t *patch, 972251881Speter apr_pool_t *result_pool, apr_pool_t *scratch_pool) 973251881Speter{ 974251881Speter /* If we can find a tab, it separates the filename from 975251881Speter * the rest of the line which we can discard. */ 976251881Speter char *tab = strchr(line, '\t'); 977251881Speter if (tab) 978251881Speter *tab = '\0'; 979251881Speter 980251881Speter SVN_ERR(grab_filename(&patch->old_filename, line + STRLEN_LITERAL("--- "), 981251881Speter result_pool, scratch_pool)); 982251881Speter 983251881Speter *new_state = state_minus_seen; 984251881Speter 985251881Speter return SVN_NO_ERROR; 986251881Speter} 987251881Speter 988251881Speter/* Parse the '+++ ' line of a regular unidiff. */ 989251881Speterstatic svn_error_t * 990251881Speterdiff_plus(enum parse_state *new_state, char *line, svn_patch_t *patch, 991251881Speter apr_pool_t *result_pool, apr_pool_t *scratch_pool) 992251881Speter{ 993251881Speter /* If we can find a tab, it separates the filename from 994251881Speter * the rest of the line which we can discard. */ 995251881Speter char *tab = strchr(line, '\t'); 996251881Speter if (tab) 997251881Speter *tab = '\0'; 998251881Speter 999251881Speter SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("+++ "), 1000251881Speter result_pool, scratch_pool)); 1001251881Speter 1002251881Speter *new_state = state_unidiff_found; 1003251881Speter 1004251881Speter return SVN_NO_ERROR; 1005251881Speter} 1006251881Speter 1007251881Speter/* Parse the first line of a git extended unidiff. */ 1008251881Speterstatic svn_error_t * 1009251881Spetergit_start(enum parse_state *new_state, char *line, svn_patch_t *patch, 1010251881Speter apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1011251881Speter{ 1012251881Speter const char *old_path_start; 1013251881Speter char *old_path_end; 1014251881Speter const char *new_path_start; 1015251881Speter const char *new_path_end; 1016251881Speter char *new_path_marker; 1017251881Speter const char *old_path_marker; 1018251881Speter 1019251881Speter /* ### Add handling of escaped paths 1020251881Speter * http://www.kernel.org/pub/software/scm/git/docs/git-diff.html: 1021251881Speter * 1022251881Speter * TAB, LF, double quote and backslash characters in pathnames are 1023251881Speter * represented as \t, \n, \" and \\, respectively. If there is need for 1024251881Speter * such substitution then the whole pathname is put in double quotes. 1025251881Speter */ 1026251881Speter 1027251881Speter /* Our line should look like this: 'diff --git a/path b/path'. 1028251881Speter * 1029251881Speter * If we find any deviations from that format, we return with state reset 1030251881Speter * to start. 1031251881Speter */ 1032251881Speter old_path_marker = strstr(line, " a/"); 1033251881Speter 1034251881Speter if (! old_path_marker) 1035251881Speter { 1036251881Speter *new_state = state_start; 1037251881Speter return SVN_NO_ERROR; 1038251881Speter } 1039251881Speter 1040251881Speter if (! *(old_path_marker + 3)) 1041251881Speter { 1042251881Speter *new_state = state_start; 1043251881Speter return SVN_NO_ERROR; 1044251881Speter } 1045251881Speter 1046251881Speter new_path_marker = strstr(old_path_marker, " b/"); 1047251881Speter 1048251881Speter if (! new_path_marker) 1049251881Speter { 1050251881Speter *new_state = state_start; 1051251881Speter return SVN_NO_ERROR; 1052251881Speter } 1053251881Speter 1054251881Speter if (! *(new_path_marker + 3)) 1055251881Speter { 1056251881Speter *new_state = state_start; 1057251881Speter return SVN_NO_ERROR; 1058251881Speter } 1059251881Speter 1060251881Speter /* By now, we know that we have a line on the form '--git diff a/.+ b/.+' 1061251881Speter * We only need the filenames when we have deleted or added empty 1062251881Speter * files. In those cases the old_path and new_path is identical on the 1063251881Speter * 'diff --git' line. For all other cases we fetch the filenames from 1064251881Speter * other header lines. */ 1065251881Speter old_path_start = line + STRLEN_LITERAL("diff --git a/"); 1066251881Speter new_path_end = line + strlen(line); 1067251881Speter new_path_start = old_path_start; 1068251881Speter 1069251881Speter while (TRUE) 1070251881Speter { 1071251881Speter ptrdiff_t len_old; 1072251881Speter ptrdiff_t len_new; 1073251881Speter 1074251881Speter new_path_marker = strstr(new_path_start, " b/"); 1075251881Speter 1076251881Speter /* No new path marker, bail out. */ 1077251881Speter if (! new_path_marker) 1078251881Speter break; 1079251881Speter 1080251881Speter old_path_end = new_path_marker; 1081251881Speter new_path_start = new_path_marker + STRLEN_LITERAL(" b/"); 1082251881Speter 1083251881Speter /* No path after the marker. */ 1084251881Speter if (! *new_path_start) 1085251881Speter break; 1086251881Speter 1087251881Speter len_old = old_path_end - old_path_start; 1088251881Speter len_new = new_path_end - new_path_start; 1089251881Speter 1090251881Speter /* Are the paths before and after the " b/" marker the same? */ 1091251881Speter if (len_old == len_new 1092251881Speter && ! strncmp(old_path_start, new_path_start, len_old)) 1093251881Speter { 1094251881Speter *old_path_end = '\0'; 1095251881Speter SVN_ERR(grab_filename(&patch->old_filename, old_path_start, 1096251881Speter result_pool, scratch_pool)); 1097251881Speter 1098251881Speter SVN_ERR(grab_filename(&patch->new_filename, new_path_start, 1099251881Speter result_pool, scratch_pool)); 1100251881Speter break; 1101251881Speter } 1102251881Speter } 1103251881Speter 1104251881Speter /* We assume that the path is only modified until we've found a 'tree' 1105251881Speter * header */ 1106251881Speter patch->operation = svn_diff_op_modified; 1107251881Speter 1108251881Speter *new_state = state_git_diff_seen; 1109251881Speter return SVN_NO_ERROR; 1110251881Speter} 1111251881Speter 1112251881Speter/* Parse the '--- ' line of a git extended unidiff. */ 1113251881Speterstatic svn_error_t * 1114251881Spetergit_minus(enum parse_state *new_state, char *line, svn_patch_t *patch, 1115251881Speter apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1116251881Speter{ 1117251881Speter /* If we can find a tab, it separates the filename from 1118251881Speter * the rest of the line which we can discard. */ 1119251881Speter char *tab = strchr(line, '\t'); 1120251881Speter if (tab) 1121251881Speter *tab = '\0'; 1122251881Speter 1123251881Speter if (starts_with(line, "--- /dev/null")) 1124251881Speter SVN_ERR(grab_filename(&patch->old_filename, "/dev/null", 1125251881Speter result_pool, scratch_pool)); 1126251881Speter else 1127251881Speter SVN_ERR(grab_filename(&patch->old_filename, line + STRLEN_LITERAL("--- a/"), 1128251881Speter result_pool, scratch_pool)); 1129251881Speter 1130251881Speter *new_state = state_git_minus_seen; 1131251881Speter return SVN_NO_ERROR; 1132251881Speter} 1133251881Speter 1134251881Speter/* Parse the '+++ ' line of a git extended unidiff. */ 1135251881Speterstatic svn_error_t * 1136251881Spetergit_plus(enum parse_state *new_state, char *line, svn_patch_t *patch, 1137251881Speter apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1138251881Speter{ 1139251881Speter /* If we can find a tab, it separates the filename from 1140251881Speter * the rest of the line which we can discard. */ 1141251881Speter char *tab = strchr(line, '\t'); 1142251881Speter if (tab) 1143251881Speter *tab = '\0'; 1144251881Speter 1145251881Speter if (starts_with(line, "+++ /dev/null")) 1146251881Speter SVN_ERR(grab_filename(&patch->new_filename, "/dev/null", 1147251881Speter result_pool, scratch_pool)); 1148251881Speter else 1149251881Speter SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("+++ b/"), 1150251881Speter result_pool, scratch_pool)); 1151251881Speter 1152251881Speter *new_state = state_git_header_found; 1153251881Speter return SVN_NO_ERROR; 1154251881Speter} 1155251881Speter 1156251881Speter/* Parse the 'rename from ' line of a git extended unidiff. */ 1157251881Speterstatic svn_error_t * 1158251881Spetergit_move_from(enum parse_state *new_state, char *line, svn_patch_t *patch, 1159251881Speter apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1160251881Speter{ 1161251881Speter SVN_ERR(grab_filename(&patch->old_filename, 1162251881Speter line + STRLEN_LITERAL("rename from "), 1163251881Speter result_pool, scratch_pool)); 1164251881Speter 1165251881Speter *new_state = state_move_from_seen; 1166251881Speter return SVN_NO_ERROR; 1167251881Speter} 1168251881Speter 1169251881Speter/* Parse the 'rename to ' line of a git extended unidiff. */ 1170251881Speterstatic svn_error_t * 1171251881Spetergit_move_to(enum parse_state *new_state, char *line, svn_patch_t *patch, 1172251881Speter apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1173251881Speter{ 1174251881Speter SVN_ERR(grab_filename(&patch->new_filename, 1175251881Speter line + STRLEN_LITERAL("rename to "), 1176251881Speter result_pool, scratch_pool)); 1177251881Speter 1178251881Speter patch->operation = svn_diff_op_moved; 1179251881Speter 1180251881Speter *new_state = state_git_tree_seen; 1181251881Speter return SVN_NO_ERROR; 1182251881Speter} 1183251881Speter 1184251881Speter/* Parse the 'copy from ' line of a git extended unidiff. */ 1185251881Speterstatic svn_error_t * 1186251881Spetergit_copy_from(enum parse_state *new_state, char *line, svn_patch_t *patch, 1187251881Speter apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1188251881Speter{ 1189251881Speter SVN_ERR(grab_filename(&patch->old_filename, 1190251881Speter line + STRLEN_LITERAL("copy from "), 1191251881Speter result_pool, scratch_pool)); 1192251881Speter 1193251881Speter *new_state = state_copy_from_seen; 1194251881Speter return SVN_NO_ERROR; 1195251881Speter} 1196251881Speter 1197251881Speter/* Parse the 'copy to ' line of a git extended unidiff. */ 1198251881Speterstatic svn_error_t * 1199251881Spetergit_copy_to(enum parse_state *new_state, char *line, svn_patch_t *patch, 1200251881Speter apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1201251881Speter{ 1202251881Speter SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("copy to "), 1203251881Speter result_pool, scratch_pool)); 1204251881Speter 1205251881Speter patch->operation = svn_diff_op_copied; 1206251881Speter 1207251881Speter *new_state = state_git_tree_seen; 1208251881Speter return SVN_NO_ERROR; 1209251881Speter} 1210251881Speter 1211251881Speter/* Parse the 'new file ' line of a git extended unidiff. */ 1212251881Speterstatic svn_error_t * 1213251881Spetergit_new_file(enum parse_state *new_state, char *line, svn_patch_t *patch, 1214251881Speter apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1215251881Speter{ 1216251881Speter patch->operation = svn_diff_op_added; 1217251881Speter 1218251881Speter /* Filename already retrieved from diff --git header. */ 1219251881Speter 1220251881Speter *new_state = state_git_tree_seen; 1221251881Speter return SVN_NO_ERROR; 1222251881Speter} 1223251881Speter 1224251881Speter/* Parse the 'deleted file ' line of a git extended unidiff. */ 1225251881Speterstatic svn_error_t * 1226251881Spetergit_deleted_file(enum parse_state *new_state, char *line, svn_patch_t *patch, 1227251881Speter apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1228251881Speter{ 1229251881Speter patch->operation = svn_diff_op_deleted; 1230251881Speter 1231251881Speter /* Filename already retrieved from diff --git header. */ 1232251881Speter 1233251881Speter *new_state = state_git_tree_seen; 1234251881Speter return SVN_NO_ERROR; 1235251881Speter} 1236251881Speter 1237251881Speter/* Add a HUNK associated with the property PROP_NAME to PATCH. */ 1238251881Speterstatic svn_error_t * 1239251881Speteradd_property_hunk(svn_patch_t *patch, const char *prop_name, 1240251881Speter svn_diff_hunk_t *hunk, svn_diff_operation_kind_t operation, 1241251881Speter apr_pool_t *result_pool) 1242251881Speter{ 1243251881Speter svn_prop_patch_t *prop_patch; 1244251881Speter 1245251881Speter prop_patch = svn_hash_gets(patch->prop_patches, prop_name); 1246251881Speter 1247251881Speter if (! prop_patch) 1248251881Speter { 1249251881Speter prop_patch = apr_palloc(result_pool, sizeof(svn_prop_patch_t)); 1250251881Speter prop_patch->name = prop_name; 1251251881Speter prop_patch->operation = operation; 1252251881Speter prop_patch->hunks = apr_array_make(result_pool, 1, 1253251881Speter sizeof(svn_diff_hunk_t *)); 1254251881Speter 1255251881Speter svn_hash_sets(patch->prop_patches, prop_name, prop_patch); 1256251881Speter } 1257251881Speter 1258251881Speter APR_ARRAY_PUSH(prop_patch->hunks, svn_diff_hunk_t *) = hunk; 1259251881Speter 1260251881Speter return SVN_NO_ERROR; 1261251881Speter} 1262251881Speter 1263251881Speterstruct svn_patch_file_t 1264251881Speter{ 1265251881Speter /* The APR file handle to the patch file. */ 1266251881Speter apr_file_t *apr_file; 1267251881Speter 1268251881Speter /* The file offset at which the next patch is expected. */ 1269251881Speter apr_off_t next_patch_offset; 1270251881Speter}; 1271251881Speter 1272251881Spetersvn_error_t * 1273251881Spetersvn_diff_open_patch_file(svn_patch_file_t **patch_file, 1274251881Speter const char *local_abspath, 1275251881Speter apr_pool_t *result_pool) 1276251881Speter{ 1277251881Speter svn_patch_file_t *p; 1278251881Speter 1279251881Speter p = apr_palloc(result_pool, sizeof(*p)); 1280251881Speter SVN_ERR(svn_io_file_open(&p->apr_file, local_abspath, 1281251881Speter APR_READ | APR_BUFFERED, APR_OS_DEFAULT, 1282251881Speter result_pool)); 1283251881Speter p->next_patch_offset = 0; 1284251881Speter *patch_file = p; 1285251881Speter 1286251881Speter return SVN_NO_ERROR; 1287251881Speter} 1288251881Speter 1289251881Speter/* Parse hunks from APR_FILE and store them in PATCH->HUNKS. 1290251881Speter * Parsing stops if no valid next hunk can be found. 1291251881Speter * If IGNORE_WHITESPACE is TRUE, lines without 1292251881Speter * leading spaces will be treated as context lines. 1293251881Speter * Allocate results in RESULT_POOL. 1294251881Speter * Use SCRATCH_POOL for temporary allocations. */ 1295251881Speterstatic svn_error_t * 1296251881Speterparse_hunks(svn_patch_t *patch, apr_file_t *apr_file, 1297251881Speter svn_boolean_t ignore_whitespace, 1298251881Speter apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1299251881Speter{ 1300251881Speter svn_diff_hunk_t *hunk; 1301251881Speter svn_boolean_t is_property; 1302251881Speter const char *last_prop_name; 1303251881Speter const char *prop_name; 1304251881Speter svn_diff_operation_kind_t prop_operation; 1305251881Speter apr_pool_t *iterpool; 1306251881Speter 1307251881Speter last_prop_name = NULL; 1308251881Speter 1309251881Speter patch->hunks = apr_array_make(result_pool, 10, sizeof(svn_diff_hunk_t *)); 1310251881Speter patch->prop_patches = apr_hash_make(result_pool); 1311251881Speter iterpool = svn_pool_create(scratch_pool); 1312251881Speter do 1313251881Speter { 1314251881Speter svn_pool_clear(iterpool); 1315251881Speter 1316251881Speter SVN_ERR(parse_next_hunk(&hunk, &is_property, &prop_name, &prop_operation, 1317251881Speter patch, apr_file, ignore_whitespace, result_pool, 1318251881Speter iterpool)); 1319251881Speter 1320251881Speter if (hunk && is_property) 1321251881Speter { 1322251881Speter if (! prop_name) 1323251881Speter prop_name = last_prop_name; 1324251881Speter else 1325251881Speter last_prop_name = prop_name; 1326299742Sdim 1327299742Sdim /* Skip svn:mergeinfo properties. 1328299742Sdim * Mergeinfo data cannot be represented as a hunk and 1329299742Sdim * is therefore stored in PATCH itself. */ 1330299742Sdim if (strcmp(prop_name, SVN_PROP_MERGEINFO) == 0) 1331299742Sdim continue; 1332299742Sdim 1333251881Speter SVN_ERR(add_property_hunk(patch, prop_name, hunk, prop_operation, 1334251881Speter result_pool)); 1335251881Speter } 1336251881Speter else if (hunk) 1337251881Speter { 1338251881Speter APR_ARRAY_PUSH(patch->hunks, svn_diff_hunk_t *) = hunk; 1339251881Speter last_prop_name = NULL; 1340251881Speter } 1341251881Speter 1342251881Speter } 1343251881Speter while (hunk); 1344251881Speter svn_pool_destroy(iterpool); 1345251881Speter 1346251881Speter return SVN_NO_ERROR; 1347251881Speter} 1348251881Speter 1349251881Speter/* State machine for the diff header parser. 1350251881Speter * Expected Input Required state Function to call */ 1351251881Speterstatic struct transition transitions[] = 1352251881Speter{ 1353251881Speter {"--- ", state_start, diff_minus}, 1354251881Speter {"+++ ", state_minus_seen, diff_plus}, 1355251881Speter {"diff --git", state_start, git_start}, 1356251881Speter {"--- a/", state_git_diff_seen, git_minus}, 1357251881Speter {"--- a/", state_git_tree_seen, git_minus}, 1358251881Speter {"--- /dev/null", state_git_tree_seen, git_minus}, 1359251881Speter {"+++ b/", state_git_minus_seen, git_plus}, 1360251881Speter {"+++ /dev/null", state_git_minus_seen, git_plus}, 1361251881Speter {"rename from ", state_git_diff_seen, git_move_from}, 1362251881Speter {"rename to ", state_move_from_seen, git_move_to}, 1363251881Speter {"copy from ", state_git_diff_seen, git_copy_from}, 1364251881Speter {"copy to ", state_copy_from_seen, git_copy_to}, 1365251881Speter {"new file ", state_git_diff_seen, git_new_file}, 1366251881Speter {"deleted file ", state_git_diff_seen, git_deleted_file}, 1367251881Speter}; 1368251881Speter 1369251881Spetersvn_error_t * 1370299742Sdimsvn_diff_parse_next_patch(svn_patch_t **patch_p, 1371251881Speter svn_patch_file_t *patch_file, 1372251881Speter svn_boolean_t reverse, 1373251881Speter svn_boolean_t ignore_whitespace, 1374251881Speter apr_pool_t *result_pool, 1375251881Speter apr_pool_t *scratch_pool) 1376251881Speter{ 1377251881Speter apr_off_t pos, last_line; 1378251881Speter svn_boolean_t eof; 1379251881Speter svn_boolean_t line_after_tree_header_read = FALSE; 1380251881Speter apr_pool_t *iterpool; 1381299742Sdim svn_patch_t *patch; 1382251881Speter enum parse_state state = state_start; 1383251881Speter 1384251881Speter if (apr_file_eof(patch_file->apr_file) == APR_EOF) 1385251881Speter { 1386251881Speter /* No more patches here. */ 1387299742Sdim *patch_p = NULL; 1388251881Speter return SVN_NO_ERROR; 1389251881Speter } 1390251881Speter 1391299742Sdim patch = apr_pcalloc(result_pool, sizeof(*patch)); 1392251881Speter 1393251881Speter pos = patch_file->next_patch_offset; 1394251881Speter SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &pos, scratch_pool)); 1395251881Speter 1396251881Speter iterpool = svn_pool_create(scratch_pool); 1397251881Speter do 1398251881Speter { 1399251881Speter svn_stringbuf_t *line; 1400251881Speter svn_boolean_t valid_header_line = FALSE; 1401251881Speter int i; 1402251881Speter 1403251881Speter svn_pool_clear(iterpool); 1404251881Speter 1405251881Speter /* Remember the current line's offset, and read the line. */ 1406251881Speter last_line = pos; 1407251881Speter SVN_ERR(svn_io_file_readline(patch_file->apr_file, &line, NULL, &eof, 1408251881Speter APR_SIZE_MAX, iterpool, iterpool)); 1409251881Speter 1410251881Speter if (! eof) 1411251881Speter { 1412251881Speter /* Update line offset for next iteration. */ 1413251881Speter pos = 0; 1414251881Speter SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_CUR, &pos, 1415251881Speter iterpool)); 1416251881Speter } 1417251881Speter 1418251881Speter /* Run the state machine. */ 1419251881Speter for (i = 0; i < (sizeof(transitions) / sizeof(transitions[0])); i++) 1420251881Speter { 1421251881Speter if (starts_with(line->data, transitions[i].expected_input) 1422251881Speter && state == transitions[i].required_state) 1423251881Speter { 1424299742Sdim SVN_ERR(transitions[i].fn(&state, line->data, patch, 1425251881Speter result_pool, iterpool)); 1426251881Speter valid_header_line = TRUE; 1427251881Speter break; 1428251881Speter } 1429251881Speter } 1430251881Speter 1431251881Speter if (state == state_unidiff_found || state == state_git_header_found) 1432251881Speter { 1433251881Speter /* We have a valid diff header, yay! */ 1434251881Speter break; 1435251881Speter } 1436251881Speter else if (state == state_git_tree_seen && line_after_tree_header_read) 1437251881Speter { 1438251881Speter /* git patches can contain an index line after the file mode line */ 1439251881Speter if (!starts_with(line->data, "index ")) 1440251881Speter { 1441251881Speter /* We have a valid diff header for a patch with only tree changes. 1442251881Speter * Rewind to the start of the line just read, so subsequent calls 1443251881Speter * to this function don't end up skipping the line -- it may 1444251881Speter * contain a patch. */ 1445251881Speter SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &last_line, 1446251881Speter scratch_pool)); 1447251881Speter break; 1448251881Speter } 1449251881Speter } 1450251881Speter else if (state == state_git_tree_seen) 1451251881Speter { 1452251881Speter line_after_tree_header_read = TRUE; 1453251881Speter } 1454251881Speter else if (! valid_header_line && state != state_start 1455289166Speter && state != state_git_diff_seen 1456251881Speter && !starts_with(line->data, "index ")) 1457251881Speter { 1458251881Speter /* We've encountered an invalid diff header. 1459251881Speter * 1460251881Speter * Rewind to the start of the line just read - it may be a new 1461251881Speter * header that begins there. */ 1462251881Speter SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &last_line, 1463251881Speter scratch_pool)); 1464251881Speter state = state_start; 1465251881Speter } 1466251881Speter 1467251881Speter } 1468251881Speter while (! eof); 1469251881Speter 1470299742Sdim patch->reverse = reverse; 1471251881Speter if (reverse) 1472251881Speter { 1473251881Speter const char *temp; 1474299742Sdim temp = patch->old_filename; 1475299742Sdim patch->old_filename = patch->new_filename; 1476299742Sdim patch->new_filename = temp; 1477251881Speter } 1478251881Speter 1479299742Sdim if (patch->old_filename == NULL || patch->new_filename == NULL) 1480251881Speter { 1481251881Speter /* Something went wrong, just discard the result. */ 1482299742Sdim patch = NULL; 1483251881Speter } 1484251881Speter else 1485299742Sdim SVN_ERR(parse_hunks(patch, patch_file->apr_file, ignore_whitespace, 1486251881Speter result_pool, iterpool)); 1487251881Speter 1488251881Speter svn_pool_destroy(iterpool); 1489251881Speter 1490251881Speter patch_file->next_patch_offset = 0; 1491251881Speter SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_CUR, 1492251881Speter &patch_file->next_patch_offset, scratch_pool)); 1493251881Speter 1494299742Sdim if (patch) 1495251881Speter { 1496251881Speter /* Usually, hunks appear in the patch sorted by their original line 1497251881Speter * offset. But just in case they weren't parsed in this order for 1498251881Speter * some reason, we sort them so that our caller can assume that hunks 1499251881Speter * are sorted as if parsed from a usual patch. */ 1500299742Sdim svn_sort__array(patch->hunks, compare_hunks); 1501251881Speter } 1502251881Speter 1503299742Sdim *patch_p = patch; 1504251881Speter return SVN_NO_ERROR; 1505251881Speter} 1506251881Speter 1507251881Spetersvn_error_t * 1508251881Spetersvn_diff_close_patch_file(svn_patch_file_t *patch_file, 1509251881Speter apr_pool_t *scratch_pool) 1510251881Speter{ 1511251881Speter return svn_error_trace(svn_io_file_close(patch_file->apr_file, 1512251881Speter scratch_pool)); 1513251881Speter} 1514