1/* 2 * dirent_uri.c: a library to manipulate URIs and directory entries. 3 * 4 * ==================================================================== 5 * Licensed to the Apache Software Foundation (ASF) under one 6 * or more contributor license agreements. See the NOTICE file 7 * distributed with this work for additional information 8 * regarding copyright ownership. The ASF licenses this file 9 * to you under the Apache License, Version 2.0 (the 10 * "License"); you may not use this file except in compliance 11 * with the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, 16 * software distributed under the License is distributed on an 17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 18 * KIND, either express or implied. See the License for the 19 * specific language governing permissions and limitations 20 * under the License. 21 * ==================================================================== 22 */ 23 24 25 26#include <string.h> 27#include <assert.h> 28#include <ctype.h> 29 30#include <apr_uri.h> 31#include <apr_lib.h> 32 33#include "svn_private_config.h" 34#include "svn_string.h" 35#include "svn_dirent_uri.h" 36#include "svn_path.h" 37#include "svn_ctype.h" 38 39#include "dirent_uri.h" 40#include "private/svn_fspath.h" 41#include "private/svn_cert.h" 42 43/* The canonical empty path. Can this be changed? Well, change the empty 44 test below and the path library will work, not so sure about the fs/wc 45 libraries. */ 46#define SVN_EMPTY_PATH "" 47 48/* TRUE if s is the canonical empty path, FALSE otherwise */ 49#define SVN_PATH_IS_EMPTY(s) ((s)[0] == '\0') 50 51/* TRUE if s,n is the platform's empty path ("."), FALSE otherwise. Can 52 this be changed? Well, the path library will work, not so sure about 53 the OS! 
*/ 54#define SVN_PATH_IS_PLATFORM_EMPTY(s,n) ((n) == 1 && (s)[0] == '.') 55 56/* This check must match the check on top of dirent_uri-tests.c and 57 path-tests.c */ 58#if defined(WIN32) || defined(__CYGWIN__) || defined(__OS2__) 59#define SVN_USE_DOS_PATHS 60#endif 61 62/* Path type definition. Used only by internal functions. */ 63typedef enum path_type_t { 64 type_uri, 65 type_dirent, 66 type_relpath 67} path_type_t; 68 69 70/**** Forward declarations *****/ 71 72static svn_boolean_t 73relpath_is_canonical(const char *relpath); 74 75 76/**** Internal implementation functions *****/ 77 78/* Return an internal-style new path based on PATH, allocated in POOL. 79 * 80 * "Internal-style" means that separators are all '/'. 81 */ 82static const char * 83internal_style(const char *path, apr_pool_t *pool) 84{ 85#if '/' != SVN_PATH_LOCAL_SEPARATOR 86 { 87 char *p = apr_pstrdup(pool, path); 88 path = p; 89 90 /* Convert all local-style separators to the canonical ones. */ 91 for (; *p != '\0'; ++p) 92 if (*p == SVN_PATH_LOCAL_SEPARATOR) 93 *p = '/'; 94 } 95#endif 96 97 return path; 98} 99 100/* Locale insensitive tolower() for converting parts of dirents and urls 101 while canonicalizing */ 102static char 103canonicalize_to_lower(char c) 104{ 105 if (c < 'A' || c > 'Z') 106 return c; 107 else 108 return (char)(c - 'A' + 'a'); 109} 110 111/* Locale insensitive toupper() for converting parts of dirents and urls 112 while canonicalizing */ 113static char 114canonicalize_to_upper(char c) 115{ 116 if (c < 'a' || c > 'z') 117 return c; 118 else 119 return (char)(c - 'a' + 'A'); 120} 121 122/* Calculates the length of the dirent absolute or non absolute root in 123 DIRENT, return 0 if dirent is not rooted */ 124static apr_size_t 125dirent_root_length(const char *dirent, apr_size_t len) 126{ 127#ifdef SVN_USE_DOS_PATHS 128 if (len >= 2 && dirent[1] == ':' && 129 ((dirent[0] >= 'A' && dirent[0] <= 'Z') || 130 (dirent[0] >= 'a' && dirent[0] <= 'z'))) 131 { 132 return (len > 2 && 
dirent[2] == '/') ? 3 : 2; 133 } 134 135 if (len > 2 && dirent[0] == '/' && dirent[1] == '/') 136 { 137 apr_size_t i = 2; 138 139 while (i < len && dirent[i] != '/') 140 i++; 141 142 if (i == len) 143 return len; /* Cygwin drive alias, invalid path on WIN32 */ 144 145 i++; /* Skip '/' */ 146 147 while (i < len && dirent[i] != '/') 148 i++; 149 150 return i; 151 } 152#endif /* SVN_USE_DOS_PATHS */ 153 if (len >= 1 && dirent[0] == '/') 154 return 1; 155 156 return 0; 157} 158 159 160/* Return the length of substring necessary to encompass the entire 161 * previous dirent segment in DIRENT, which should be a LEN byte string. 162 * 163 * A trailing slash will not be included in the returned length except 164 * in the case in which DIRENT is absolute and there are no more 165 * previous segments. 166 */ 167static apr_size_t 168dirent_previous_segment(const char *dirent, 169 apr_size_t len) 170{ 171 if (len == 0) 172 return 0; 173 174 --len; 175 while (len > 0 && dirent[len] != '/' 176#ifdef SVN_USE_DOS_PATHS 177 && (dirent[len] != ':' || len != 1) 178#endif /* SVN_USE_DOS_PATHS */ 179 ) 180 --len; 181 182 /* check if the remaining segment including trailing '/' is a root dirent */ 183 if (dirent_root_length(dirent, len+1) == len + 1) 184 return len + 1; 185 else 186 return len; 187} 188 189/* Calculates the length occupied by the schema defined root of URI */ 190static apr_size_t 191uri_schema_root_length(const char *uri, apr_size_t len) 192{ 193 apr_size_t i; 194 195 for (i = 0; i < len; i++) 196 { 197 if (uri[i] == '/') 198 { 199 if (i > 0 && uri[i-1] == ':' && i < len-1 && uri[i+1] == '/') 200 { 201 /* We have an absolute uri */ 202 if (i == 5 && strncmp("file", uri, 4) == 0) 203 return 7; /* file:// */ 204 else 205 { 206 for (i += 2; i < len; i++) 207 if (uri[i] == '/') 208 return i; 209 210 return len; /* Only a hostname is found */ 211 } 212 } 213 else 214 return 0; 215 } 216 } 217 218 return 0; 219} 220 221/* Returns TRUE if svn_dirent_is_absolute(dirent) or when 
dirent has 222 a non absolute root. (E.g. '/' or 'F:' on Windows) */ 223static svn_boolean_t 224dirent_is_rooted(const char *dirent) 225{ 226 if (! dirent) 227 return FALSE; 228 229 /* Root on all systems */ 230 if (dirent[0] == '/') 231 return TRUE; 232 233 /* On Windows, dirent is also absolute when it starts with 'H:' or 'H:/' 234 where 'H' is any letter. */ 235#ifdef SVN_USE_DOS_PATHS 236 if (((dirent[0] >= 'A' && dirent[0] <= 'Z') || 237 (dirent[0] >= 'a' && dirent[0] <= 'z')) && 238 (dirent[1] == ':')) 239 return TRUE; 240#endif /* SVN_USE_DOS_PATHS */ 241 242 return FALSE; 243} 244 245/* Return the length of substring necessary to encompass the entire 246 * previous relpath segment in RELPATH, which should be a LEN byte string. 247 * 248 * A trailing slash will not be included in the returned length. 249 */ 250static apr_size_t 251relpath_previous_segment(const char *relpath, 252 apr_size_t len) 253{ 254 if (len == 0) 255 return 0; 256 257 --len; 258 while (len > 0 && relpath[len] != '/') 259 --len; 260 261 return len; 262} 263 264/* Return the length of substring necessary to encompass the entire 265 * previous uri segment in URI, which should be a LEN byte string. 266 * 267 * A trailing slash will not be included in the returned length except 268 * in the case in which URI is absolute and there are no more 269 * previous segments. 270 */ 271static apr_size_t 272uri_previous_segment(const char *uri, 273 apr_size_t len) 274{ 275 apr_size_t root_length; 276 apr_size_t i = len; 277 if (len == 0) 278 return 0; 279 280 root_length = uri_schema_root_length(uri, len); 281 282 --i; 283 while (len > root_length && uri[i] != '/') 284 --i; 285 286 if (i == 0 && len > 1 && *uri == '/') 287 return 1; 288 289 return i; 290} 291 292/* Return the canonicalized version of PATH, of type TYPE, allocated in 293 * POOL. 
 */
/* Overview of the steps performed below, in order:
 *
 *   1. For type_uri input that contains "scheme://": copy the scheme
 *      (lower-cased), optional "user@" part (verbatim), the host
 *      (lower-cased) and the port, dropping the port when it is ":80" for
 *      http, ":443" for https, ":3690" for svn, or an empty ":".
 *   2. For dirents that are not URLs: copy the leading '/' (and, with
 *      SVN_USE_DOS_PATHS, a second '/' or an upper-cased drive letter).
 *   3. Copy the remaining segments one by one, skipping empty segments
 *      and "." (for URIs also "%2E"); for URIs "%2F" counts as a
 *      separator and is written out as '/'.
 *   4. Strip a trailing '/', unless PATH consists of only "scheme://".
 *   5. With SVN_USE_DOS_PATHS: validate/lower-case the server part of
 *      "//server/share" dirents.
 *   6. For URLs that have something after the authority: normalize the
 *      percent-encoding of that part.
 */
static const char *
canonicalize(path_type_t type, const char *path, apr_pool_t *pool)
{
  char *canon, *dst;
  const char *src;
  apr_size_t seglen;
  apr_size_t schemelen = 0;       /* length of "scheme://" once seen */
  apr_size_t canon_segments = 0;  /* segments written so far; the
                                     scheme+authority counts as one */
  svn_boolean_t url = FALSE;      /* TRUE once "scheme://" was recognized */
  char *schema_data = NULL;       /* first byte in CANON after the '/' that
                                     follows the authority, if any */

  /* "" is already canonical, so just return it; note that later code
     depends on path not being zero-length.  */
  if (SVN_PATH_IS_EMPTY(path))
    {
      assert(type != type_uri);
      return "";
    }

  /* Zero-filled work buffer of the same size as the input. */
  dst = canon = apr_pcalloc(pool, strlen(path) + 1);

  /* If this is supposed to be an URI, it should start with
     "scheme://".  We'll copy the scheme, host name, etc. to DST and
     set URL = TRUE. */
  src = path;
  if (type == type_uri)
    {
      assert(*src != '/');

      /* Look for the end of a potential scheme. */
      while (*src && (*src != '/') && (*src != ':'))
        src++;

      if (*src == ':' && *(src+1) == '/' && *(src+2) == '/')
        {
          const char *seg;

          url = TRUE;

          /* Found a scheme, convert to lowercase and copy to dst. */
          src = path;
          while (*src != ':')
            {
              *(dst++) = canonicalize_to_lower((*src++));
              schemelen++;
            }
          *(dst++) = ':';
          *(dst++) = '/';
          *(dst++) = '/';
          src += 3;
          schemelen += 3;

          /* This might be the hostname */
          seg = src;
          while (*src && (*src != '/') && (*src != '@'))
            src++;

          if (*src == '@')
            {
              /* Copy the username & password. */
              seglen = src - seg + 1;   /* includes the '@' itself */
              memcpy(dst, seg, seglen);
              dst += seglen;
              src++;
            }
          else
            src = seg;   /* no '@': rewind, what we scanned was the host */

          /* Found a hostname, convert to lowercase and copy to dst. */
          if (*src == '[')
            {
             *(dst++) = *(src++); /* Copy '[' */

              /* Bracketed host: copy only ':' and hex digits, lower-casing
                 the latter. */
              while (*src == ':'
                     || (*src >= '0' && (*src <= '9'))
                     || (*src >= 'a' && (*src <= 'f'))
                     || (*src >= 'A' && (*src <= 'F')))
                {
                  *(dst++) = canonicalize_to_lower((*src++));
                }

              if (*src == ']')
                *(dst++) = *(src++); /* Copy ']' */
            }
          else
            while (*src && (*src != '/') && (*src != ':'))
              *(dst++) = canonicalize_to_lower((*src++));

          if (*src == ':')
            {
              /* We probably have a port number: Is it a default portnumber
                 which doesn't belong in a canonical url? */
              if (src[1] == '8' && src[2] == '0'
                  && (src[3]== '/'|| !src[3])
                  && !strncmp(canon, "http:", 5))
                {
                  src += 3;
                }
              else if (src[1] == '4' && src[2] == '4' && src[3] == '3'
                       && (src[4]== '/'|| !src[4])
                       && !strncmp(canon, "https:", 6))
                {
                  src += 4;
                }
              else if (src[1] == '3' && src[2] == '6'
                       && src[3] == '9' && src[4] == '0'
                       && (src[5]== '/'|| !src[5])
                       && !strncmp(canon, "svn:", 4))
                {
                  src += 5;
                }
              else if (src[1] == '/' || !src[1])
                {
                  /* A ':' without any port digits: drop it. */
                  src += 1;
                }

              /* Whatever is left up to the next '/' (e.g. a non-default
                 port, including its ':') is copied lower-cased. */
              while (*src && (*src != '/'))
                *(dst++) = canonicalize_to_lower((*src++));
            }

          /* Copy trailing slash, or null-terminator. */
          *(dst) = *(src);

          /* Move src and dst forward only if we are not
           * at null-terminator yet. */
          if (*src)
            {
              src++;
              dst++;
              schema_data = dst;
            }

          canon_segments = 1;
        }
    }

  /* Copy to DST any separator or drive letter that must come before the
     first regular path segment. */
  if (! url && type != type_relpath)
    {
      src = path;
      /* If this is an absolute path, then just copy over the initial
         separator character. */
      if (*src == '/')
        {
          *(dst++) = *(src++);

#ifdef SVN_USE_DOS_PATHS
          /* On Windows permit two leading separator characters which means an
           * UNC path. */
          if ((type == type_dirent) && *src == '/')
            *(dst++) = *(src++);
#endif /* SVN_USE_DOS_PATHS */
        }
#ifdef SVN_USE_DOS_PATHS
      /* On Windows the first segment can be a drive letter, which we normalize
         to upper case. */
      else if (type == type_dirent &&
               ((*src >= 'a' && *src <= 'z') ||
                (*src >= 'A' && *src <= 'Z')) &&
               (src[1] == ':'))
        {
          *(dst++) = canonicalize_to_upper(*(src++));
          /* Leave the ':' to be processed as (or as part of) a path segment
             by the following code block, so we need not care whether it has
             a slash after it. */
        }
#endif /* SVN_USE_DOS_PATHS */
    }

  while (*src)
    {
      /* Parse each segment, finding the closing '/' (which might look
         like '%2F' for URIs).  */
      const char *next = src;
      apr_size_t slash_len = 0;

      while (*next
             && (next[0] != '/')
             && (! (type == type_uri && next[0] == '%' && next[1] == '2' &&
                    canonicalize_to_upper(next[2]) == 'F')))
        {
          ++next;
        }

      /* Record how long our "slash" is. */
      if (next[0] == '/')
        slash_len = 1;
      else if (type == type_uri && next[0] == '%')
        slash_len = 3;

      seglen = next - src;

      if (seglen == 0
          || (seglen == 1 && src[0] == '.')
          || (type == type_uri && seglen == 3 && src[0] == '%' && src[1] == '2'
              && canonicalize_to_upper(src[2]) == 'E'))
        {
          /* Empty or noop segment, so do nothing.  (For URIs, '%2E'
             is equivalent to '.').  */
        }
#ifdef SVN_USE_DOS_PATHS
      /* If this is the first path segment of a file:// URI and it contains a
         windows drive letter, convert the drive letter to upper case. */
      else if (url && canon_segments == 1 && seglen == 2 &&
               (strncmp(canon, "file:", 5) == 0) &&
               src[0] >= 'a' && src[0] <= 'z' && src[1] == ':')
        {
          *(dst++) = canonicalize_to_upper(src[0]);
          *(dst++) = ':';
          if (*next)
            *(dst++) = *next;
          canon_segments++;
        }
#endif /* SVN_USE_DOS_PATHS */
      else
        {
          /* An actual segment, append it to the destination path */
          memcpy(dst, src, seglen);
          dst += seglen;
          if (slash_len)
            *(dst++) = '/';   /* a "%2F" separator is written as '/' too */
          canon_segments++;
        }

      /* Skip over trailing slash to the next segment. */
      src = next + slash_len;
    }

  /* Remove the trailing slash if there was at least one
   * canonical segment and the last segment ends with a slash.
   *
   * But keep in mind that, for URLs, the scheme counts as a
   * canonical segment -- so if path is ONLY a scheme (such
   * as "https://") we should NOT remove the trailing slash. */
  if ((canon_segments > 0 && *(dst - 1) == '/')
      && ! (url && path[schemelen] == '\0'))
    {
      dst --;
    }

  *dst = '\0';

#ifdef SVN_USE_DOS_PATHS
  /* Skip leading double slashes when there are less than 2
   * canon segments. UNC paths *MUST* have two segments. */
  if ((type == type_dirent) && canon[0] == '/' && canon[1] == '/')
    {
      if (canon_segments < 2)
        return canon + 1;
      else
        {
          /* Now we're sure this is a valid UNC path, convert the server name
             (the first path segment) to lowercase as Windows treats it as case
             insensitive.
             Note: normally the share name is treated as case insensitive too,
             but it seems to be possible to configure Samba to treat those as
             case sensitive, so better leave that alone. */
          for (dst = canon + 2; *dst && *dst != '/'; dst++)
            *dst = canonicalize_to_lower(*dst);
        }
    }
#endif /* SVN_USE_DOS_PATHS */

  /* Check the normalization of characters in a uri */
  if (schema_data)
    {
      int need_extra = 0;
      src = schema_data;

      /* First pass: count how many extra bytes escaping will need.  Every
         byte that has to be turned into a three byte "%XX" sequence
         (including a '%' that is not followed by two hex digits, which
         becomes "%25") costs two additional bytes. */
      while (*src)
        {
          switch (*src)
            {
              case '/':
                break;
              case '%':
                if (!svn_ctype_isxdigit(*(src+1)) ||
                    !svn_ctype_isxdigit(*(src+2)))
                  need_extra += 2;
                else
                  src += 2;   /* well-formed escape: skip its two digits */
                break;
              default:
                if (!svn_uri__char_validity[(unsigned char)*src])
                  need_extra += 2;
                break;
            }
          src++;
        }

      if (need_extra > 0)
        {
          /* The result grows, so build it in a new, larger buffer that
             starts with a copy of everything before SCHEMA_DATA. */
          apr_size_t pre_schema_size = (apr_size_t)(schema_data - canon);

          dst = apr_palloc(pool, (apr_size_t)(src - canon) + need_extra + 1);
          memcpy(dst, canon, pre_schema_size);
          canon = dst;

          dst += pre_schema_size;
        }
      else
        dst = schema_data;   /* nothing grows: rewrite in place */

      src = schema_data;

      /* Second pass: write the normalized characters. */
      while (*src)
        {
          switch (*src)
            {
              case '/':
                *(dst++) = '/';
                break;
              case '%':
                if (!svn_ctype_isxdigit(*(src+1)) ||
                    !svn_ctype_isxdigit(*(src+2)))
                  {
                    /* A stray '%' is itself escaped. */
                    *(dst++) = '%';
                    *(dst++) = '2';
                    *(dst++) = '5';
                  }
                else
                  {
                    char digitz[3];
                    int val;

                    digitz[0] = *(++src);
                    digitz[1] = *(++src);
                    digitz[2] = 0;

                    val = (int)strtol(digitz, NULL, 16);

                    /* Unescape characters flagged as valid in the
                       svn_uri__char_validity table; keep everything else
                       escaped, with upper-case hex digits. */
                    if (svn_uri__char_validity[(unsigned char)val])
                      *(dst++) = (char)val;
                    else
                      {
                        *(dst++) = '%';
                        *(dst++) = canonicalize_to_upper(digitz[0]);
                        *(dst++) = canonicalize_to_upper(digitz[1]);
                      }
                  }
                break;
              default:
                if (!svn_uri__char_validity[(unsigned char)*src])
                  {
                    /* Writes "%XX" plus a terminating NUL; only the three
                       visible characters are kept. */
                    apr_snprintf(dst, 4, "%%%02X", (unsigned char)*src);
                    dst += 3;
                  }
                else
                  *(dst++) = *src;
                break;
            }
          src++;
        }
      *dst = '\0';
    }

  return canon;
}

/* Return the string length of the longest common ancestor of PATH1 and PATH2.
 * Pass type_uri for TYPES if PATH1 and PATH2 are URIs, and type_dirent if
 * PATH1 and PATH2 are regular paths.
 *
 * If the two paths do not share a common ancestor, return 0.
 *
 * New strings are allocated in POOL.  (The implementation below only
 * computes a length and does not reference POOL.)
 */
static apr_size_t
get_longest_ancestor_length(path_type_t types,
                            const char *path1,
                            const char *path2,
                            apr_pool_t *pool)
{
  apr_size_t path1_len, path2_len;
  apr_size_t i = 0;             /* number of leading bytes that match */
  apr_size_t last_dirsep = 0;   /* offset of the last matching '/' */
#ifdef SVN_USE_DOS_PATHS
  svn_boolean_t unc = FALSE;    /* TRUE if only the "//" of a UNC path
                                   matched */
#endif

  path1_len = strlen(path1);
  path2_len = strlen(path2);

  if (SVN_PATH_IS_EMPTY(path1) || SVN_PATH_IS_EMPTY(path2))
    return 0;

  /* Scan the common prefix byte by byte. */
  while (path1[i] == path2[i])
    {
      /* Keep track of the last directory separator we hit. */
      if (path1[i] == '/')
        last_dirsep = i;

      i++;

      /* If we get to the end of either path, break out. */
      if ((i == path1_len) || (i == path2_len))
        break;
    }

  /* two special cases:
     1. '/' is the longest common ancestor of '/' and '/foo' */
  if (i == 1 && path1[0] == '/' && path2[0] == '/')
    return 1;
  /* 2. '' is the longest common ancestor of any non-matching
   * strings 'foo' and 'bar' */
  if (types == type_dirent && i == 0)
    return 0;

  /* Handle some windows specific cases */
#ifdef SVN_USE_DOS_PATHS
  if (types == type_dirent)
    {
      /* don't count the '//' from UNC paths */
      if (last_dirsep == 1 && path1[0] == '/' && path1[1] == '/')
        {
          last_dirsep = 0;
          unc = TRUE;
        }

      /* X:/ and X:/foo */
      if (i == 3 && path1[2] == '/' && path1[1] == ':')
        return i;

      /* Cannot use SVN_ERR_ASSERT here, so we'll have to crash, sorry.
       * Note that this assertion triggers only if the code above has
       * been broken. The code below relies on this assertion, because
       * it uses [i - 1] as index. */
      assert(i > 0);

      /* X: and X:/ */
      if ((path1[i - 1] == ':' && path2[i] == '/') ||
          (path2[i - 1] == ':' && path1[i] == '/'))
        return 0;
      /* X: and X:foo */
      if (path1[i - 1] == ':' || path2[i - 1] == ':')
        return i;
    }
#endif /* SVN_USE_DOS_PATHS */

  /* last_dirsep is now the offset of the last directory separator we
     crossed before reaching a non-matching byte.  i is the offset of
     that non-matching byte, and is guaranteed to be <= the length of
     whichever path is shorter.
     If one of the paths is the common part return that. */
  if (((i == path1_len) && (path2[i] == '/'))
           || ((i == path2_len) && (path1[i] == '/'))
           || ((i == path1_len) && (i == path2_len)))
    return i;
  else
    {
      /* Nothing in common but the root folder '/' or 'X:/' for Windows
         dirents. */
      /* Note: with SVN_USE_DOS_PATHS the braces of the "if (! unc)" block
         below are opened and closed inside separate #ifdef sections, so
         that the plain '/' check sits inside that block on DOS-path
         platforms and stands alone everywhere else. */
#ifdef SVN_USE_DOS_PATHS
      if (! unc)
        {
          /* X:/foo and X:/bar returns X:/ */
          if ((types == type_dirent) &&
              last_dirsep == 2 && path1[1] == ':' && path1[2] == '/'
                               && path2[1] == ':' && path2[2] == '/')
            return 3;
#endif /* SVN_USE_DOS_PATHS */
          if (last_dirsep == 0 && path1[0] == '/' && path2[0] == '/')
            return 1;
#ifdef SVN_USE_DOS_PATHS
        }
#endif
    }

  return last_dirsep;
}

/* Determine whether PATH2 is a child of PATH1.
 *
 * PATH2 is a child of PATH1 if
 * 1) PATH1 is empty, and PATH2 is not empty and not an absolute path.
 * or
 * 2) PATH2 has n components, PATH1 has x < n components,
 *    and PATH1 matches PATH2 in all its x components.
 *    Components are separated by a slash, '/'.
 *
 * Pass type_uri for TYPE if PATH1 and PATH2 are URIs, and type_dirent if
 * PATH1 and PATH2 are regular paths.
 *
 * If PATH2 is not a child of PATH1, return NULL.
 *
 * If PATH2 is a child of PATH1, and POOL is not NULL, allocate a copy
 * of the child part of PATH2 in POOL and return a pointer to the
 * newly allocated child part.
783 * 784 * If PATH2 is a child of PATH1, and POOL is NULL, return a pointer 785 * pointing to the child part of PATH2. 786 * */ 787static const char * 788is_child(path_type_t type, const char *path1, const char *path2, 789 apr_pool_t *pool) 790{ 791 apr_size_t i; 792 793 /* Allow "" and "foo" or "H:foo" to be parent/child */ 794 if (SVN_PATH_IS_EMPTY(path1)) /* "" is the parent */ 795 { 796 if (SVN_PATH_IS_EMPTY(path2)) /* "" not a child */ 797 return NULL; 798 799 /* check if this is an absolute path */ 800 if ((type == type_uri) || 801 (type == type_dirent && dirent_is_rooted(path2))) 802 return NULL; 803 else 804 /* everything else is child */ 805 return pool ? apr_pstrdup(pool, path2) : path2; 806 } 807 808 /* Reach the end of at least one of the paths. How should we handle 809 things like path1:"foo///bar" and path2:"foo/bar/baz"? It doesn't 810 appear to arise in the current Subversion code, it's not clear to me 811 if they should be parent/child or not. */ 812 /* Hmmm... aren't paths assumed to be canonical in this function? 813 * How can "foo///bar" even happen if the paths are canonical? */ 814 for (i = 0; path1[i] && path2[i]; i++) 815 if (path1[i] != path2[i]) 816 return NULL; 817 818 /* FIXME: This comment does not really match 819 * the checks made in the code it refers to: */ 820 /* There are two cases that are parent/child 821 ... path1[i] == '\0' 822 .../foo path2[i] == '/' 823 or 824 / path1[i] == '\0' 825 /foo path2[i] != '/' 826 827 Other root paths (like X:/) fall under the former case: 828 X:/ path1[i] == '\0' 829 X:/foo path2[i] != '/' 830 831 Check for '//' to avoid matching '/' and '//srv'. 832 */ 833 if (path1[i] == '\0' && path2[i]) 834 { 835 if (path1[i - 1] == '/' 836#ifdef SVN_USE_DOS_PATHS 837 || ((type == type_dirent) && path1[i - 1] == ':') 838#endif 839 ) 840 { 841 if (path2[i] == '/') 842 /* .../ 843 * ..../ 844 * i */ 845 return NULL; 846 else 847 /* .../ 848 * .../foo 849 * i */ 850 return pool ? 
apr_pstrdup(pool, path2 + i) : path2 + i; 851 } 852 else if (path2[i] == '/') 853 { 854 if (path2[i + 1]) 855 /* ... 856 * .../foo 857 * i */ 858 return pool ? apr_pstrdup(pool, path2 + i + 1) : path2 + i + 1; 859 else 860 /* ... 861 * .../ 862 * i */ 863 return NULL; 864 } 865 } 866 867 /* Otherwise, path2 isn't a child. */ 868 return NULL; 869} 870 871 872/**** Public API functions ****/ 873 874const char * 875svn_dirent_internal_style(const char *dirent, apr_pool_t *pool) 876{ 877 return svn_dirent_canonicalize(internal_style(dirent, pool), pool); 878} 879 880const char * 881svn_dirent_local_style(const char *dirent, apr_pool_t *pool) 882{ 883 /* Internally, Subversion represents the current directory with the 884 empty string. But users like to see "." . */ 885 if (SVN_PATH_IS_EMPTY(dirent)) 886 return "."; 887 888#if '/' != SVN_PATH_LOCAL_SEPARATOR 889 { 890 char *p = apr_pstrdup(pool, dirent); 891 dirent = p; 892 893 /* Convert all canonical separators to the local-style ones. */ 894 for (; *p != '\0'; ++p) 895 if (*p == '/') 896 *p = SVN_PATH_LOCAL_SEPARATOR; 897 } 898#endif 899 900 return dirent; 901} 902 903const char * 904svn_relpath__internal_style(const char *relpath, 905 apr_pool_t *pool) 906{ 907 return svn_relpath_canonicalize(internal_style(relpath, pool), pool); 908} 909 910 911/* We decided against using apr_filepath_root here because of the negative 912 performance impact (creating a pool and converting strings ). 
*/ 913svn_boolean_t 914svn_dirent_is_root(const char *dirent, apr_size_t len) 915{ 916#ifdef SVN_USE_DOS_PATHS 917 /* On Windows and Cygwin, 'H:' or 'H:/' (where 'H' is any letter) 918 are also root directories */ 919 if ((len == 2 || ((len == 3) && (dirent[2] == '/'))) && 920 (dirent[1] == ':') && 921 ((dirent[0] >= 'A' && dirent[0] <= 'Z') || 922 (dirent[0] >= 'a' && dirent[0] <= 'z'))) 923 return TRUE; 924 925 /* On Windows and Cygwin //server/share is a root directory, 926 and on Cygwin //drive is a drive alias */ 927 if (len >= 2 && dirent[0] == '/' && dirent[1] == '/' 928 && dirent[len - 1] != '/') 929 { 930 int segments = 0; 931 apr_size_t i; 932 for (i = len; i >= 2; i--) 933 { 934 if (dirent[i] == '/') 935 { 936 segments ++; 937 if (segments > 1) 938 return FALSE; 939 } 940 } 941#ifdef __CYGWIN__ 942 return (segments <= 1); 943#else 944 return (segments == 1); /* //drive is invalid on plain Windows */ 945#endif 946 } 947#endif 948 949 /* directory is root if it's equal to '/' */ 950 if (len == 1 && dirent[0] == '/') 951 return TRUE; 952 953 return FALSE; 954} 955 956svn_boolean_t 957svn_uri_is_root(const char *uri, apr_size_t len) 958{ 959 assert(svn_uri_is_canonical(uri, NULL)); 960 return (len == uri_schema_root_length(uri, len)); 961} 962 963char *svn_dirent_join(const char *base, 964 const char *component, 965 apr_pool_t *pool) 966{ 967 apr_size_t blen = strlen(base); 968 apr_size_t clen = strlen(component); 969 char *dirent; 970 int add_separator; 971 972 assert(svn_dirent_is_canonical(base, pool)); 973 assert(svn_dirent_is_canonical(component, pool)); 974 975 /* If the component is absolute, then return it. 
*/ 976 if (svn_dirent_is_absolute(component)) 977 return apr_pmemdup(pool, component, clen + 1); 978 979 /* If either is empty return the other */ 980 if (SVN_PATH_IS_EMPTY(base)) 981 return apr_pmemdup(pool, component, clen + 1); 982 if (SVN_PATH_IS_EMPTY(component)) 983 return apr_pmemdup(pool, base, blen + 1); 984 985#ifdef SVN_USE_DOS_PATHS 986 if (component[0] == '/') 987 { 988 /* '/' is drive relative on Windows, not absolute like on Posix */ 989 if (dirent_is_rooted(base)) 990 { 991 /* Join component without '/' to root-of(base) */ 992 blen = dirent_root_length(base, blen); 993 component++; 994 clen--; 995 996 if (blen == 2 && base[1] == ':') /* "C:" case */ 997 { 998 char *root = apr_pmemdup(pool, base, 3); 999 root[2] = '/'; /* We don't need the final '\0' */ 1000 1001 base = root; 1002 blen = 3; 1003 } 1004 1005 if (clen == 0) 1006 return apr_pstrndup(pool, base, blen); 1007 } 1008 else 1009 return apr_pmemdup(pool, component, clen + 1); 1010 } 1011 else if (dirent_is_rooted(component)) 1012 return apr_pmemdup(pool, component, clen + 1); 1013#endif /* SVN_USE_DOS_PATHS */ 1014 1015 /* if last character of base is already a separator, don't add a '/' */ 1016 add_separator = 1; 1017 if (base[blen - 1] == '/' 1018#ifdef SVN_USE_DOS_PATHS 1019 || base[blen - 1] == ':' 1020#endif 1021 ) 1022 add_separator = 0; 1023 1024 /* Construct the new, combined dirent. */ 1025 dirent = apr_palloc(pool, blen + add_separator + clen + 1); 1026 memcpy(dirent, base, blen); 1027 if (add_separator) 1028 dirent[blen] = '/'; 1029 memcpy(dirent + blen + add_separator, component, clen + 1); 1030 1031 return dirent; 1032} 1033 1034char *svn_dirent_join_many(apr_pool_t *pool, const char *base, ...) 
1035{ 1036#define MAX_SAVED_LENGTHS 10 1037 apr_size_t saved_lengths[MAX_SAVED_LENGTHS]; 1038 apr_size_t total_len; 1039 int nargs; 1040 va_list va; 1041 const char *s; 1042 apr_size_t len; 1043 char *dirent; 1044 char *p; 1045 int add_separator; 1046 int base_arg = 0; 1047 1048 total_len = strlen(base); 1049 1050 assert(svn_dirent_is_canonical(base, pool)); 1051 1052 /* if last character of base is already a separator, don't add a '/' */ 1053 add_separator = 1; 1054 if (total_len == 0 1055 || base[total_len - 1] == '/' 1056#ifdef SVN_USE_DOS_PATHS 1057 || base[total_len - 1] == ':' 1058#endif 1059 ) 1060 add_separator = 0; 1061 1062 saved_lengths[0] = total_len; 1063 1064 /* Compute the length of the resulting string. */ 1065 1066 nargs = 0; 1067 va_start(va, base); 1068 while ((s = va_arg(va, const char *)) != NULL) 1069 { 1070 len = strlen(s); 1071 1072 assert(svn_dirent_is_canonical(s, pool)); 1073 1074 if (SVN_PATH_IS_EMPTY(s)) 1075 continue; 1076 1077 if (nargs++ < MAX_SAVED_LENGTHS) 1078 saved_lengths[nargs] = len; 1079 1080 if (dirent_is_rooted(s)) 1081 { 1082 total_len = len; 1083 base_arg = nargs; 1084 1085#ifdef SVN_USE_DOS_PATHS 1086 if (!svn_dirent_is_absolute(s)) /* Handle non absolute roots */ 1087 { 1088 /* Set new base and skip the current argument */ 1089 base = s = svn_dirent_join(base, s, pool); 1090 base_arg++; 1091 saved_lengths[0] = total_len = len = strlen(s); 1092 } 1093 else 1094#endif /* SVN_USE_DOS_PATHS */ 1095 { 1096 base = ""; /* Don't add base */ 1097 saved_lengths[0] = 0; 1098 } 1099 1100 add_separator = 1; 1101 if (s[len - 1] == '/' 1102#ifdef SVN_USE_DOS_PATHS 1103 || s[len - 1] == ':' 1104#endif 1105 ) 1106 add_separator = 0; 1107 } 1108 else if (nargs <= base_arg + 1) 1109 { 1110 total_len += add_separator + len; 1111 } 1112 else 1113 { 1114 total_len += 1 + len; 1115 } 1116 } 1117 va_end(va); 1118 1119 /* base == "/" and no further components. just return that. 
*/ 1120 if (add_separator == 0 && total_len == 1) 1121 return apr_pmemdup(pool, "/", 2); 1122 1123 /* we got the total size. allocate it, with room for a NULL character. */ 1124 dirent = p = apr_palloc(pool, total_len + 1); 1125 1126 /* if we aren't supposed to skip forward to an absolute component, and if 1127 this is not an empty base that we are skipping, then copy the base 1128 into the output. */ 1129 if (! SVN_PATH_IS_EMPTY(base)) 1130 { 1131 memcpy(p, base, len = saved_lengths[0]); 1132 p += len; 1133 } 1134 1135 nargs = 0; 1136 va_start(va, base); 1137 while ((s = va_arg(va, const char *)) != NULL) 1138 { 1139 if (SVN_PATH_IS_EMPTY(s)) 1140 continue; 1141 1142 if (++nargs < base_arg) 1143 continue; 1144 1145 if (nargs < MAX_SAVED_LENGTHS) 1146 len = saved_lengths[nargs]; 1147 else 1148 len = strlen(s); 1149 1150 /* insert a separator if we aren't copying in the first component 1151 (which can happen when base_arg is set). also, don't put in a slash 1152 if the prior character is a slash (occurs when prior component 1153 is "/"). */ 1154 if (p != dirent && 1155 ( ! 
(nargs - 1 <= base_arg) || add_separator)) 1156 *p++ = '/'; 1157 1158 /* copy the new component and advance the pointer */ 1159 memcpy(p, s, len); 1160 p += len; 1161 } 1162 va_end(va); 1163 1164 *p = '\0'; 1165 assert((apr_size_t)(p - dirent) == total_len); 1166 1167 return dirent; 1168} 1169 1170char * 1171svn_relpath_join(const char *base, 1172 const char *component, 1173 apr_pool_t *pool) 1174{ 1175 apr_size_t blen = strlen(base); 1176 apr_size_t clen = strlen(component); 1177 char *path; 1178 1179 assert(relpath_is_canonical(base)); 1180 assert(relpath_is_canonical(component)); 1181 1182 /* If either is empty return the other */ 1183 if (blen == 0) 1184 return apr_pmemdup(pool, component, clen + 1); 1185 if (clen == 0) 1186 return apr_pmemdup(pool, base, blen + 1); 1187 1188 path = apr_palloc(pool, blen + 1 + clen + 1); 1189 memcpy(path, base, blen); 1190 path[blen] = '/'; 1191 memcpy(path + blen + 1, component, clen + 1); 1192 1193 return path; 1194} 1195 1196char * 1197svn_dirent_dirname(const char *dirent, apr_pool_t *pool) 1198{ 1199 apr_size_t len = strlen(dirent); 1200 1201 assert(svn_dirent_is_canonical(dirent, pool)); 1202 1203 if (len == dirent_root_length(dirent, len)) 1204 return apr_pstrmemdup(pool, dirent, len); 1205 else 1206 return apr_pstrmemdup(pool, dirent, dirent_previous_segment(dirent, len)); 1207} 1208 1209const char * 1210svn_dirent_basename(const char *dirent, apr_pool_t *pool) 1211{ 1212 apr_size_t len = strlen(dirent); 1213 apr_size_t start; 1214 1215 assert(!pool || svn_dirent_is_canonical(dirent, pool)); 1216 1217 if (svn_dirent_is_root(dirent, len)) 1218 return ""; 1219 else 1220 { 1221 start = len; 1222 while (start > 0 && dirent[start - 1] != '/' 1223#ifdef SVN_USE_DOS_PATHS 1224 && dirent[start - 1] != ':' 1225#endif 1226 ) 1227 --start; 1228 } 1229 1230 if (pool) 1231 return apr_pstrmemdup(pool, dirent + start, len - start); 1232 else 1233 return dirent + start; 1234} 1235 1236void 1237svn_dirent_split(const char **dirpath, 
1238 const char **base_name, 1239 const char *dirent, 1240 apr_pool_t *pool) 1241{ 1242 assert(dirpath != base_name); 1243 1244 if (dirpath) 1245 *dirpath = svn_dirent_dirname(dirent, pool); 1246 1247 if (base_name) 1248 *base_name = svn_dirent_basename(dirent, pool); 1249} 1250 1251char * 1252svn_relpath_dirname(const char *relpath, 1253 apr_pool_t *pool) 1254{ 1255 apr_size_t len = strlen(relpath); 1256 1257 assert(relpath_is_canonical(relpath)); 1258 1259 return apr_pstrmemdup(pool, relpath, 1260 relpath_previous_segment(relpath, len)); 1261} 1262 1263const char * 1264svn_relpath_basename(const char *relpath, 1265 apr_pool_t *pool) 1266{ 1267 apr_size_t len = strlen(relpath); 1268 apr_size_t start; 1269 1270 assert(relpath_is_canonical(relpath)); 1271 1272 start = len; 1273 while (start > 0 && relpath[start - 1] != '/') 1274 --start; 1275 1276 if (pool) 1277 return apr_pstrmemdup(pool, relpath + start, len - start); 1278 else 1279 return relpath + start; 1280} 1281 1282void 1283svn_relpath_split(const char **dirpath, 1284 const char **base_name, 1285 const char *relpath, 1286 apr_pool_t *pool) 1287{ 1288 assert(dirpath != base_name); 1289 1290 if (dirpath) 1291 *dirpath = svn_relpath_dirname(relpath, pool); 1292 1293 if (base_name) 1294 *base_name = svn_relpath_basename(relpath, pool); 1295} 1296 1297const char * 1298svn_relpath_prefix(const char *relpath, 1299 int max_components, 1300 apr_pool_t *result_pool) 1301{ 1302 const char *end; 1303 assert(relpath_is_canonical(relpath)); 1304 1305 if (max_components <= 0) 1306 return ""; 1307 1308 for (end = relpath; *end; end++) 1309 { 1310 if (*end == '/') 1311 { 1312 if (!--max_components) 1313 break; 1314 } 1315 } 1316 1317 return apr_pstrmemdup(result_pool, relpath, end-relpath); 1318} 1319 1320char * 1321svn_uri_dirname(const char *uri, apr_pool_t *pool) 1322{ 1323 apr_size_t len = strlen(uri); 1324 1325 assert(svn_uri_is_canonical(uri, pool)); 1326 1327 if (svn_uri_is_root(uri, len)) 1328 return 
apr_pstrmemdup(pool, uri, len); 1329 else 1330 return apr_pstrmemdup(pool, uri, uri_previous_segment(uri, len)); 1331} 1332 1333const char * 1334svn_uri_basename(const char *uri, apr_pool_t *pool) 1335{ 1336 apr_size_t len = strlen(uri); 1337 apr_size_t start; 1338 1339 assert(svn_uri_is_canonical(uri, NULL)); 1340 1341 if (svn_uri_is_root(uri, len)) 1342 return ""; 1343 1344 start = len; 1345 while (start > 0 && uri[start - 1] != '/') 1346 --start; 1347 1348 return svn_path_uri_decode(uri + start, pool); 1349} 1350 1351void 1352svn_uri_split(const char **dirpath, 1353 const char **base_name, 1354 const char *uri, 1355 apr_pool_t *pool) 1356{ 1357 assert(dirpath != base_name); 1358 1359 if (dirpath) 1360 *dirpath = svn_uri_dirname(uri, pool); 1361 1362 if (base_name) 1363 *base_name = svn_uri_basename(uri, pool); 1364} 1365 1366char * 1367svn_dirent_get_longest_ancestor(const char *dirent1, 1368 const char *dirent2, 1369 apr_pool_t *pool) 1370{ 1371 return apr_pstrndup(pool, dirent1, 1372 get_longest_ancestor_length(type_dirent, dirent1, 1373 dirent2, pool)); 1374} 1375 1376char * 1377svn_relpath_get_longest_ancestor(const char *relpath1, 1378 const char *relpath2, 1379 apr_pool_t *pool) 1380{ 1381 assert(relpath_is_canonical(relpath1)); 1382 assert(relpath_is_canonical(relpath2)); 1383 1384 return apr_pstrndup(pool, relpath1, 1385 get_longest_ancestor_length(type_relpath, relpath1, 1386 relpath2, pool)); 1387} 1388 1389char * 1390svn_uri_get_longest_ancestor(const char *uri1, 1391 const char *uri2, 1392 apr_pool_t *pool) 1393{ 1394 apr_size_t uri_ancestor_len; 1395 apr_size_t i = 0; 1396 1397 assert(svn_uri_is_canonical(uri1, NULL)); 1398 assert(svn_uri_is_canonical(uri2, NULL)); 1399 1400 /* Find ':' */ 1401 while (1) 1402 { 1403 /* No shared protocol => no common prefix */ 1404 if (uri1[i] != uri2[i]) 1405 return apr_pmemdup(pool, SVN_EMPTY_PATH, 1406 sizeof(SVN_EMPTY_PATH)); 1407 1408 if (uri1[i] == ':') 1409 break; 1410 1411 /* They're both URLs, so EOS can't 
come before ':' */ 1412 assert((uri1[i] != '\0') && (uri2[i] != '\0')); 1413 1414 i++; 1415 } 1416 1417 i += 3; /* Advance past '://' */ 1418 1419 uri_ancestor_len = get_longest_ancestor_length(type_uri, uri1 + i, 1420 uri2 + i, pool); 1421 1422 if (uri_ancestor_len == 0 || 1423 (uri_ancestor_len == 1 && (uri1 + i)[0] == '/')) 1424 return apr_pmemdup(pool, SVN_EMPTY_PATH, sizeof(SVN_EMPTY_PATH)); 1425 else 1426 return apr_pstrndup(pool, uri1, uri_ancestor_len + i); 1427} 1428 1429const char * 1430svn_dirent_is_child(const char *parent_dirent, 1431 const char *child_dirent, 1432 apr_pool_t *pool) 1433{ 1434 return is_child(type_dirent, parent_dirent, child_dirent, pool); 1435} 1436 1437const char * 1438svn_dirent_skip_ancestor(const char *parent_dirent, 1439 const char *child_dirent) 1440{ 1441 apr_size_t len = strlen(parent_dirent); 1442 apr_size_t root_len; 1443 1444 if (0 != strncmp(parent_dirent, child_dirent, len)) 1445 return NULL; /* parent_dirent is no ancestor of child_dirent */ 1446 1447 if (child_dirent[len] == 0) 1448 return ""; /* parent_dirent == child_dirent */ 1449 1450 /* Child == parent + more-characters */ 1451 1452 root_len = dirent_root_length(child_dirent, strlen(child_dirent)); 1453 if (root_len > len) 1454 /* Different root, e.g. ("" "/...") or ("//z" "//z/share") */ 1455 return NULL; 1456 1457 /* Now, child == [root-of-parent] + [rest-of-parent] + more-characters. 1458 * It must be one of the following forms. 1459 * 1460 * rlen parent child bad? rlen=len? c[len]=/? 1461 * 0 "" "foo" * 1462 * 0 "b" "bad" ! 1463 * 0 "b" "b/foo" * 1464 * 1 "/" "/foo" * 1465 * 1 "/b" "/bad" ! 1466 * 1 "/b" "/b/foo" * 1467 * 2 "a:" "a:foo" * 1468 * 2 "a:b" "a:bad" ! 1469 * 2 "a:b" "a:b/foo" * 1470 * 3 "a:/" "a:/foo" * 1471 * 3 "a:/b" "a:/bad" ! 1472 * 3 "a:/b" "a:/b/foo" * 1473 * 5 "//s/s" "//s/s/foo" * * 1474 * 5 "//s/s/b" "//s/s/bad" ! 
1475 * 5 "//s/s/b" "//s/s/b/foo" * 1476 */ 1477 1478 if (child_dirent[len] == '/') 1479 /* "parent|child" is one of: 1480 * "[a:]b|/foo" "[a:]/b|/foo" "//s/s|/foo" "//s/s/b|/foo" */ 1481 return child_dirent + len + 1; 1482 1483 if (root_len == len) 1484 /* "parent|child" is "|foo" "/|foo" "a:|foo" "a:/|foo" "//s/s|/foo" */ 1485 return child_dirent + len; 1486 1487 return NULL; 1488} 1489 1490const char * 1491svn_relpath_skip_ancestor(const char *parent_relpath, 1492 const char *child_relpath) 1493{ 1494 apr_size_t len = strlen(parent_relpath); 1495 1496 assert(relpath_is_canonical(parent_relpath)); 1497 assert(relpath_is_canonical(child_relpath)); 1498 1499 if (len == 0) 1500 return child_relpath; 1501 1502 if (0 != strncmp(parent_relpath, child_relpath, len)) 1503 return NULL; /* parent_relpath is no ancestor of child_relpath */ 1504 1505 if (child_relpath[len] == 0) 1506 return ""; /* parent_relpath == child_relpath */ 1507 1508 if (child_relpath[len] == '/') 1509 return child_relpath + len + 1; 1510 1511 return NULL; 1512} 1513 1514 1515/* */ 1516static const char * 1517uri_skip_ancestor(const char *parent_uri, 1518 const char *child_uri) 1519{ 1520 apr_size_t len = strlen(parent_uri); 1521 1522 assert(svn_uri_is_canonical(parent_uri, NULL)); 1523 assert(svn_uri_is_canonical(child_uri, NULL)); 1524 1525 if (0 != strncmp(parent_uri, child_uri, len)) 1526 return NULL; /* parent_uri is no ancestor of child_uri */ 1527 1528 if (child_uri[len] == 0) 1529 return ""; /* parent_uri == child_uri */ 1530 1531 if (child_uri[len] == '/') 1532 return child_uri + len + 1; 1533 1534 return NULL; 1535} 1536 1537const char * 1538svn_uri_skip_ancestor(const char *parent_uri, 1539 const char *child_uri, 1540 apr_pool_t *result_pool) 1541{ 1542 const char *result = uri_skip_ancestor(parent_uri, child_uri); 1543 1544 return result ? 
svn_path_uri_decode(result, result_pool) : NULL; 1545} 1546 1547svn_boolean_t 1548svn_dirent_is_ancestor(const char *parent_dirent, const char *child_dirent) 1549{ 1550 return svn_dirent_skip_ancestor(parent_dirent, child_dirent) != NULL; 1551} 1552 1553svn_boolean_t 1554svn_uri__is_ancestor(const char *parent_uri, const char *child_uri) 1555{ 1556 return uri_skip_ancestor(parent_uri, child_uri) != NULL; 1557} 1558 1559 1560svn_boolean_t 1561svn_dirent_is_absolute(const char *dirent) 1562{ 1563 if (! dirent) 1564 return FALSE; 1565 1566 /* dirent is absolute if it starts with '/' on non-Windows platforms 1567 or with '//' on Windows platforms */ 1568 if (dirent[0] == '/' 1569#ifdef SVN_USE_DOS_PATHS 1570 && dirent[1] == '/' /* Single '/' depends on current drive */ 1571#endif 1572 ) 1573 return TRUE; 1574 1575 /* On Windows, dirent is also absolute when it starts with 'H:/' 1576 where 'H' is any letter. */ 1577#ifdef SVN_USE_DOS_PATHS 1578 if (((dirent[0] >= 'A' && dirent[0] <= 'Z')) && 1579 (dirent[1] == ':') && (dirent[2] == '/')) 1580 return TRUE; 1581#endif /* SVN_USE_DOS_PATHS */ 1582 1583 return FALSE; 1584} 1585 1586svn_error_t * 1587svn_dirent_get_absolute(const char **pabsolute, 1588 const char *relative, 1589 apr_pool_t *pool) 1590{ 1591 char *buffer; 1592 apr_status_t apr_err; 1593 const char *path_apr; 1594 1595 SVN_ERR_ASSERT(! svn_path_is_url(relative)); 1596 1597 /* Merge the current working directory with the relative dirent. */ 1598 SVN_ERR(svn_path_cstring_from_utf8(&path_apr, relative, pool)); 1599 1600 apr_err = apr_filepath_merge(&buffer, NULL, 1601 path_apr, 1602 APR_FILEPATH_NOTRELATIVE, 1603 pool); 1604 if (apr_err) 1605 { 1606 /* In some cases when the passed path or its ancestor(s) do not exist 1607 or no longer exist apr returns an error. 1608 1609 In many of these cases we would like to return a path anyway, when the 1610 passed path was already a safe absolute path. So check for that now to 1611 avoid an error. 
1612 1613 svn_dirent_is_absolute() doesn't perform the necessary checks to see 1614 if the path doesn't need post processing to be in the canonical absolute 1615 format. 1616 */ 1617 1618 if (svn_dirent_is_absolute(relative) 1619 && svn_dirent_is_canonical(relative, pool) 1620 && !svn_path_is_backpath_present(relative)) 1621 { 1622 *pabsolute = apr_pstrdup(pool, relative); 1623 return SVN_NO_ERROR; 1624 } 1625 1626 return svn_error_createf(SVN_ERR_BAD_FILENAME, 1627 svn_error_create(apr_err, NULL, NULL), 1628 _("Couldn't determine absolute path of '%s'"), 1629 svn_dirent_local_style(relative, pool)); 1630 } 1631 1632 SVN_ERR(svn_path_cstring_to_utf8(pabsolute, buffer, pool)); 1633 *pabsolute = svn_dirent_canonicalize(*pabsolute, pool); 1634 return SVN_NO_ERROR; 1635} 1636 1637const char * 1638svn_uri_canonicalize(const char *uri, apr_pool_t *pool) 1639{ 1640 return canonicalize(type_uri, uri, pool); 1641} 1642 1643const char * 1644svn_relpath_canonicalize(const char *relpath, apr_pool_t *pool) 1645{ 1646 return canonicalize(type_relpath, relpath, pool); 1647} 1648 1649const char * 1650svn_dirent_canonicalize(const char *dirent, apr_pool_t *pool) 1651{ 1652 const char *dst = canonicalize(type_dirent, dirent, pool); 1653 1654#ifdef SVN_USE_DOS_PATHS 1655 /* Handle a specific case on Windows where path == "X:/". Here we have to 1656 append the final '/', as svn_path_canonicalize will chop this of. 
*/ 1657 if (((dirent[0] >= 'A' && dirent[0] <= 'Z') || 1658 (dirent[0] >= 'a' && dirent[0] <= 'z')) && 1659 dirent[1] == ':' && dirent[2] == '/' && 1660 dst[3] == '\0') 1661 { 1662 char *dst_slash = apr_pcalloc(pool, 4); 1663 dst_slash[0] = canonicalize_to_upper(dirent[0]); 1664 dst_slash[1] = ':'; 1665 dst_slash[2] = '/'; 1666 dst_slash[3] = '\0'; 1667 1668 return dst_slash; 1669 } 1670#endif /* SVN_USE_DOS_PATHS */ 1671 1672 return dst; 1673} 1674 1675svn_boolean_t 1676svn_dirent_is_canonical(const char *dirent, apr_pool_t *scratch_pool) 1677{ 1678 const char *ptr = dirent; 1679 if (*ptr == '/') 1680 { 1681 ptr++; 1682#ifdef SVN_USE_DOS_PATHS 1683 /* Check for UNC paths */ 1684 if (*ptr == '/') 1685 { 1686 /* TODO: Scan hostname and sharename and fall back to part code */ 1687 1688 /* ### Fall back to old implementation */ 1689 return (strcmp(dirent, svn_dirent_canonicalize(dirent, scratch_pool)) 1690 == 0); 1691 } 1692#endif /* SVN_USE_DOS_PATHS */ 1693 } 1694#ifdef SVN_USE_DOS_PATHS 1695 else if (((*ptr >= 'a' && *ptr <= 'z') || (*ptr >= 'A' && *ptr <= 'Z')) && 1696 (ptr[1] == ':')) 1697 { 1698 /* The only canonical drive names are "A:"..."Z:", no lower case */ 1699 if (*ptr < 'A' || *ptr > 'Z') 1700 return FALSE; 1701 1702 ptr += 2; 1703 1704 if (*ptr == '/') 1705 ptr++; 1706 } 1707#endif /* SVN_USE_DOS_PATHS */ 1708 1709 return relpath_is_canonical(ptr); 1710} 1711 1712static svn_boolean_t 1713relpath_is_canonical(const char *relpath) 1714{ 1715 const char *dot_pos, *ptr = relpath; 1716 apr_size_t i, len; 1717 unsigned pattern = 0; 1718 1719 /* RELPATH is canonical if it has: 1720 * - no '.' segments 1721 * - no start and closing '/' 1722 * - no '//' 1723 */ 1724 1725 /* invalid beginnings */ 1726 if (*ptr == '/') 1727 return FALSE; 1728 1729 if (ptr[0] == '.' 
&& (ptr[1] == '/' || ptr[1] == '\0')) 1730 return FALSE; 1731 1732 /* valid special cases */ 1733 len = strlen(ptr); 1734 if (len < 2) 1735 return TRUE; 1736 1737 /* invalid endings */ 1738 if (ptr[len-1] == '/' || (ptr[len-1] == '.' && ptr[len-2] == '/')) 1739 return FALSE; 1740 1741 /* '.' are rare. So, search for them globally. There will often be no 1742 * more than one hit. Also note that we already checked for invalid 1743 * starts and endings, i.e. we only need to check for "/./" 1744 */ 1745 for (dot_pos = memchr(ptr, '.', len); 1746 dot_pos; 1747 dot_pos = strchr(dot_pos+1, '.')) 1748 if (dot_pos > ptr && dot_pos[-1] == '/' && dot_pos[1] == '/') 1749 return FALSE; 1750 1751 /* Now validate the rest of the path. */ 1752 for (i = 0; i < len - 1; ++i) 1753 { 1754 pattern = ((pattern & 0xff) << 8) + (unsigned char)ptr[i]; 1755 if (pattern == 0x101 * (unsigned char)('/')) 1756 return FALSE; 1757 } 1758 1759 return TRUE; 1760} 1761 1762svn_boolean_t 1763svn_relpath_is_canonical(const char *relpath) 1764{ 1765 return relpath_is_canonical(relpath); 1766} 1767 1768svn_boolean_t 1769svn_uri_is_canonical(const char *uri, apr_pool_t *scratch_pool) 1770{ 1771 const char *ptr = uri, *seg = uri; 1772 const char *schema_data = NULL; 1773 1774 /* URI is canonical if it has: 1775 * - lowercase URL scheme 1776 * - lowercase URL hostname 1777 * - no '.' segments 1778 * - no closing '/' 1779 * - no '//' 1780 * - uppercase hex-encoded pair digits ("%AB", not "%ab") 1781 */ 1782 1783 if (*uri == '\0') 1784 return FALSE; 1785 1786 if (! svn_path_is_url(uri)) 1787 return FALSE; 1788 1789 /* Skip the scheme. */ 1790 while (*ptr && (*ptr != '/') && (*ptr != ':')) 1791 ptr++; 1792 1793 /* No scheme? No good. */ 1794 if (! (*ptr == ':' && *(ptr+1) == '/' && *(ptr+2) == '/')) 1795 return FALSE; 1796 1797 /* Found a scheme, check that it's all lowercase. 
*/ 1798 ptr = uri; 1799 while (*ptr != ':') 1800 { 1801 if (*ptr >= 'A' && *ptr <= 'Z') 1802 return FALSE; 1803 ptr++; 1804 } 1805 /* Skip :// */ 1806 ptr += 3; 1807 1808 /* Scheme only? That works. */ 1809 if (! *ptr) 1810 return TRUE; 1811 1812 /* This might be the hostname */ 1813 seg = ptr; 1814 while (*ptr && (*ptr != '/') && (*ptr != '@')) 1815 ptr++; 1816 1817 if (*ptr == '@') 1818 seg = ptr + 1; 1819 1820 /* Found a hostname, check that it's all lowercase. */ 1821 ptr = seg; 1822 1823 if (*ptr == '[') 1824 { 1825 ptr++; 1826 while (*ptr == ':' 1827 || (*ptr >= '0' && *ptr <= '9') 1828 || (*ptr >= 'a' && *ptr <= 'f')) 1829 { 1830 ptr++; 1831 } 1832 1833 if (*ptr != ']') 1834 return FALSE; 1835 ptr++; 1836 } 1837 else 1838 while (*ptr && *ptr != '/' && *ptr != ':') 1839 { 1840 if (*ptr >= 'A' && *ptr <= 'Z') 1841 return FALSE; 1842 ptr++; 1843 } 1844 1845 /* Found a portnumber */ 1846 if (*ptr == ':') 1847 { 1848 apr_int64_t port = 0; 1849 1850 ptr++; 1851 schema_data = ptr; 1852 1853 while (*ptr >= '0' && *ptr <= '9') 1854 { 1855 port = 10 * port + (*ptr - '0'); 1856 ptr++; 1857 } 1858 1859 if (ptr == schema_data) 1860 return FALSE; /* Fail on "http://host:" */ 1861 1862 if (*ptr && *ptr != '/') 1863 return FALSE; /* Not a port number */ 1864 1865 if (port == 80 && strncmp(uri, "http:", 5) == 0) 1866 return FALSE; 1867 else if (port == 443 && strncmp(uri, "https:", 6) == 0) 1868 return FALSE; 1869 else if (port == 3690 && strncmp(uri, "svn:", 4) == 0) 1870 return FALSE; 1871 } 1872 1873 schema_data = ptr; 1874 1875#ifdef SVN_USE_DOS_PATHS 1876 if (schema_data && *ptr == '/') 1877 { 1878 /* If this is a file url, ptr now points to the third '/' in 1879 file:///C:/path. Check that if we have such a URL the drive 1880 letter is in uppercase. */ 1881 if (strncmp(uri, "file:", 5) == 0 && 1882 ! (*(ptr+1) >= 'A' && *(ptr+1) <= 'Z') && 1883 *(ptr+2) == ':') 1884 return FALSE; 1885 } 1886#endif /* SVN_USE_DOS_PATHS */ 1887 1888 /* Now validate the rest of the URI. 
*/ 1889 seg = ptr; 1890 while (*ptr && (*ptr != '/')) 1891 ptr++; 1892 while(1) 1893 { 1894 apr_size_t seglen = ptr - seg; 1895 1896 if (seglen == 1 && *seg == '.') 1897 return FALSE; /* /./ */ 1898 1899 if (*ptr == '/' && *(ptr+1) == '/') 1900 return FALSE; /* // */ 1901 1902 if (! *ptr && *(ptr - 1) == '/' && ptr - 1 != uri) 1903 return FALSE; /* foo/ */ 1904 1905 if (! *ptr) 1906 break; 1907 1908 if (*ptr == '/') 1909 ptr++; 1910 1911 seg = ptr; 1912 while (*ptr && (*ptr != '/')) 1913 ptr++; 1914 } 1915 1916 ptr = schema_data; 1917 1918 while (*ptr) 1919 { 1920 if (*ptr == '%') 1921 { 1922 char digitz[3]; 1923 int val; 1924 1925 /* Can't usesvn_ctype_isxdigit() because lower case letters are 1926 not in our canonical format */ 1927 if (((*(ptr+1) < '0' || *(ptr+1) > '9')) 1928 && (*(ptr+1) < 'A' || *(ptr+1) > 'F')) 1929 return FALSE; 1930 else if (((*(ptr+2) < '0' || *(ptr+2) > '9')) 1931 && (*(ptr+2) < 'A' || *(ptr+2) > 'F')) 1932 return FALSE; 1933 1934 digitz[0] = *(++ptr); 1935 digitz[1] = *(++ptr); 1936 digitz[2] = '\0'; 1937 val = (int)strtol(digitz, NULL, 16); 1938 1939 if (svn_uri__char_validity[val]) 1940 return FALSE; /* Should not have been escaped */ 1941 } 1942 else if (*ptr != '/' && !svn_uri__char_validity[(unsigned char)*ptr]) 1943 return FALSE; /* Character should have been escaped */ 1944 ptr++; 1945 } 1946 1947 return TRUE; 1948} 1949 1950svn_error_t * 1951svn_dirent_condense_targets(const char **pcommon, 1952 apr_array_header_t **pcondensed_targets, 1953 const apr_array_header_t *targets, 1954 svn_boolean_t remove_redundancies, 1955 apr_pool_t *result_pool, 1956 apr_pool_t *scratch_pool) 1957{ 1958 int i, num_condensed = targets->nelts; 1959 svn_boolean_t *removed; 1960 apr_array_header_t *abs_targets; 1961 1962 /* Early exit when there's no data to work on. 
*/ 1963 if (targets->nelts <= 0) 1964 { 1965 *pcommon = NULL; 1966 if (pcondensed_targets) 1967 *pcondensed_targets = NULL; 1968 return SVN_NO_ERROR; 1969 } 1970 1971 /* Get the absolute path of the first target. */ 1972 SVN_ERR(svn_dirent_get_absolute(pcommon, 1973 APR_ARRAY_IDX(targets, 0, const char *), 1974 scratch_pool)); 1975 1976 /* Early exit when there's only one dirent to work on. */ 1977 if (targets->nelts == 1) 1978 { 1979 *pcommon = apr_pstrdup(result_pool, *pcommon); 1980 if (pcondensed_targets) 1981 *pcondensed_targets = apr_array_make(result_pool, 0, 1982 sizeof(const char *)); 1983 return SVN_NO_ERROR; 1984 } 1985 1986 /* Copy the targets array, but with absolute dirents instead of 1987 relative. Also, find the pcommon argument by finding what is 1988 common in all of the absolute dirents. NOTE: This is not as 1989 efficient as it could be. The calculation of the basedir could 1990 be done in the loop below, which would save some calls to 1991 svn_dirent_get_longest_ancestor. I decided to do it this way 1992 because I thought it would be simpler, since this way, we don't 1993 even do the loop if we don't need to condense the targets. 
*/ 1994 1995 removed = apr_pcalloc(scratch_pool, (targets->nelts * 1996 sizeof(svn_boolean_t))); 1997 abs_targets = apr_array_make(scratch_pool, targets->nelts, 1998 sizeof(const char *)); 1999 2000 APR_ARRAY_PUSH(abs_targets, const char *) = *pcommon; 2001 2002 for (i = 1; i < targets->nelts; ++i) 2003 { 2004 const char *rel = APR_ARRAY_IDX(targets, i, const char *); 2005 const char *absolute; 2006 SVN_ERR(svn_dirent_get_absolute(&absolute, rel, scratch_pool)); 2007 APR_ARRAY_PUSH(abs_targets, const char *) = absolute; 2008 *pcommon = svn_dirent_get_longest_ancestor(*pcommon, absolute, 2009 scratch_pool); 2010 } 2011 2012 *pcommon = apr_pstrdup(result_pool, *pcommon); 2013 2014 if (pcondensed_targets != NULL) 2015 { 2016 size_t basedir_len; 2017 2018 if (remove_redundancies) 2019 { 2020 /* Find the common part of each pair of targets. If 2021 common part is equal to one of the dirents, the other 2022 is a child of it, and can be removed. If a target is 2023 equal to *pcommon, it can also be removed. */ 2024 2025 /* First pass: when one non-removed target is a child of 2026 another non-removed target, remove the child. 
*/ 2027 for (i = 0; i < abs_targets->nelts; ++i) 2028 { 2029 int j; 2030 2031 if (removed[i]) 2032 continue; 2033 2034 for (j = i + 1; j < abs_targets->nelts; ++j) 2035 { 2036 const char *abs_targets_i; 2037 const char *abs_targets_j; 2038 const char *ancestor; 2039 2040 if (removed[j]) 2041 continue; 2042 2043 abs_targets_i = APR_ARRAY_IDX(abs_targets, i, const char *); 2044 abs_targets_j = APR_ARRAY_IDX(abs_targets, j, const char *); 2045 2046 ancestor = svn_dirent_get_longest_ancestor 2047 (abs_targets_i, abs_targets_j, scratch_pool); 2048 2049 if (*ancestor == '\0') 2050 continue; 2051 2052 if (strcmp(ancestor, abs_targets_i) == 0) 2053 { 2054 removed[j] = TRUE; 2055 num_condensed--; 2056 } 2057 else if (strcmp(ancestor, abs_targets_j) == 0) 2058 { 2059 removed[i] = TRUE; 2060 num_condensed--; 2061 } 2062 } 2063 } 2064 2065 /* Second pass: when a target is the same as *pcommon, 2066 remove the target. */ 2067 for (i = 0; i < abs_targets->nelts; ++i) 2068 { 2069 const char *abs_targets_i = APR_ARRAY_IDX(abs_targets, i, 2070 const char *); 2071 2072 if ((strcmp(abs_targets_i, *pcommon) == 0) && (! removed[i])) 2073 { 2074 removed[i] = TRUE; 2075 num_condensed--; 2076 } 2077 } 2078 } 2079 2080 /* Now create the return array, and copy the non-removed items */ 2081 basedir_len = strlen(*pcommon); 2082 *pcondensed_targets = apr_array_make(result_pool, num_condensed, 2083 sizeof(const char *)); 2084 2085 for (i = 0; i < abs_targets->nelts; ++i) 2086 { 2087 const char *rel_item = APR_ARRAY_IDX(abs_targets, i, const char *); 2088 2089 /* Skip this if it's been removed. */ 2090 if (removed[i]) 2091 continue; 2092 2093 /* If a common prefix was found, condensed_targets are given 2094 relative to that prefix. */ 2095 if (basedir_len > 0) 2096 { 2097 /* Only advance our pointer past a dirent separator if 2098 REL_ITEM isn't the same as *PCOMMON. 
2099 2100 If *PCOMMON is a root dirent, basedir_len will already 2101 include the closing '/', so never advance the pointer 2102 here. 2103 */ 2104 rel_item += basedir_len; 2105 if (rel_item[0] && 2106 ! svn_dirent_is_root(*pcommon, basedir_len)) 2107 rel_item++; 2108 } 2109 2110 APR_ARRAY_PUSH(*pcondensed_targets, const char *) 2111 = apr_pstrdup(result_pool, rel_item); 2112 } 2113 } 2114 2115 return SVN_NO_ERROR; 2116} 2117 2118svn_error_t * 2119svn_uri_condense_targets(const char **pcommon, 2120 apr_array_header_t **pcondensed_targets, 2121 const apr_array_header_t *targets, 2122 svn_boolean_t remove_redundancies, 2123 apr_pool_t *result_pool, 2124 apr_pool_t *scratch_pool) 2125{ 2126 int i, num_condensed = targets->nelts; 2127 apr_array_header_t *uri_targets; 2128 svn_boolean_t *removed; 2129 2130 /* Early exit when there's no data to work on. */ 2131 if (targets->nelts <= 0) 2132 { 2133 *pcommon = NULL; 2134 if (pcondensed_targets) 2135 *pcondensed_targets = NULL; 2136 return SVN_NO_ERROR; 2137 } 2138 2139 *pcommon = svn_uri_canonicalize(APR_ARRAY_IDX(targets, 0, const char *), 2140 scratch_pool); 2141 2142 /* Early exit when there's only one uri to work on. */ 2143 if (targets->nelts == 1) 2144 { 2145 *pcommon = apr_pstrdup(result_pool, *pcommon); 2146 if (pcondensed_targets) 2147 *pcondensed_targets = apr_array_make(result_pool, 0, 2148 sizeof(const char *)); 2149 return SVN_NO_ERROR; 2150 } 2151 2152 /* Find the pcommon argument by finding what is common in all of the 2153 uris. NOTE: This is not as efficient as it could be. The calculation 2154 of the basedir could be done in the loop below, which would 2155 save some calls to svn_uri_get_longest_ancestor. I decided to do it 2156 this way because I thought it would be simpler, since this way, we don't 2157 even do the loop if we don't need to condense the targets. 
*/ 2158 2159 removed = apr_pcalloc(scratch_pool, (targets->nelts * 2160 sizeof(svn_boolean_t))); 2161 uri_targets = apr_array_make(scratch_pool, targets->nelts, 2162 sizeof(const char *)); 2163 2164 APR_ARRAY_PUSH(uri_targets, const char *) = *pcommon; 2165 2166 for (i = 1; i < targets->nelts; ++i) 2167 { 2168 const char *uri = svn_uri_canonicalize( 2169 APR_ARRAY_IDX(targets, i, const char *), 2170 scratch_pool); 2171 APR_ARRAY_PUSH(uri_targets, const char *) = uri; 2172 2173 /* If the commonmost ancestor so far is empty, there's no point 2174 in continuing to search for a common ancestor at all. But 2175 we'll keep looping for the sake of canonicalizing the 2176 targets, I suppose. */ 2177 if (**pcommon != '\0') 2178 *pcommon = svn_uri_get_longest_ancestor(*pcommon, uri, 2179 scratch_pool); 2180 } 2181 2182 *pcommon = apr_pstrdup(result_pool, *pcommon); 2183 2184 if (pcondensed_targets != NULL) 2185 { 2186 size_t basedir_len; 2187 2188 if (remove_redundancies) 2189 { 2190 /* Find the common part of each pair of targets. If 2191 common part is equal to one of the dirents, the other 2192 is a child of it, and can be removed. If a target is 2193 equal to *pcommon, it can also be removed. */ 2194 2195 /* First pass: when one non-removed target is a child of 2196 another non-removed target, remove the child. 
*/ 2197 for (i = 0; i < uri_targets->nelts; ++i) 2198 { 2199 int j; 2200 2201 if (removed[i]) 2202 continue; 2203 2204 for (j = i + 1; j < uri_targets->nelts; ++j) 2205 { 2206 const char *uri_i; 2207 const char *uri_j; 2208 const char *ancestor; 2209 2210 if (removed[j]) 2211 continue; 2212 2213 uri_i = APR_ARRAY_IDX(uri_targets, i, const char *); 2214 uri_j = APR_ARRAY_IDX(uri_targets, j, const char *); 2215 2216 ancestor = svn_uri_get_longest_ancestor(uri_i, 2217 uri_j, 2218 scratch_pool); 2219 2220 if (*ancestor == '\0') 2221 continue; 2222 2223 if (strcmp(ancestor, uri_i) == 0) 2224 { 2225 removed[j] = TRUE; 2226 num_condensed--; 2227 } 2228 else if (strcmp(ancestor, uri_j) == 0) 2229 { 2230 removed[i] = TRUE; 2231 num_condensed--; 2232 } 2233 } 2234 } 2235 2236 /* Second pass: when a target is the same as *pcommon, 2237 remove the target. */ 2238 for (i = 0; i < uri_targets->nelts; ++i) 2239 { 2240 const char *uri_targets_i = APR_ARRAY_IDX(uri_targets, i, 2241 const char *); 2242 2243 if ((strcmp(uri_targets_i, *pcommon) == 0) && (! removed[i])) 2244 { 2245 removed[i] = TRUE; 2246 num_condensed--; 2247 } 2248 } 2249 } 2250 2251 /* Now create the return array, and copy the non-removed items */ 2252 basedir_len = strlen(*pcommon); 2253 *pcondensed_targets = apr_array_make(result_pool, num_condensed, 2254 sizeof(const char *)); 2255 2256 for (i = 0; i < uri_targets->nelts; ++i) 2257 { 2258 const char *rel_item = APR_ARRAY_IDX(uri_targets, i, const char *); 2259 2260 /* Skip this if it's been removed. */ 2261 if (removed[i]) 2262 continue; 2263 2264 /* If a common prefix was found, condensed_targets are given 2265 relative to that prefix. */ 2266 if (basedir_len > 0) 2267 { 2268 /* Only advance our pointer past a dirent separator if 2269 REL_ITEM isn't the same as *PCOMMON. 2270 2271 If *PCOMMON is a root dirent, basedir_len will already 2272 include the closing '/', so never advance the pointer 2273 here. 
2274 */ 2275 rel_item += basedir_len; 2276 if ((rel_item[0] == '/') || 2277 (rel_item[0] && !svn_uri_is_root(*pcommon, basedir_len))) 2278 { 2279 rel_item++; 2280 } 2281 } 2282 2283 APR_ARRAY_PUSH(*pcondensed_targets, const char *) 2284 = svn_path_uri_decode(rel_item, result_pool); 2285 } 2286 } 2287 2288 return SVN_NO_ERROR; 2289} 2290 2291svn_error_t * 2292svn_dirent_is_under_root(svn_boolean_t *under_root, 2293 const char **result_path, 2294 const char *base_path, 2295 const char *path, 2296 apr_pool_t *result_pool) 2297{ 2298 apr_status_t status; 2299 char *full_path; 2300 2301 *under_root = FALSE; 2302 if (result_path) 2303 *result_path = NULL; 2304 2305 status = apr_filepath_merge(&full_path, 2306 base_path, 2307 path, 2308 APR_FILEPATH_NOTABOVEROOT 2309 | APR_FILEPATH_SECUREROOTTEST, 2310 result_pool); 2311 2312 if (status == APR_SUCCESS) 2313 { 2314 if (result_path) 2315 *result_path = svn_dirent_canonicalize(full_path, result_pool); 2316 *under_root = TRUE; 2317 return SVN_NO_ERROR; 2318 } 2319 else if (status == APR_EABOVEROOT) 2320 { 2321 *under_root = FALSE; 2322 return SVN_NO_ERROR; 2323 } 2324 2325 return svn_error_wrap_apr(status, NULL); 2326} 2327 2328svn_error_t * 2329svn_uri_get_dirent_from_file_url(const char **dirent, 2330 const char *url, 2331 apr_pool_t *pool) 2332{ 2333 const char *hostname, *path; 2334 2335 SVN_ERR_ASSERT(svn_uri_is_canonical(url, pool)); 2336 2337 /* Verify that the URL is well-formed (loosely) */ 2338 2339 /* First, check for the "file://" prefix. */ 2340 if (strncmp(url, "file://", 7) != 0) 2341 return svn_error_createf(SVN_ERR_RA_ILLEGAL_URL, NULL, 2342 _("Local URL '%s' does not contain 'file://' " 2343 "prefix"), url); 2344 2345 /* Find the HOSTNAME portion and the PATH portion of the URL. The host 2346 name is between the "file://" prefix and the next occurrence of '/'. We 2347 are considering everything from that '/' until the end of the URL to be 2348 the absolute path portion of the URL. 
2349 If we got just "file://", treat it the same as "file:///". */ 2350 hostname = url + 7; 2351 path = strchr(hostname, '/'); 2352 if (path) 2353 hostname = apr_pstrmemdup(pool, hostname, path - hostname); 2354 else 2355 path = "/"; 2356 2357 /* URI-decode HOSTNAME, and set it to NULL if it is "" or "localhost". */ 2358 if (*hostname == '\0') 2359 hostname = NULL; 2360 else 2361 { 2362 hostname = svn_path_uri_decode(hostname, pool); 2363 if (strcmp(hostname, "localhost") == 0) 2364 hostname = NULL; 2365 } 2366 2367 /* Duplicate the URL, starting at the top of the path. 2368 At the same time, we URI-decode the path. */ 2369#ifdef SVN_USE_DOS_PATHS 2370 /* On Windows, we'll typically have to skip the leading / if the 2371 path starts with a drive letter. Like most Web browsers, We 2372 support two variants of this scheme: 2373 2374 file:///X:/path and 2375 file:///X|/path 2376 2377 Note that, at least on WinNT and above, file:////./X:/path will 2378 also work, so we must make sure the transformation doesn't break 2379 that, and file:///path (that looks within the current drive 2380 only) should also keep working. 2381 If we got a non-empty hostname other than localhost, we convert this 2382 into an UNC path. In this case, we obviously don't strip the slash 2383 even if the path looks like it starts with a drive letter. 2384 */ 2385 { 2386 static const char valid_drive_letters[] = 2387 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; 2388 /* Casting away const! */ 2389 char *dup_path = (char *)svn_path_uri_decode(path, pool); 2390 2391 /* This check assumes ':' and '|' are already decoded! */ 2392 if (!hostname && dup_path[1] && strchr(valid_drive_letters, dup_path[1]) 2393 && (dup_path[2] == ':' || dup_path[2] == '|')) 2394 { 2395 /* Skip the leading slash. 
*/ 2396 ++dup_path; 2397 2398 if (dup_path[1] == '|') 2399 dup_path[1] = ':'; 2400 2401 if (dup_path[2] == '/' || dup_path[2] == '\0') 2402 { 2403 if (dup_path[2] == '\0') 2404 { 2405 /* A valid dirent for the driveroot must be like "C:/" instead of 2406 just "C:" or svn_dirent_join() will use the current directory 2407 on the drive instead */ 2408 char *new_path = apr_pcalloc(pool, 4); 2409 new_path[0] = dup_path[0]; 2410 new_path[1] = ':'; 2411 new_path[2] = '/'; 2412 new_path[3] = '\0'; 2413 dup_path = new_path; 2414 } 2415 } 2416 } 2417 if (hostname) 2418 { 2419 if (dup_path[0] == '/' && dup_path[1] == '\0') 2420 return svn_error_createf(SVN_ERR_RA_ILLEGAL_URL, NULL, 2421 _("Local URL '%s' contains only a hostname, " 2422 "no path"), url); 2423 2424 /* We still know that the path starts with a slash. */ 2425 *dirent = apr_pstrcat(pool, "//", hostname, dup_path, SVN_VA_NULL); 2426 } 2427 else 2428 *dirent = dup_path; 2429 } 2430#else /* !SVN_USE_DOS_PATHS */ 2431 /* Currently, the only hostnames we are allowing on non-Win32 platforms 2432 are the empty string and 'localhost'. 
*/ 2433 if (hostname) 2434 return svn_error_createf(SVN_ERR_RA_ILLEGAL_URL, NULL, 2435 _("Local URL '%s' contains unsupported hostname"), 2436 url); 2437 2438 *dirent = svn_path_uri_decode(path, pool); 2439#endif /* SVN_USE_DOS_PATHS */ 2440 return SVN_NO_ERROR; 2441} 2442 2443svn_error_t * 2444svn_uri_get_file_url_from_dirent(const char **url, 2445 const char *dirent, 2446 apr_pool_t *pool) 2447{ 2448 assert(svn_dirent_is_canonical(dirent, pool)); 2449 2450 SVN_ERR(svn_dirent_get_absolute(&dirent, dirent, pool)); 2451 2452 dirent = svn_path_uri_encode(dirent, pool); 2453 2454#ifndef SVN_USE_DOS_PATHS 2455 if (dirent[0] == '/' && dirent[1] == '\0') 2456 dirent = NULL; /* "file://" is the canonical form of "file:///" */ 2457 2458 *url = apr_pstrcat(pool, "file://", dirent, SVN_VA_NULL); 2459#else 2460 if (dirent[0] == '/') 2461 { 2462 /* Handle UNC paths //server/share -> file://server/share */ 2463 assert(dirent[1] == '/'); /* Expect UNC, not non-absolute */ 2464 2465 *url = apr_pstrcat(pool, "file:", dirent, SVN_VA_NULL); 2466 } 2467 else 2468 { 2469 char *uri = apr_pstrcat(pool, "file:///", dirent, SVN_VA_NULL); 2470 apr_size_t len = 8 /* strlen("file:///") */ + strlen(dirent); 2471 2472 /* "C:/" is a canonical dirent on Windows, 2473 but "file:///C:/" is not a canonical uri */ 2474 if (uri[len-1] == '/') 2475 uri[len-1] = '\0'; 2476 2477 *url = uri; 2478 } 2479#endif 2480 2481 return SVN_NO_ERROR; 2482} 2483 2484 2485 2486/* -------------- The fspath API (see private/svn_fspath.h) -------------- */ 2487 2488svn_boolean_t 2489svn_fspath__is_canonical(const char *fspath) 2490{ 2491 return fspath[0] == '/' && relpath_is_canonical(fspath + 1); 2492} 2493 2494 2495const char * 2496svn_fspath__canonicalize(const char *fspath, 2497 apr_pool_t *pool) 2498{ 2499 if ((fspath[0] == '/') && (fspath[1] == '\0')) 2500 return "/"; 2501 2502 return apr_pstrcat(pool, "/", svn_relpath_canonicalize(fspath, pool), 2503 SVN_VA_NULL); 2504} 2505 2506 2507svn_boolean_t 
2508svn_fspath__is_root(const char *fspath, apr_size_t len) 2509{ 2510 /* directory is root if it's equal to '/' */ 2511 return (len == 1 && fspath[0] == '/'); 2512} 2513 2514 2515const char * 2516svn_fspath__skip_ancestor(const char *parent_fspath, 2517 const char *child_fspath) 2518{ 2519 assert(svn_fspath__is_canonical(parent_fspath)); 2520 assert(svn_fspath__is_canonical(child_fspath)); 2521 2522 return svn_relpath_skip_ancestor(parent_fspath + 1, child_fspath + 1); 2523} 2524 2525 2526const char * 2527svn_fspath__dirname(const char *fspath, 2528 apr_pool_t *pool) 2529{ 2530 assert(svn_fspath__is_canonical(fspath)); 2531 2532 if (fspath[0] == '/' && fspath[1] == '\0') 2533 return apr_pstrdup(pool, fspath); 2534 else 2535 return apr_pstrcat(pool, "/", svn_relpath_dirname(fspath + 1, pool), 2536 SVN_VA_NULL); 2537} 2538 2539 2540const char * 2541svn_fspath__basename(const char *fspath, 2542 apr_pool_t *pool) 2543{ 2544 const char *result; 2545 assert(svn_fspath__is_canonical(fspath)); 2546 2547 result = svn_relpath_basename(fspath + 1, pool); 2548 2549 assert(strchr(result, '/') == NULL); 2550 return result; 2551} 2552 2553void 2554svn_fspath__split(const char **dirpath, 2555 const char **base_name, 2556 const char *fspath, 2557 apr_pool_t *result_pool) 2558{ 2559 assert(dirpath != base_name); 2560 2561 if (dirpath) 2562 *dirpath = svn_fspath__dirname(fspath, result_pool); 2563 2564 if (base_name) 2565 *base_name = svn_fspath__basename(fspath, result_pool); 2566} 2567 2568char * 2569svn_fspath__join(const char *fspath, 2570 const char *relpath, 2571 apr_pool_t *result_pool) 2572{ 2573 char *result; 2574 assert(svn_fspath__is_canonical(fspath)); 2575 assert(svn_relpath_is_canonical(relpath)); 2576 2577 if (relpath[0] == '\0') 2578 result = apr_pstrdup(result_pool, fspath); 2579 else if (fspath[1] == '\0') 2580 result = apr_pstrcat(result_pool, "/", relpath, SVN_VA_NULL); 2581 else 2582 result = apr_pstrcat(result_pool, fspath, "/", relpath, SVN_VA_NULL); 2583 2584 
assert(svn_fspath__is_canonical(result)); 2585 return result; 2586} 2587 2588char * 2589svn_fspath__get_longest_ancestor(const char *fspath1, 2590 const char *fspath2, 2591 apr_pool_t *result_pool) 2592{ 2593 char *result; 2594 assert(svn_fspath__is_canonical(fspath1)); 2595 assert(svn_fspath__is_canonical(fspath2)); 2596 2597 result = apr_pstrcat(result_pool, "/", 2598 svn_relpath_get_longest_ancestor(fspath1 + 1, 2599 fspath2 + 1, 2600 result_pool), 2601 SVN_VA_NULL); 2602 2603 assert(svn_fspath__is_canonical(result)); 2604 return result; 2605} 2606 2607 2608 2609 2610/* -------------- The urlpath API (see private/svn_fspath.h) ------------- */ 2611 2612const char * 2613svn_urlpath__canonicalize(const char *uri, 2614 apr_pool_t *pool) 2615{ 2616 if (svn_path_is_url(uri)) 2617 { 2618 uri = svn_uri_canonicalize(uri, pool); 2619 } 2620 else 2621 { 2622 uri = svn_fspath__canonicalize(uri, pool); 2623 /* Do a little dance to normalize hex encoding. */ 2624 uri = svn_path_uri_decode(uri, pool); 2625 uri = svn_path_uri_encode(uri, pool); 2626 } 2627 return uri; 2628} 2629 2630 2631/* -------------- The cert API (see private/svn_cert.h) ------------- */ 2632 2633svn_boolean_t 2634svn_cert__match_dns_identity(svn_string_t *pattern, svn_string_t *hostname) 2635{ 2636 apr_size_t pattern_pos = 0, hostname_pos = 0; 2637 2638 /* support leading wildcards that composed of the only character in the 2639 * left-most label. */ 2640 if (pattern->len >= 2 && 2641 pattern->data[pattern_pos] == '*' && 2642 pattern->data[pattern_pos + 1] == '.') 2643 { 2644 while (hostname_pos < hostname->len && 2645 hostname->data[hostname_pos] != '.') 2646 { 2647 hostname_pos++; 2648 } 2649 /* Assume that the wildcard must match something. Rule 2 says 2650 * that *.example.com should not match example.com. 
If the wildcard 2651 * ends up not matching anything then it matches .example.com which 2652 * seems to be essentially the same as just example.com */ 2653 if (hostname_pos == 0) 2654 return FALSE; 2655 2656 pattern_pos++; 2657 } 2658 2659 while (pattern_pos < pattern->len && hostname_pos < hostname->len) 2660 { 2661 char pattern_c = pattern->data[pattern_pos]; 2662 char hostname_c = hostname->data[hostname_pos]; 2663 2664 /* fold case as described in RFC 4343. 2665 * Note: We actually convert to lowercase, since our URI 2666 * canonicalization code converts to lowercase and generally 2667 * most certs are issued with lowercase DNS names, meaning 2668 * this avoids the fold operation in most cases. The RFC 2669 * suggests the opposite transformation, but doesn't require 2670 * any specific implementation in any case. It is critical 2671 * that this folding be locale independent so you can't use 2672 * tolower(). */ 2673 pattern_c = canonicalize_to_lower(pattern_c); 2674 hostname_c = canonicalize_to_lower(hostname_c); 2675 2676 if (pattern_c != hostname_c) 2677 { 2678 /* doesn't match */ 2679 return FALSE; 2680 } 2681 else 2682 { 2683 /* characters match so skip both */ 2684 pattern_pos++; 2685 hostname_pos++; 2686 } 2687 } 2688 2689 /* ignore a trailing period on the hostname since this has no effect on the 2690 * security of the matching. See the following for the long explanation as 2691 * to why: 2692 * https://bugzilla.mozilla.org/show_bug.cgi?id=134402#c28 2693 */ 2694 if (pattern_pos == pattern->len && 2695 hostname_pos == hostname->len - 1 && 2696 hostname->data[hostname_pos] == '.') 2697 hostname_pos++; 2698 2699 if (pattern_pos != pattern->len || hostname_pos != hostname->len) 2700 { 2701 /* end didn't match */ 2702 return FALSE; 2703 } 2704 2705 return TRUE; 2706} 2707