/* path.c revision 362181 */
1/* 2 * paths.c: a path manipulation library using svn_stringbuf_t 3 * 4 * ==================================================================== 5 * Licensed to the Apache Software Foundation (ASF) under one 6 * or more contributor license agreements. See the NOTICE file 7 * distributed with this work for additional information 8 * regarding copyright ownership. The ASF licenses this file 9 * to you under the Apache License, Version 2.0 (the 10 * "License"); you may not use this file except in compliance 11 * with the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, 16 * software distributed under the License is distributed on an 17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 18 * KIND, either express or implied. See the License for the 19 * specific language governing permissions and limitations 20 * under the License. 21 * ==================================================================== 22 */ 23 24 25 26#include <string.h> 27#include <assert.h> 28 29#include <apr_file_info.h> 30#include <apr_lib.h> 31#include <apr_uri.h> 32 33#include "svn_string.h" 34#include "svn_dirent_uri.h" 35#include "svn_path.h" 36#include "svn_private_config.h" /* for SVN_PATH_LOCAL_SEPARATOR */ 37#include "svn_utf.h" 38#include "svn_io.h" /* for svn_io_stat() */ 39#include "svn_ctype.h" 40 41#include "dirent_uri.h" 42 43 44/* The canonical empty path. Can this be changed? Well, change the empty 45 test below and the path library will work, not so sure about the fs/wc 46 libraries. */ 47#define SVN_EMPTY_PATH "" 48 49/* TRUE if s is the canonical empty path, FALSE otherwise */ 50#define SVN_PATH_IS_EMPTY(s) ((s)[0] == '\0') 51 52/* TRUE if s,n is the platform's empty path ("."), FALSE otherwise. Can 53 this be changed? Well, the path library will work, not so sure about 54 the OS! 
*/ 55#define SVN_PATH_IS_PLATFORM_EMPTY(s,n) ((n) == 1 && (s)[0] == '.') 56 57 58 59 60#ifndef NDEBUG 61/* This function is an approximation of svn_path_is_canonical. 62 * It is supposed to be used in functions that do not have access 63 * to a pool, but still want to assert that a path is canonical. 64 * 65 * PATH with length LEN is assumed to be canonical if it isn't 66 * the platform's empty path (see definition of SVN_PATH_IS_PLATFORM_EMPTY), 67 * and does not contain "/./", and any one of the following 68 * conditions is also met: 69 * 70 * 1. PATH has zero length 71 * 2. PATH is the root directory (what exactly a root directory is 72 * depends on the platform) 73 * 3. PATH is not a root directory and does not end with '/' 74 * 75 * If possible, please use svn_path_is_canonical instead. 76 */ 77static svn_boolean_t 78is_canonical(const char *path, 79 apr_size_t len) 80{ 81 return (! SVN_PATH_IS_PLATFORM_EMPTY(path, len) 82 && strstr(path, "/./") == NULL 83 && (len == 0 84 || (len == 1 && path[0] == '/') 85 || (path[len-1] != '/') 86#if defined(WIN32) || defined(__CYGWIN__) 87 || svn_dirent_is_root(path, len) 88#endif 89 )); 90} 91#endif 92 93 94/* functionality of svn_path_is_canonical but without the deprecation */ 95static svn_boolean_t 96svn_path_is_canonical_internal(const char *path, apr_pool_t *pool) 97{ 98 return svn_uri_is_canonical(path, pool) || 99 svn_dirent_is_canonical(path, pool) || 100 svn_relpath_is_canonical(path); 101} 102 103svn_boolean_t 104svn_path_is_canonical(const char *path, apr_pool_t *pool) 105{ 106 return svn_path_is_canonical_internal(path, pool); 107} 108 109/* functionality of svn_path_join but without the deprecation */ 110static char * 111svn_path_join_internal(const char *base, 112 const char *component, 113 apr_pool_t *pool) 114{ 115 apr_size_t blen = strlen(base); 116 apr_size_t clen = strlen(component); 117 char *path; 118 119 assert(svn_path_is_canonical_internal(base, pool)); 120 
assert(svn_path_is_canonical_internal(component, pool)); 121 122 /* If the component is absolute, then return it. */ 123 if (*component == '/') 124 return apr_pmemdup(pool, component, clen + 1); 125 126 /* If either is empty return the other */ 127 if (SVN_PATH_IS_EMPTY(base)) 128 return apr_pmemdup(pool, component, clen + 1); 129 if (SVN_PATH_IS_EMPTY(component)) 130 return apr_pmemdup(pool, base, blen + 1); 131 132 if (blen == 1 && base[0] == '/') 133 blen = 0; /* Ignore base, just return separator + component */ 134 135 /* Construct the new, combined path. */ 136 path = apr_palloc(pool, blen + 1 + clen + 1); 137 memcpy(path, base, blen); 138 path[blen] = '/'; 139 memcpy(path + blen + 1, component, clen + 1); 140 141 return path; 142} 143 144char *svn_path_join(const char *base, 145 const char *component, 146 apr_pool_t *pool) 147{ 148 return svn_path_join_internal(base, component, pool); 149} 150 151char *svn_path_join_many(apr_pool_t *pool, const char *base, ...) 152{ 153#define MAX_SAVED_LENGTHS 10 154 apr_size_t saved_lengths[MAX_SAVED_LENGTHS]; 155 apr_size_t total_len; 156 int nargs; 157 va_list va; 158 const char *s; 159 apr_size_t len; 160 char *path; 161 char *p; 162 svn_boolean_t base_is_empty = FALSE, base_is_root = FALSE; 163 int base_arg = 0; 164 165 total_len = strlen(base); 166 167 assert(svn_path_is_canonical_internal(base, pool)); 168 169 if (total_len == 1 && *base == '/') 170 base_is_root = TRUE; 171 else if (SVN_PATH_IS_EMPTY(base)) 172 { 173 total_len = sizeof(SVN_EMPTY_PATH) - 1; 174 base_is_empty = TRUE; 175 } 176 177 saved_lengths[0] = total_len; 178 179 /* Compute the length of the resulting string. 
*/ 180 181 nargs = 0; 182 va_start(va, base); 183 while ((s = va_arg(va, const char *)) != NULL) 184 { 185 len = strlen(s); 186 187 assert(svn_path_is_canonical_internal(s, pool)); 188 189 if (SVN_PATH_IS_EMPTY(s)) 190 continue; 191 192 if (nargs++ < MAX_SAVED_LENGTHS) 193 saved_lengths[nargs] = len; 194 195 if (*s == '/') 196 { 197 /* an absolute path. skip all components to this point and reset 198 the total length. */ 199 total_len = len; 200 base_arg = nargs; 201 base_is_root = len == 1; 202 base_is_empty = FALSE; 203 } 204 else if (nargs == base_arg 205 || (nargs == base_arg + 1 && base_is_root) 206 || base_is_empty) 207 { 208 /* if we have skipped everything up to this arg, then the base 209 and all prior components are empty. just set the length to 210 this component; do not add a separator. If the base is empty 211 we can now ignore it. */ 212 if (base_is_empty) 213 { 214 base_is_empty = FALSE; 215 total_len = 0; 216 } 217 total_len += len; 218 } 219 else 220 { 221 total_len += 1 + len; 222 } 223 } 224 va_end(va); 225 226 /* base == "/" and no further components. just return that. */ 227 if (base_is_root && total_len == 1) 228 return apr_pmemdup(pool, "/", 2); 229 230 /* we got the total size. allocate it, with room for a NULL character. */ 231 path = p = apr_palloc(pool, total_len + 1); 232 233 /* if we aren't supposed to skip forward to an absolute component, and if 234 this is not an empty base that we are skipping, then copy the base 235 into the output. */ 236 if (base_arg == 0 && ! (SVN_PATH_IS_EMPTY(base) && ! 
base_is_empty)) 237 { 238 if (SVN_PATH_IS_EMPTY(base)) 239 memcpy(p, SVN_EMPTY_PATH, len = saved_lengths[0]); 240 else 241 memcpy(p, base, len = saved_lengths[0]); 242 p += len; 243 } 244 245 nargs = 0; 246 va_start(va, base); 247 while ((s = va_arg(va, const char *)) != NULL) 248 { 249 if (SVN_PATH_IS_EMPTY(s)) 250 continue; 251 252 if (++nargs < base_arg) 253 continue; 254 255 if (nargs < MAX_SAVED_LENGTHS) 256 len = saved_lengths[nargs]; 257 else 258 len = strlen(s); 259 260 /* insert a separator if we aren't copying in the first component 261 (which can happen when base_arg is set). also, don't put in a slash 262 if the prior character is a slash (occurs when prior component 263 is "/"). */ 264 if (p != path && p[-1] != '/') 265 *p++ = '/'; 266 267 /* copy the new component and advance the pointer */ 268 memcpy(p, s, len); 269 p += len; 270 } 271 va_end(va); 272 273 *p = '\0'; 274 assert((apr_size_t)(p - path) == total_len); 275 276 return path; 277} 278 279 280 281apr_size_t 282svn_path_component_count(const char *path) 283{ 284 apr_size_t count = 0; 285 286 assert(is_canonical(path, strlen(path))); 287 288 while (*path) 289 { 290 const char *start; 291 292 while (*path == '/') 293 ++path; 294 295 start = path; 296 297 while (*path && *path != '/') 298 ++path; 299 300 if (path != start) 301 ++count; 302 } 303 304 return count; 305} 306 307 308/* Return the length of substring necessary to encompass the entire 309 * previous path segment in PATH, which should be a LEN byte string. 310 * 311 * A trailing slash will not be included in the returned length except 312 * in the case in which PATH is absolute and there are no more 313 * previous segments. 
314 */ 315static apr_size_t 316previous_segment(const char *path, 317 apr_size_t len) 318{ 319 if (len == 0) 320 return 0; 321 322 while (len > 0 && path[--len] != '/') 323 ; 324 325 if (len == 0 && path[0] == '/') 326 return 1; 327 else 328 return len; 329} 330 331 332void 333svn_path_add_component(svn_stringbuf_t *path, 334 const char *component) 335{ 336 apr_size_t len = strlen(component); 337 338 assert(is_canonical(path->data, path->len)); 339 assert(is_canonical(component, strlen(component))); 340 341 /* Append a dir separator, but only if this path is neither empty 342 nor consists of a single dir separator already. */ 343 if ((! SVN_PATH_IS_EMPTY(path->data)) 344 && (! ((path->len == 1) && (*(path->data) == '/')))) 345 { 346 char dirsep = '/'; 347 svn_stringbuf_appendbytes(path, &dirsep, sizeof(dirsep)); 348 } 349 350 svn_stringbuf_appendbytes(path, component, len); 351} 352 353 354void 355svn_path_remove_component(svn_stringbuf_t *path) 356{ 357 assert(is_canonical(path->data, path->len)); 358 359 path->len = previous_segment(path->data, path->len); 360 path->data[path->len] = '\0'; 361} 362 363 364void 365svn_path_remove_components(svn_stringbuf_t *path, apr_size_t n) 366{ 367 while (n > 0) 368 { 369 svn_path_remove_component(path); 370 n--; 371 } 372} 373 374 375char * 376svn_path_dirname(const char *path, apr_pool_t *pool) 377{ 378 apr_size_t len = strlen(path); 379 380 assert(svn_path_is_canonical_internal(path, pool)); 381 382 return apr_pstrmemdup(pool, path, previous_segment(path, len)); 383} 384 385 386char * 387svn_path_basename(const char *path, apr_pool_t *pool) 388{ 389 apr_size_t len = strlen(path); 390 apr_size_t start; 391 392 assert(svn_path_is_canonical_internal(path, pool)); 393 394 if (len == 1 && path[0] == '/') 395 start = 0; 396 else 397 { 398 start = len; 399 while (start > 0 && path[start - 1] != '/') 400 --start; 401 } 402 403 return apr_pstrmemdup(pool, path + start, len - start); 404} 405 406int 407svn_path_is_empty(const char 
*path) 408{ 409 assert(is_canonical(path, strlen(path))); 410 411 if (SVN_PATH_IS_EMPTY(path)) 412 return 1; 413 414 return 0; 415} 416 417int 418svn_path_compare_paths(const char *path1, 419 const char *path2) 420{ 421 apr_size_t path1_len = strlen(path1); 422 apr_size_t path2_len = strlen(path2); 423 apr_size_t min_len = ((path1_len < path2_len) ? path1_len : path2_len); 424 apr_size_t i = 0; 425 426 assert(is_canonical(path1, path1_len)); 427 assert(is_canonical(path2, path2_len)); 428 429 /* Skip past common prefix. */ 430 while (i < min_len && path1[i] == path2[i]) 431 ++i; 432 433 /* Are the paths exactly the same? */ 434 if ((path1_len == path2_len) && (i >= min_len)) 435 return 0; 436 437 /* Children of paths are greater than their parents, but less than 438 greater siblings of their parents. */ 439 if ((path1[i] == '/') && (path2[i] == 0)) 440 return 1; 441 if ((path2[i] == '/') && (path1[i] == 0)) 442 return -1; 443 if (path1[i] == '/') 444 return -1; 445 if (path2[i] == '/') 446 return 1; 447 448 /* Common prefix was skipped above, next character is compared to 449 determine order. We need to use an unsigned comparison, though, 450 so a "next character" of NULL (0x00) sorts numerically 451 smallest. */ 452 return (unsigned char)(path1[i]) < (unsigned char)(path2[i]) ? -1 : 1; 453} 454 455/* Return the string length of the longest common ancestor of PATH1 and PATH2. 456 * 457 * This function handles everything except the URL-handling logic 458 * of svn_path_get_longest_ancestor, and assumes that PATH1 and 459 * PATH2 are *not* URLs. 460 * 461 * If the two paths do not share a common ancestor, return 0. 462 * 463 * New strings are allocated in POOL. 
464 */ 465static apr_size_t 466get_path_ancestor_length(const char *path1, 467 const char *path2, 468 apr_pool_t *pool) 469{ 470 apr_size_t path1_len, path2_len; 471 apr_size_t i = 0; 472 apr_size_t last_dirsep = 0; 473 474 path1_len = strlen(path1); 475 path2_len = strlen(path2); 476 477 if (SVN_PATH_IS_EMPTY(path1) || SVN_PATH_IS_EMPTY(path2)) 478 return 0; 479 480 while (path1[i] == path2[i]) 481 { 482 /* Keep track of the last directory separator we hit. */ 483 if (path1[i] == '/') 484 last_dirsep = i; 485 486 i++; 487 488 /* If we get to the end of either path, break out. */ 489 if ((i == path1_len) || (i == path2_len)) 490 break; 491 } 492 493 /* two special cases: 494 1. '/' is the longest common ancestor of '/' and '/foo' 495 2. '/' is the longest common ancestor of '/rif' and '/raf' */ 496 if (i == 1 && path1[0] == '/' && path2[0] == '/') 497 return 1; 498 499 /* last_dirsep is now the offset of the last directory separator we 500 crossed before reaching a non-matching byte. i is the offset of 501 that non-matching byte. */ 502 if (((i == path1_len) && (path2[i] == '/')) 503 || ((i == path2_len) && (path1[i] == '/')) 504 || ((i == path1_len) && (i == path2_len))) 505 return i; 506 else 507 if (last_dirsep == 0 && path1[0] == '/' && path2[0] == '/') 508 return 1; 509 return last_dirsep; 510} 511 512 513char * 514svn_path_get_longest_ancestor(const char *path1, 515 const char *path2, 516 apr_pool_t *pool) 517{ 518 svn_boolean_t path1_is_url = svn_path_is_url(path1); 519 svn_boolean_t path2_is_url = svn_path_is_url(path2); 520 521 /* Are we messing with URLs? If we have a mix of URLs and non-URLs, 522 there's nothing common between them. */ 523 if (path1_is_url && path2_is_url) 524 { 525 return svn_uri_get_longest_ancestor(path1, path2, pool); 526 } 527 else if ((! path1_is_url) && (! 
path2_is_url)) 528 { 529 return apr_pstrndup(pool, path1, 530 get_path_ancestor_length(path1, path2, pool)); 531 } 532 else 533 { 534 /* A URL and a non-URL => no common prefix */ 535 return apr_pmemdup(pool, SVN_EMPTY_PATH, sizeof(SVN_EMPTY_PATH)); 536 } 537} 538 539const char * 540svn_path_is_child(const char *path1, 541 const char *path2, 542 apr_pool_t *pool) 543{ 544 apr_size_t i; 545 546 /* assert (is_canonical (path1, strlen (path1))); ### Expensive strlen */ 547 /* assert (is_canonical (path2, strlen (path2))); ### Expensive strlen */ 548 549 /* Allow "" and "foo" to be parent/child */ 550 if (SVN_PATH_IS_EMPTY(path1)) /* "" is the parent */ 551 { 552 if (SVN_PATH_IS_EMPTY(path2) /* "" not a child */ 553 || path2[0] == '/') /* "/foo" not a child */ 554 return NULL; 555 else 556 /* everything else is child */ 557 return pool ? apr_pstrdup(pool, path2) : path2; 558 } 559 560 /* Reach the end of at least one of the paths. How should we handle 561 things like path1:"foo///bar" and path2:"foo/bar/baz"? It doesn't 562 appear to arise in the current Subversion code, it's not clear to me 563 if they should be parent/child or not. */ 564 for (i = 0; path1[i] && path2[i]; i++) 565 if (path1[i] != path2[i]) 566 return NULL; 567 568 /* There are two cases that are parent/child 569 ... path1[i] == '\0' 570 .../foo path2[i] == '/' 571 or 572 / path1[i] == '\0' 573 /foo path2[i] != '/' 574 */ 575 if (path1[i] == '\0' && path2[i]) 576 { 577 if (path2[i] == '/') 578 return pool ? apr_pstrdup(pool, path2 + i + 1) : path2 + i + 1; 579 else if (i == 1 && path1[0] == '/') 580 return pool ? apr_pstrdup(pool, path2 + 1) : path2 + 1; 581 } 582 583 /* Otherwise, path2 isn't a child. */ 584 return NULL; 585} 586 587 588svn_boolean_t 589svn_path_is_ancestor(const char *path1, const char *path2) 590{ 591 apr_size_t path1_len = strlen(path1); 592 593 /* If path1 is empty and path2 is not absoulte, then path1 is an ancestor. 
*/ 594 if (SVN_PATH_IS_EMPTY(path1)) 595 return *path2 != '/'; 596 597 /* If path1 is a prefix of path2, then: 598 - If path1 ends in a path separator, 599 - If the paths are of the same length 600 OR 601 - path2 starts a new path component after the common prefix, 602 then path1 is an ancestor. */ 603 if (strncmp(path1, path2, path1_len) == 0) 604 return path1[path1_len - 1] == '/' 605 || (path2[path1_len] == '/' || path2[path1_len] == '\0'); 606 607 return FALSE; 608} 609 610 611apr_array_header_t * 612svn_path_decompose(const char *path, 613 apr_pool_t *pool) 614{ 615 apr_size_t i, oldi; 616 617 apr_array_header_t *components = 618 apr_array_make(pool, 1, sizeof(const char *)); 619 620 assert(svn_path_is_canonical_internal(path, pool)); 621 622 if (SVN_PATH_IS_EMPTY(path)) 623 return components; /* ### Should we return a "" component? */ 624 625 /* If PATH is absolute, store the '/' as the first component. */ 626 i = oldi = 0; 627 if (path[i] == '/') 628 { 629 char dirsep = '/'; 630 631 APR_ARRAY_PUSH(components, const char *) 632 = apr_pstrmemdup(pool, &dirsep, sizeof(dirsep)); 633 634 i++; 635 oldi++; 636 if (path[i] == '\0') /* path is a single '/' */ 637 return components; 638 } 639 640 do 641 { 642 if ((path[i] == '/') || (path[i] == '\0')) 643 { 644 if (SVN_PATH_IS_PLATFORM_EMPTY(path + oldi, i - oldi)) 645 APR_ARRAY_PUSH(components, const char *) = SVN_EMPTY_PATH; 646 else 647 APR_ARRAY_PUSH(components, const char *) 648 = apr_pstrmemdup(pool, path + oldi, i - oldi); 649 650 i++; 651 oldi = i; /* skipping past the dirsep */ 652 continue; 653 } 654 i++; 655 } 656 while (path[i-1]); 657 658 return components; 659} 660 661 662const char * 663svn_path_compose(const apr_array_header_t *components, 664 apr_pool_t *pool) 665{ 666 apr_size_t *lengths = apr_palloc(pool, components->nelts*sizeof(*lengths)); 667 apr_size_t max_length = components->nelts; 668 char *path; 669 char *p; 670 int i; 671 672 /* Get the length of each component so a total length can be 673 
calculated. */ 674 for (i = 0; i < components->nelts; ++i) 675 { 676 apr_size_t l = strlen(APR_ARRAY_IDX(components, i, const char *)); 677 lengths[i] = l; 678 max_length += l; 679 } 680 681 path = apr_palloc(pool, max_length + 1); 682 p = path; 683 684 for (i = 0; i < components->nelts; ++i) 685 { 686 /* Append a '/' to the path. Handle the case with an absolute 687 path where a '/' appears in the first component. Only append 688 a '/' if the component is the second component that does not 689 follow a "/" first component; or it is the third or later 690 component. */ 691 if (i > 1 || 692 (i == 1 && strcmp("/", APR_ARRAY_IDX(components, 693 0, 694 const char *)) != 0)) 695 { 696 *p++ = '/'; 697 } 698 699 memcpy(p, APR_ARRAY_IDX(components, i, const char *), lengths[i]); 700 p += lengths[i]; 701 } 702 703 *p = '\0'; 704 705 return path; 706} 707 708 709svn_boolean_t 710svn_path_is_single_path_component(const char *name) 711{ 712 assert(is_canonical(name, strlen(name))); 713 714 /* Can't be empty or `..' */ 715 if (SVN_PATH_IS_EMPTY(name) 716 || (name[0] == '.' && name[1] == '.' && name[2] == '\0')) 717 return FALSE; 718 719 /* Slashes are bad, m'kay... */ 720 if (strchr(name, '/') != NULL) 721 return FALSE; 722 723 /* It is valid. */ 724 return TRUE; 725} 726 727 728svn_boolean_t 729svn_path_is_dotpath_present(const char *path) 730{ 731 size_t len; 732 733 /* The empty string does not have a dotpath */ 734 if (path[0] == '\0') 735 return FALSE; 736 737 /* Handle "." or a leading "./" */ 738 if (path[0] == '.' && (path[1] == '\0' || path[1] == '/')) 739 return TRUE; 740 741 /* Paths of length 1 (at this point) have no dotpath present. */ 742 if (path[1] == '\0') 743 return FALSE; 744 745 /* If any segment is "/./", then a dotpath is present. */ 746 if (strstr(path, "/./") != NULL) 747 return TRUE; 748 749 /* Does the path end in "/." ? 
*/ 750 len = strlen(path); 751 return path[len - 2] == '/' && path[len - 1] == '.'; 752} 753 754svn_boolean_t 755svn_path_is_backpath_present(const char *path) 756{ 757 size_t len; 758 759 /* 0 and 1-length paths do not have a backpath */ 760 if (path[0] == '\0' || path[1] == '\0') 761 return FALSE; 762 763 /* Handle ".." or a leading "../" */ 764 if (path[0] == '.' && path[1] == '.' && (path[2] == '\0' || path[2] == '/')) 765 return TRUE; 766 767 /* Paths of length 2 (at this point) have no backpath present. */ 768 if (path[2] == '\0') 769 return FALSE; 770 771 /* If any segment is "..", then a backpath is present. */ 772 if (strstr(path, "/../") != NULL) 773 return TRUE; 774 775 /* Does the path end in "/.." ? */ 776 len = strlen(path); 777 return path[len - 3] == '/' && path[len - 2] == '.' && path[len - 1] == '.'; 778} 779 780 781/*** URI Stuff ***/ 782 783/* Examine PATH as a potential URI, and return a substring of PATH 784 that immediately follows the (scheme):// portion of the URI, or 785 NULL if PATH doesn't appear to be a valid URI. The returned value 786 is not alloced -- it shares memory with PATH. */ 787static const char * 788skip_uri_scheme(const char *path) 789{ 790 apr_size_t j; 791 792 /* A scheme is terminated by a : and cannot contain any /'s. */ 793 for (j = 0; path[j] && path[j] != ':'; ++j) 794 if (path[j] == '/') 795 return NULL; 796 797 if (j > 0 && path[j] == ':' && path[j+1] == '/' && path[j+2] == '/') 798 return path + j + 3; 799 800 return NULL; 801} 802 803 804svn_boolean_t 805svn_path_is_url(const char *path) 806{ 807 /* ### This function is reaaaaaaaaaaaaaally stupid right now. 808 We're just going to look for: 809 810 (scheme)://(optional_stuff) 811 812 Where (scheme) has no ':' or '/' characters. 813 814 Someday it might be nice to have an actual URI parser here. 815 */ 816 return skip_uri_scheme(path) != NULL; 817} 818 819 820 821/* Here is the BNF for path components in a URI. "pchar" is a 822 character in a path component. 
823 824 pchar = unreserved | escaped | 825 ":" | "@" | "&" | "=" | "+" | "$" | "," 826 unreserved = alphanum | mark 827 mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" 828 829 Note that "escaped" doesn't really apply to what users can put in 830 their paths, so that really means the set of characters is: 831 832 alphanum | mark | ":" | "@" | "&" | "=" | "+" | "$" | "," 833*/ 834const char svn_uri__char_validity[256] = { 835 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 836 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 837 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 838 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 839 840 /* 64 */ 841 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 842 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 843 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 844 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 845 846 /* 128 */ 847 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 848 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 849 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 850 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 851 852 /* 192 */ 853 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 854 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 855 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 856 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 857}; 858 859 860svn_boolean_t 861svn_path_is_uri_safe(const char *path) 862{ 863 apr_size_t i; 864 865 /* Skip the URI scheme. */ 866 path = skip_uri_scheme(path); 867 868 /* No scheme? Get outta here. */ 869 if (! path) 870 return FALSE; 871 872 /* Skip to the first slash that's after the URI scheme. */ 873 path = strchr(path, '/'); 874 875 /* If there's no first slash, then there's only a host portion; 876 therefore there couldn't be any uri-unsafe characters after the 877 host... so return true. 
*/ 878 if (path == NULL) 879 return TRUE; 880 881 for (i = 0; path[i]; i++) 882 { 883 /* Allow '%XX' (where each X is a hex digit) */ 884 if (path[i] == '%') 885 { 886 if (svn_ctype_isxdigit(path[i + 1]) && 887 svn_ctype_isxdigit(path[i + 2])) 888 { 889 i += 2; 890 continue; 891 } 892 return FALSE; 893 } 894 else if (! svn_uri__char_validity[((unsigned char)path[i])]) 895 { 896 return FALSE; 897 } 898 } 899 900 return TRUE; 901} 902 903 904/* URI-encode each character c in PATH for which TABLE[c] is 0. 905 If no encoding was needed, return PATH, else return a new string allocated 906 in POOL. */ 907static const char * 908uri_escape(const char *path, const char table[], apr_pool_t *pool) 909{ 910 svn_stringbuf_t *retstr; 911 apr_size_t i, copied = 0; 912 int c; 913 apr_size_t len; 914 const char *p; 915 916 /* To terminate our scanning loop, table[NUL] must report "invalid". */ 917 assert(table[0] == 0); 918 919 /* Quick check: Does any character need escaping? */ 920 for (p = path; table[(unsigned char)*p]; ++p) 921 {} 922 923 /* No char to escape before EOS? */ 924 if (*p == '\0') 925 return path; 926 927 /* We need to escape at least one character. */ 928 len = strlen(p) + (p - path); 929 retstr = svn_stringbuf_create_ensure(len, pool); 930 for (i = p - path; i < len; i++) 931 { 932 c = (unsigned char)path[i]; 933 if (table[c]) 934 continue; 935 936 /* If we got here, we're looking at a character that isn't 937 supported by the (or at least, our) URI encoding scheme. We 938 need to escape this character. */ 939 940 /* First things first, copy all the good stuff that we haven't 941 yet copied into our output buffer. */ 942 if (i - copied) 943 svn_stringbuf_appendbytes(retstr, path + copied, 944 i - copied); 945 946 /* Now, write in our escaped character, consisting of the 947 '%' and two digits. 
We cast the C to unsigned char here because 948 the 'X' format character will be tempted to treat it as an unsigned 949 int...which causes problem when messing with 0x80-0xFF chars. 950 We also need space for a null as apr_snprintf will write one. */ 951 svn_stringbuf_ensure(retstr, retstr->len + 4); 952 apr_snprintf(retstr->data + retstr->len, 4, "%%%02X", (unsigned char)c); 953 retstr->len += 3; 954 955 /* Finally, update our copy counter. */ 956 copied = i + 1; 957 } 958 959 /* Anything left to copy? */ 960 if (i - copied) 961 svn_stringbuf_appendbytes(retstr, path + copied, i - copied); 962 963 /* retstr is null-terminated either by apr_snprintf or the svn_stringbuf 964 functions. */ 965 966 return retstr->data; 967} 968 969 970const char * 971svn_path_uri_encode(const char *path, apr_pool_t *pool) 972{ 973 const char *ret; 974 975 ret = uri_escape(path, svn_uri__char_validity, pool); 976 977 /* Our interface guarantees a copy. */ 978 if (ret == path) 979 return apr_pstrdup(pool, path); 980 else 981 return ret; 982} 983 984static const char iri_escape_chars[256] = { 985 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 986 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 987 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 988 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 989 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 990 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 991 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 992 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 993 994 /* 128 */ 995 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 996 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 997 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 998 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 999 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1001 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1002 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 1003}; 1004 1005const char * 1006svn_path_uri_from_iri(const 
char *iri, apr_pool_t *pool) 1007{ 1008 return uri_escape(iri, iri_escape_chars, pool); 1009} 1010 1011static const char uri_autoescape_chars[256] = { 1012 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1013 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1014 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1015 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1016 1017 /* 64 */ 1018 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1019 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1020 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1021 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1022 1023 /* 128 */ 1024 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1025 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1026 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1027 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1028 1029 /* 192 */ 1030 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1031 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1032 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1033 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1034}; 1035 1036const char * 1037svn_path_uri_autoescape(const char *uri, apr_pool_t *pool) 1038{ 1039 return uri_escape(uri, uri_autoescape_chars, pool); 1040} 1041 1042const char * 1043svn_path_uri_decode(const char *path, apr_pool_t *pool) 1044{ 1045 svn_stringbuf_t *retstr; 1046 apr_size_t i; 1047 svn_boolean_t query_start = FALSE; 1048 1049 /* avoid repeated realloc */ 1050 retstr = svn_stringbuf_create_ensure(strlen(path) + 1, pool); 1051 1052 retstr->len = 0; 1053 for (i = 0; path[i]; i++) 1054 { 1055 char c = path[i]; 1056 1057 if (c == '?') 1058 { 1059 /* Mark the start of the query string, if it exists. */ 1060 query_start = TRUE; 1061 } 1062 else if (c == '+' && query_start) 1063 { 1064 /* Only do this if we are into the query string. 
1065 * RFC 2396, section 3.3 */ 1066 c = ' '; 1067 } 1068 else if (c == '%' && svn_ctype_isxdigit(path[i + 1]) 1069 && svn_ctype_isxdigit(path[i+2])) 1070 { 1071 char digitz[3]; 1072 digitz[0] = path[++i]; 1073 digitz[1] = path[++i]; 1074 digitz[2] = '\0'; 1075 c = (char)(strtol(digitz, NULL, 16)); 1076 } 1077 1078 retstr->data[retstr->len++] = c; 1079 } 1080 1081 /* Null-terminate this bad-boy. */ 1082 retstr->data[retstr->len] = 0; 1083 1084 return retstr->data; 1085} 1086 1087 1088const char * 1089svn_path_url_add_component2(const char *url, 1090 const char *component, 1091 apr_pool_t *pool) 1092{ 1093 /* = svn_path_uri_encode() but without always copying */ 1094 component = uri_escape(component, svn_uri__char_validity, pool); 1095 1096 return svn_path_join_internal(url, component, pool); 1097} 1098 1099svn_error_t * 1100svn_path_get_absolute(const char **pabsolute, 1101 const char *relative, 1102 apr_pool_t *pool) 1103{ 1104 if (svn_path_is_url(relative)) 1105 { 1106 *pabsolute = apr_pstrdup(pool, relative); 1107 return SVN_NO_ERROR; 1108 } 1109 1110 return svn_dirent_get_absolute(pabsolute, relative, pool); 1111} 1112 1113 1114#if !defined(WIN32) && !defined(DARWIN) 1115/** Get APR's internal path encoding. */ 1116static svn_error_t * 1117get_path_encoding(svn_boolean_t *path_is_utf8, apr_pool_t *pool) 1118{ 1119 apr_status_t apr_err; 1120 int encoding_style; 1121 1122 apr_err = apr_filepath_encoding(&encoding_style, pool); 1123 if (apr_err) 1124 return svn_error_wrap_apr(apr_err, 1125 _("Can't determine the native path encoding")); 1126 1127 /* ### What to do about APR_FILEPATH_ENCODING_UNKNOWN? 1128 Well, for now we'll just punt to the svn_utf_ functions; 1129 those will at least do the ASCII-subset check. 
*/ 1130 *path_is_utf8 = (encoding_style == APR_FILEPATH_ENCODING_UTF8); 1131 return SVN_NO_ERROR; 1132} 1133#endif 1134 1135 1136svn_error_t * 1137svn_path_cstring_from_utf8(const char **path_apr, 1138 const char *path_utf8, 1139 apr_pool_t *pool) 1140{ 1141#if !defined(WIN32) && !defined(DARWIN) 1142 svn_boolean_t path_is_utf8; 1143 SVN_ERR(get_path_encoding(&path_is_utf8, pool)); 1144 if (path_is_utf8) 1145#endif 1146 { 1147 *path_apr = apr_pstrdup(pool, path_utf8); 1148 return SVN_NO_ERROR; 1149 } 1150#if !defined(WIN32) && !defined(DARWIN) 1151 else 1152 return svn_utf_cstring_from_utf8(path_apr, path_utf8, pool); 1153#endif 1154} 1155 1156 1157svn_error_t * 1158svn_path_cstring_to_utf8(const char **path_utf8, 1159 const char *path_apr, 1160 apr_pool_t *pool) 1161{ 1162#if !defined(WIN32) && !defined(DARWIN) 1163 svn_boolean_t path_is_utf8; 1164 SVN_ERR(get_path_encoding(&path_is_utf8, pool)); 1165 if (path_is_utf8) 1166#endif 1167 { 1168 *path_utf8 = apr_pstrdup(pool, path_apr); 1169 return SVN_NO_ERROR; 1170 } 1171#if !defined(WIN32) && !defined(DARWIN) 1172 else 1173 return svn_utf_cstring_to_utf8(path_utf8, path_apr, pool); 1174#endif 1175} 1176 1177 1178const char * 1179svn_path_illegal_path_escape(const char *path, apr_pool_t *pool) 1180{ 1181 svn_stringbuf_t *retstr; 1182 apr_size_t i, copied = 0; 1183 int c; 1184 1185 /* At least one control character: 1186 strlen - 1 (control) + \ + N + N + N + null . */ 1187 retstr = svn_stringbuf_create_ensure(strlen(path) + 4, pool); 1188 for (i = 0; path[i]; i++) 1189 { 1190 c = (unsigned char)path[i]; 1191 if (! svn_ctype_iscntrl(c)) 1192 continue; 1193 1194 /* If we got here, we're looking at a character that isn't 1195 supported by the (or at least, our) URI encoding scheme. We 1196 need to escape this character. */ 1197 1198 /* First things first, copy all the good stuff that we haven't 1199 yet copied into our output buffer. 
*/ 1200 if (i - copied) 1201 svn_stringbuf_appendbytes(retstr, path + copied, 1202 i - copied); 1203 1204 /* Make sure buffer is big enough for '\' 'N' 'N' 'N' (and NUL) */ 1205 svn_stringbuf_ensure(retstr, retstr->len + 5); 1206 /*### The backslash separator doesn't work too great with Windows, 1207 but it's what we'll use for consistency with invalid utf8 1208 formatting (until someone has a better idea) */ 1209 apr_snprintf(retstr->data + retstr->len, 5, "\\%03o", (unsigned char)c); 1210 retstr->len += 4; 1211 1212 /* Finally, update our copy counter. */ 1213 copied = i + 1; 1214 } 1215 1216 /* If we didn't encode anything, we don't need to duplicate the string. */ 1217 if (retstr->len == 0) 1218 return path; 1219 1220 /* Anything left to copy? */ 1221 if (i - copied) 1222 svn_stringbuf_appendbytes(retstr, path + copied, i - copied); 1223 1224 /* retstr is null-terminated either by apr_snprintf or the svn_stringbuf 1225 functions. */ 1226 1227 return retstr->data; 1228} 1229 1230svn_error_t * 1231svn_path_check_valid(const char *path, apr_pool_t *pool) 1232{ 1233 const char *c; 1234 1235 for (c = path; *c; c++) 1236 { 1237 if (svn_ctype_iscntrl(*c)) 1238 { 1239 return svn_error_createf(SVN_ERR_FS_PATH_SYNTAX, NULL, 1240 _("Invalid control character '0x%02x' in path '%s'"), 1241 (unsigned char)*c, 1242 svn_path_illegal_path_escape(svn_dirent_local_style(path, pool), 1243 pool)); 1244 } 1245 } 1246 1247 return SVN_NO_ERROR; 1248} 1249 1250void 1251svn_path_splitext(const char **path_root, 1252 const char **path_ext, 1253 const char *path, 1254 apr_pool_t *pool) 1255{ 1256 const char *last_dot, *last_slash; 1257 1258 /* Easy out -- why do all the work when there's no way to report it? */ 1259 if (! (path_root || path_ext)) 1260 return; 1261 1262 /* Do we even have a period in this thing? And if so, is there 1263 anything after it? We look for the "rightmost" period in the 1264 string. 
*/ 1265 last_dot = strrchr(path, '.'); 1266 if (last_dot && (*(last_dot + 1) != '\0')) 1267 { 1268 /* If we have a period, we need to make sure it occurs in the 1269 final path component -- that there's no path separator 1270 between the last period and the end of the PATH -- otherwise, 1271 it doesn't count. Also, we want to make sure that our period 1272 isn't the first character of the last component. */ 1273 last_slash = strrchr(path, '/'); 1274 if ((last_slash && (last_dot > (last_slash + 1))) 1275 || ((! last_slash) && (last_dot > path))) 1276 { 1277 if (path_root) 1278 *path_root = apr_pstrmemdup(pool, path, 1279 (last_dot - path + 1) * sizeof(*path)); 1280 if (path_ext) 1281 *path_ext = apr_pstrdup(pool, last_dot + 1); 1282 return; 1283 } 1284 } 1285 /* If we get here, we never found a suitable separator character, so 1286 there's no split. */ 1287 if (path_root) 1288 *path_root = apr_pstrdup(pool, path); 1289 if (path_ext) 1290 *path_ext = ""; 1291} 1292 1293 1294/* Repository relative URLs (^/). */ 1295 1296svn_boolean_t 1297svn_path_is_repos_relative_url(const char *path) 1298{ 1299 return (0 == strncmp("^/", path, 2)); 1300} 1301 1302svn_error_t * 1303svn_path_resolve_repos_relative_url(const char **absolute_url, 1304 const char *relative_url, 1305 const char *repos_root_url, 1306 apr_pool_t *pool) 1307{ 1308 if (! svn_path_is_repos_relative_url(relative_url)) 1309 return svn_error_createf(SVN_ERR_BAD_URL, NULL, 1310 _("Improper relative URL '%s'"), 1311 relative_url); 1312 1313 /* No assumptions are made about the canonicalization of the input 1314 * arguments, it is presumed that the output will be canonicalized after 1315 * this function, which will remove any duplicate path separator. 1316 */ 1317 *absolute_url = apr_pstrcat(pool, repos_root_url, relative_url + 1, 1318 SVN_VA_NULL); 1319 1320 return SVN_NO_ERROR; 1321} 1322 1323