1/* 2 * paths.c: a path manipulation library using svn_stringbuf_t 3 * 4 * ==================================================================== 5 * Licensed to the Apache Software Foundation (ASF) under one 6 * or more contributor license agreements. See the NOTICE file 7 * distributed with this work for additional information 8 * regarding copyright ownership. The ASF licenses this file 9 * to you under the Apache License, Version 2.0 (the 10 * "License"); you may not use this file except in compliance 11 * with the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, 16 * software distributed under the License is distributed on an 17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 18 * KIND, either express or implied. See the License for the 19 * specific language governing permissions and limitations 20 * under the License. 21 * ==================================================================== 22 */ 23 24 25 26#include <string.h> 27#include <assert.h> 28 29#include <apr_file_info.h> 30#include <apr_lib.h> 31#include <apr_uri.h> 32 33#include "svn_string.h" 34#include "svn_dirent_uri.h" 35#include "svn_path.h" 36#include "svn_private_config.h" /* for SVN_PATH_LOCAL_SEPARATOR */ 37#include "svn_utf.h" 38#include "svn_io.h" /* for svn_io_stat() */ 39#include "svn_ctype.h" 40 41#include "dirent_uri.h" 42 43 44/* The canonical empty path. Can this be changed? Well, change the empty 45 test below and the path library will work, not so sure about the fs/wc 46 libraries. */ 47#define SVN_EMPTY_PATH "" 48 49/* TRUE if s is the canonical empty path, FALSE otherwise */ 50#define SVN_PATH_IS_EMPTY(s) ((s)[0] == '\0') 51 52/* TRUE if s,n is the platform's empty path ("."), FALSE otherwise. Can 53 this be changed? Well, the path library will work, not so sure about 54 the OS! */ 55#define SVN_PATH_IS_PLATFORM_EMPTY(s,n) ((n) == 1 && (s)[0] == '.') 56 57 58 59 60#ifndef NDEBUG 61/* This function is an approximation of svn_path_is_canonical. 62 * It is supposed to be used in functions that do not have access 63 * to a pool, but still want to assert that a path is canonical. 64 * 65 * PATH with length LEN is assumed to be canonical if it isn't 66 * the platform's empty path (see definition of SVN_PATH_IS_PLATFORM_EMPTY), 67 * and does not contain "/./", and any one of the following 68 * conditions is also met: 69 * 70 * 1. PATH has zero length 71 * 2. PATH is the root directory (what exactly a root directory is 72 * depends on the platform) 73 * 3. PATH is not a root directory and does not end with '/' 74 * 75 * If possible, please use svn_path_is_canonical instead. 76 */ 77static svn_boolean_t 78is_canonical(const char *path, 79 apr_size_t len) 80{ 81 return (! SVN_PATH_IS_PLATFORM_EMPTY(path, len) 82 && strstr(path, "/./") == NULL 83 && (len == 0 84 || (len == 1 && path[0] == '/') 85 || (path[len-1] != '/') 86#if defined(WIN32) || defined(__CYGWIN__) 87 || svn_dirent_is_root(path, len) 88#endif 89 )); 90} 91#endif 92 93 94/* functionality of svn_path_is_canonical but without the deprecation */ 95static svn_boolean_t 96svn_path_is_canonical_internal(const char *path, apr_pool_t *pool) 97{ 98 return svn_uri_is_canonical(path, pool) || 99 svn_dirent_is_canonical(path, pool) || 100 svn_relpath_is_canonical(path); 101} 102 103svn_boolean_t 104svn_path_is_canonical(const char *path, apr_pool_t *pool) 105{ 106 return svn_path_is_canonical_internal(path, pool); 107} 108 109/* functionality of svn_path_join but without the deprecation */ 110static char * 111svn_path_join_internal(const char *base, 112 const char *component, 113 apr_pool_t *pool) 114{ 115 apr_size_t blen = strlen(base); 116 apr_size_t clen = strlen(component); 117 char *path; 118 119 assert(svn_path_is_canonical_internal(base, pool)); 120 assert(svn_path_is_canonical_internal(component, pool)); 121 122 /* If the component is absolute, then return it. */ 123 if (*component == '/') 124 return apr_pmemdup(pool, component, clen + 1); 125 126 /* If either is empty return the other */ 127 if (SVN_PATH_IS_EMPTY(base)) 128 return apr_pmemdup(pool, component, clen + 1); 129 if (SVN_PATH_IS_EMPTY(component)) 130 return apr_pmemdup(pool, base, blen + 1); 131 132 if (blen == 1 && base[0] == '/') 133 blen = 0; /* Ignore base, just return separator + component */ 134 135 /* Construct the new, combined path. */ 136 path = apr_palloc(pool, blen + 1 + clen + 1); 137 memcpy(path, base, blen); 138 path[blen] = '/'; 139 memcpy(path + blen + 1, component, clen + 1); 140 141 return path; 142} 143 144char *svn_path_join(const char *base, 145 const char *component, 146 apr_pool_t *pool) 147{ 148 return svn_path_join_internal(base, component, pool); 149} 150 151char *svn_path_join_many(apr_pool_t *pool, const char *base, ...) 152{ 153#define MAX_SAVED_LENGTHS 10 154 apr_size_t saved_lengths[MAX_SAVED_LENGTHS]; 155 apr_size_t total_len; 156 int nargs; 157 va_list va; 158 const char *s; 159 apr_size_t len; 160 char *path; 161 char *p; 162 svn_boolean_t base_is_empty = FALSE, base_is_root = FALSE; 163 int base_arg = 0; 164 165 total_len = strlen(base); 166 167 assert(svn_path_is_canonical_internal(base, pool)); 168 169 if (total_len == 1 && *base == '/') 170 base_is_root = TRUE; 171 else if (SVN_PATH_IS_EMPTY(base)) 172 { 173 total_len = sizeof(SVN_EMPTY_PATH) - 1; 174 base_is_empty = TRUE; 175 } 176 177 saved_lengths[0] = total_len; 178 179 /* Compute the length of the resulting string. */ 180 181 nargs = 0; 182 va_start(va, base); 183 while ((s = va_arg(va, const char *)) != NULL) 184 { 185 len = strlen(s); 186 187 assert(svn_path_is_canonical_internal(s, pool)); 188 189 if (SVN_PATH_IS_EMPTY(s)) 190 continue; 191 192 if (nargs++ < MAX_SAVED_LENGTHS) 193 saved_lengths[nargs] = len; 194 195 if (*s == '/') 196 { 197 /* an absolute path. skip all components to this point and reset 198 the total length. */ 199 total_len = len; 200 base_arg = nargs; 201 base_is_root = len == 1; 202 base_is_empty = FALSE; 203 } 204 else if (nargs == base_arg 205 || (nargs == base_arg + 1 && base_is_root) 206 || base_is_empty) 207 { 208 /* if we have skipped everything up to this arg, then the base 209 and all prior components are empty. just set the length to 210 this component; do not add a separator. If the base is empty 211 we can now ignore it. */ 212 if (base_is_empty) 213 { 214 base_is_empty = FALSE; 215 total_len = 0; 216 } 217 total_len += len; 218 } 219 else 220 { 221 total_len += 1 + len; 222 } 223 } 224 va_end(va); 225 226 /* base == "/" and no further components. just return that. */ 227 if (base_is_root && total_len == 1) 228 return apr_pmemdup(pool, "/", 2); 229 230 /* we got the total size. allocate it, with room for a NULL character. */ 231 path = p = apr_palloc(pool, total_len + 1); 232 233 /* if we aren't supposed to skip forward to an absolute component, and if 234 this is not an empty base that we are skipping, then copy the base 235 into the output. */ 236 if (base_arg == 0 && ! (SVN_PATH_IS_EMPTY(base) && ! base_is_empty)) 237 { 238 if (SVN_PATH_IS_EMPTY(base)) 239 memcpy(p, SVN_EMPTY_PATH, len = saved_lengths[0]); 240 else 241 memcpy(p, base, len = saved_lengths[0]); 242 p += len; 243 } 244 245 nargs = 0; 246 va_start(va, base); 247 while ((s = va_arg(va, const char *)) != NULL) 248 { 249 if (SVN_PATH_IS_EMPTY(s)) 250 continue; 251 252 if (++nargs < base_arg) 253 continue; 254 255 if (nargs < MAX_SAVED_LENGTHS) 256 len = saved_lengths[nargs]; 257 else 258 len = strlen(s); 259 260 /* insert a separator if we aren't copying in the first component 261 (which can happen when base_arg is set). also, don't put in a slash 262 if the prior character is a slash (occurs when prior component 263 is "/"). */ 264 if (p != path && p[-1] != '/') 265 *p++ = '/'; 266 267 /* copy the new component and advance the pointer */ 268 memcpy(p, s, len); 269 p += len; 270 } 271 va_end(va); 272 273 *p = '\0'; 274 assert((apr_size_t)(p - path) == total_len); 275 276 return path; 277} 278 279 280 281apr_size_t 282svn_path_component_count(const char *path) 283{ 284 apr_size_t count = 0; 285 286 assert(is_canonical(path, strlen(path))); 287 288 while (*path) 289 { 290 const char *start; 291 292 while (*path == '/') 293 ++path; 294 295 start = path; 296 297 while (*path && *path != '/') 298 ++path; 299 300 if (path != start) 301 ++count; 302 } 303 304 return count; 305} 306 307 308/* Return the length of substring necessary to encompass the entire 309 * previous path segment in PATH, which should be a LEN byte string. 310 * 311 * A trailing slash will not be included in the returned length except 312 * in the case in which PATH is absolute and there are no more 313 * previous segments. 314 */ 315static apr_size_t 316previous_segment(const char *path, 317 apr_size_t len) 318{ 319 if (len == 0) 320 return 0; 321 322 while (len > 0 && path[--len] != '/') 323 ; 324 325 if (len == 0 && path[0] == '/') 326 return 1; 327 else 328 return len; 329} 330 331 332void 333svn_path_add_component(svn_stringbuf_t *path, 334 const char *component) 335{ 336 apr_size_t len = strlen(component); 337 338 assert(is_canonical(path->data, path->len)); 339 assert(is_canonical(component, strlen(component))); 340 341 /* Append a dir separator, but only if this path is neither empty 342 nor consists of a single dir separator already. */ 343 if ((! SVN_PATH_IS_EMPTY(path->data)) 344 && (! ((path->len == 1) && (*(path->data) == '/')))) 345 { 346 char dirsep = '/'; 347 svn_stringbuf_appendbytes(path, &dirsep, sizeof(dirsep)); 348 } 349 350 svn_stringbuf_appendbytes(path, component, len); 351} 352 353 354void 355svn_path_remove_component(svn_stringbuf_t *path) 356{ 357 assert(is_canonical(path->data, path->len)); 358 359 path->len = previous_segment(path->data, path->len); 360 path->data[path->len] = '\0'; 361} 362 363 364void 365svn_path_remove_components(svn_stringbuf_t *path, apr_size_t n) 366{ 367 while (n > 0) 368 { 369 svn_path_remove_component(path); 370 n--; 371 } 372} 373 374 375char * 376svn_path_dirname(const char *path, apr_pool_t *pool) 377{ 378 apr_size_t len = strlen(path); 379 380 assert(svn_path_is_canonical_internal(path, pool)); 381 382 return apr_pstrmemdup(pool, path, previous_segment(path, len)); 383} 384 385 386char * 387svn_path_basename(const char *path, apr_pool_t *pool) 388{ 389 apr_size_t len = strlen(path); 390 apr_size_t start; 391 392 assert(svn_path_is_canonical_internal(path, pool)); 393 394 if (len == 1 && path[0] == '/') 395 start = 0; 396 else 397 { 398 start = len; 399 while (start > 0 && path[start - 1] != '/') 400 --start; 401 } 402 403 return apr_pstrmemdup(pool, path + start, len - start); 404} 405 406int 407svn_path_is_empty(const char *path) 408{ 409 assert(is_canonical(path, strlen(path))); 410 411 if (SVN_PATH_IS_EMPTY(path)) 412 return 1; 413 414 return 0; 415} 416 417int 418svn_path_compare_paths(const char *path1, 419 const char *path2) 420{ 421 apr_size_t path1_len = strlen(path1); 422 apr_size_t path2_len = strlen(path2); 423 apr_size_t min_len = ((path1_len < path2_len) ? path1_len : path2_len); 424 apr_size_t i = 0; 425 426 assert(is_canonical(path1, path1_len)); 427 assert(is_canonical(path2, path2_len)); 428 429 /* Skip past common prefix. */ 430 while (i < min_len && path1[i] == path2[i]) 431 ++i; 432 433 /* Are the paths exactly the same? */ 434 if ((path1_len == path2_len) && (i >= min_len)) 435 return 0; 436 437 /* Children of paths are greater than their parents, but less than 438 greater siblings of their parents. */ 439 if ((path1[i] == '/') && (path2[i] == 0)) 440 return 1; 441 if ((path2[i] == '/') && (path1[i] == 0)) 442 return -1; 443 if (path1[i] == '/') 444 return -1; 445 if (path2[i] == '/') 446 return 1; 447 448 /* Common prefix was skipped above, next character is compared to 449 determine order. We need to use an unsigned comparison, though, 450 so a "next character" of NULL (0x00) sorts numerically 451 smallest. */ 452 return (unsigned char)(path1[i]) < (unsigned char)(path2[i]) ? -1 : 1; 453} 454 455/* Return the string length of the longest common ancestor of PATH1 and PATH2. 456 * 457 * This function handles everything except the URL-handling logic 458 * of svn_path_get_longest_ancestor, and assumes that PATH1 and 459 * PATH2 are *not* URLs. 460 * 461 * If the two paths do not share a common ancestor, return 0. 462 * 463 * New strings are allocated in POOL. 464 */ 465static apr_size_t 466get_path_ancestor_length(const char *path1, 467 const char *path2, 468 apr_pool_t *pool) 469{ 470 apr_size_t path1_len, path2_len; 471 apr_size_t i = 0; 472 apr_size_t last_dirsep = 0; 473 474 path1_len = strlen(path1); 475 path2_len = strlen(path2); 476 477 if (SVN_PATH_IS_EMPTY(path1) || SVN_PATH_IS_EMPTY(path2)) 478 return 0; 479 480 while (path1[i] == path2[i]) 481 { 482 /* Keep track of the last directory separator we hit. */ 483 if (path1[i] == '/') 484 last_dirsep = i; 485 486 i++; 487 488 /* If we get to the end of either path, break out. */ 489 if ((i == path1_len) || (i == path2_len)) 490 break; 491 } 492 493 /* two special cases: 494 1. '/' is the longest common ancestor of '/' and '/foo' 495 2. '/' is the longest common ancestor of '/rif' and '/raf' */ 496 if (i == 1 && path1[0] == '/' && path2[0] == '/') 497 return 1; 498 499 /* last_dirsep is now the offset of the last directory separator we 500 crossed before reaching a non-matching byte. i is the offset of 501 that non-matching byte. */ 502 if (((i == path1_len) && (path2[i] == '/')) 503 || ((i == path2_len) && (path1[i] == '/')) 504 || ((i == path1_len) && (i == path2_len))) 505 return i; 506 else 507 if (last_dirsep == 0 && path1[0] == '/' && path2[0] == '/') 508 return 1; 509 return last_dirsep; 510} 511 512 513char * 514svn_path_get_longest_ancestor(const char *path1, 515 const char *path2, 516 apr_pool_t *pool) 517{ 518 svn_boolean_t path1_is_url = svn_path_is_url(path1); 519 svn_boolean_t path2_is_url = svn_path_is_url(path2); 520 521 /* Are we messing with URLs? If we have a mix of URLs and non-URLs, 522 there's nothing common between them. */ 523 if (path1_is_url && path2_is_url) 524 { 525 return svn_uri_get_longest_ancestor(path1, path2, pool); 526 } 527 else if ((! path1_is_url) && (! path2_is_url)) 528 { 529 return apr_pstrndup(pool, path1, 530 get_path_ancestor_length(path1, path2, pool)); 531 } 532 else 533 { 534 /* A URL and a non-URL => no common prefix */ 535 return apr_pmemdup(pool, SVN_EMPTY_PATH, sizeof(SVN_EMPTY_PATH)); 536 } 537} 538 539const char * 540svn_path_is_child(const char *path1, 541 const char *path2, 542 apr_pool_t *pool) 543{ 544 apr_size_t i; 545 546 /* assert (is_canonical (path1, strlen (path1))); ### Expensive strlen */ 547 /* assert (is_canonical (path2, strlen (path2))); ### Expensive strlen */ 548 549 /* Allow "" and "foo" to be parent/child */ 550 if (SVN_PATH_IS_EMPTY(path1)) /* "" is the parent */ 551 { 552 if (SVN_PATH_IS_EMPTY(path2) /* "" not a child */ 553 || path2[0] == '/') /* "/foo" not a child */ 554 return NULL; 555 else 556 /* everything else is child */ 557 return pool ? apr_pstrdup(pool, path2) : path2; 558 } 559 560 /* Reach the end of at least one of the paths. How should we handle 561 things like path1:"foo///bar" and path2:"foo/bar/baz"? It doesn't 562 appear to arise in the current Subversion code, it's not clear to me 563 if they should be parent/child or not. */ 564 for (i = 0; path1[i] && path2[i]; i++) 565 if (path1[i] != path2[i]) 566 return NULL; 567 568 /* There are two cases that are parent/child 569 ... path1[i] == '\0' 570 .../foo path2[i] == '/' 571 or 572 / path1[i] == '\0' 573 /foo path2[i] != '/' 574 */ 575 if (path1[i] == '\0' && path2[i]) 576 { 577 if (path2[i] == '/') 578 return pool ? apr_pstrdup(pool, path2 + i + 1) : path2 + i + 1; 579 else if (i == 1 && path1[0] == '/') 580 return pool ? apr_pstrdup(pool, path2 + 1) : path2 + 1; 581 } 582 583 /* Otherwise, path2 isn't a child. */ 584 return NULL; 585} 586 587 588svn_boolean_t 589svn_path_is_ancestor(const char *path1, const char *path2) 590{ 591 apr_size_t path1_len = strlen(path1); 592 593 /* If path1 is empty and path2 is not absoulte, then path1 is an ancestor. */ 594 if (SVN_PATH_IS_EMPTY(path1)) 595 return *path2 != '/'; 596 597 /* If path1 is a prefix of path2, then: 598 - If path1 ends in a path separator, 599 - If the paths are of the same length 600 OR 601 - path2 starts a new path component after the common prefix, 602 then path1 is an ancestor. */ 603 if (strncmp(path1, path2, path1_len) == 0) 604 return path1[path1_len - 1] == '/' 605 || (path2[path1_len] == '/' || path2[path1_len] == '\0'); 606 607 return FALSE; 608} 609 610 611apr_array_header_t * 612svn_path_decompose(const char *path, 613 apr_pool_t *pool) 614{ 615 apr_size_t i, oldi; 616 617 apr_array_header_t *components = 618 apr_array_make(pool, 1, sizeof(const char *)); 619 620 assert(svn_path_is_canonical_internal(path, pool)); 621 622 if (SVN_PATH_IS_EMPTY(path)) 623 return components; /* ### Should we return a "" component? */ 624 625 /* If PATH is absolute, store the '/' as the first component. */ 626 i = oldi = 0; 627 if (path[i] == '/') 628 { 629 char dirsep = '/'; 630 631 APR_ARRAY_PUSH(components, const char *) 632 = apr_pstrmemdup(pool, &dirsep, sizeof(dirsep)); 633 634 i++; 635 oldi++; 636 if (path[i] == '\0') /* path is a single '/' */ 637 return components; 638 } 639 640 do 641 { 642 if ((path[i] == '/') || (path[i] == '\0')) 643 { 644 if (SVN_PATH_IS_PLATFORM_EMPTY(path + oldi, i - oldi)) 645 APR_ARRAY_PUSH(components, const char *) = SVN_EMPTY_PATH; 646 else 647 APR_ARRAY_PUSH(components, const char *) 648 = apr_pstrmemdup(pool, path + oldi, i - oldi); 649 650 i++; 651 oldi = i; /* skipping past the dirsep */ 652 continue; 653 } 654 i++; 655 } 656 while (path[i-1]); 657 658 return components; 659} 660 661 662const char * 663svn_path_compose(const apr_array_header_t *components, 664 apr_pool_t *pool) 665{ 666 apr_size_t *lengths = apr_palloc(pool, components->nelts*sizeof(*lengths)); 667 apr_size_t max_length = components->nelts; 668 char *path; 669 char *p; 670 int i; 671 672 /* Get the length of each component so a total length can be 673 calculated. */ 674 for (i = 0; i < components->nelts; ++i) 675 { 676 apr_size_t l = strlen(APR_ARRAY_IDX(components, i, const char *)); 677 lengths[i] = l; 678 max_length += l; 679 } 680 681 path = apr_palloc(pool, max_length + 1); 682 p = path; 683 684 for (i = 0; i < components->nelts; ++i) 685 { 686 /* Append a '/' to the path. Handle the case with an absolute 687 path where a '/' appears in the first component. Only append 688 a '/' if the component is the second component that does not 689 follow a "/" first component; or it is the third or later 690 component. */ 691 if (i > 1 || 692 (i == 1 && strcmp("/", APR_ARRAY_IDX(components, 693 0, 694 const char *)) != 0)) 695 { 696 *p++ = '/'; 697 } 698 699 memcpy(p, APR_ARRAY_IDX(components, i, const char *), lengths[i]); 700 p += lengths[i]; 701 } 702 703 *p = '\0'; 704 705 return path; 706} 707 708 709svn_boolean_t 710svn_path_is_single_path_component(const char *name) 711{ 712 assert(is_canonical(name, strlen(name))); 713 714 /* Can't be empty or `..' */ 715 if (SVN_PATH_IS_EMPTY(name) 716 || (name[0] == '.' && name[1] == '.' && name[2] == '\0')) 717 return FALSE; 718 719 /* Slashes are bad, m'kay... */ 720 if (strchr(name, '/') != NULL) 721 return FALSE; 722 723 /* It is valid. */ 724 return TRUE; 725} 726 727 728svn_boolean_t 729svn_path_is_dotpath_present(const char *path) 730{ 731 size_t len; 732 733 /* The empty string does not have a dotpath */ 734 if (path[0] == '\0') 735 return FALSE; 736 737 /* Handle "." or a leading "./" */ 738 if (path[0] == '.' && (path[1] == '\0' || path[1] == '/')) 739 return TRUE; 740 741 /* Paths of length 1 (at this point) have no dotpath present. */ 742 if (path[1] == '\0') 743 return FALSE; 744 745 /* If any segment is "/./", then a dotpath is present. */ 746 if (strstr(path, "/./") != NULL) 747 return TRUE; 748 749 /* Does the path end in "/." ? */ 750 len = strlen(path); 751 return path[len - 2] == '/' && path[len - 1] == '.'; 752} 753 754svn_boolean_t 755svn_path_is_backpath_present(const char *path) 756{ 757 size_t len; 758 759 /* 0 and 1-length paths do not have a backpath */ 760 if (path[0] == '\0' || path[1] == '\0') 761 return FALSE; 762 763 /* Handle ".." or a leading "../" */ 764 if (path[0] == '.' && path[1] == '.' && (path[2] == '\0' || path[2] == '/')) 765 return TRUE; 766 767 /* Paths of length 2 (at this point) have no backpath present. */ 768 if (path[2] == '\0') 769 return FALSE; 770 771 /* If any segment is "..", then a backpath is present. */ 772 if (strstr(path, "/../") != NULL) 773 return TRUE; 774 775 /* Does the path end in "/.." ? */ 776 len = strlen(path); 777 return path[len - 3] == '/' && path[len - 2] == '.' && path[len - 1] == '.'; 778} 779 780 781/*** URI Stuff ***/ 782 783/* Examine PATH as a potential URI, and return a substring of PATH 784 that immediately follows the (scheme):// portion of the URI, or 785 NULL if PATH doesn't appear to be a valid URI. The returned value 786 is not alloced -- it shares memory with PATH. */ 787static const char * 788skip_uri_scheme(const char *path) 789{ 790 apr_size_t j; 791 792 /* A scheme is terminated by a : and cannot contain any /'s. */ 793 for (j = 0; path[j] && path[j] != ':'; ++j) 794 if (path[j] == '/') 795 return NULL; 796 797 if (j > 0 && path[j] == ':' && path[j+1] == '/' && path[j+2] == '/') 798 return path + j + 3; 799 800 return NULL; 801} 802 803 804svn_boolean_t 805svn_path_is_url(const char *path) 806{ 807 /* ### This function is reaaaaaaaaaaaaaally stupid right now. 808 We're just going to look for: 809 810 (scheme)://(optional_stuff) 811 812 Where (scheme) has no ':' or '/' characters. 813 814 Someday it might be nice to have an actual URI parser here. 815 */ 816 return skip_uri_scheme(path) != NULL; 817} 818 819 820 821/* Here is the BNF for path components in a URI. "pchar" is a 822 character in a path component. 823 824 pchar = unreserved | escaped | 825 ":" | "@" | "&" | "=" | "+" | "$" | "," 826 unreserved = alphanum | mark 827 mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" 828 829 Note that "escaped" doesn't really apply to what users can put in 830 their paths, so that really means the set of characters is: 831 832 alphanum | mark | ":" | "@" | "&" | "=" | "+" | "$" | "," 833*/ 834const char svn_uri__char_validity[256] = { 835 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 836 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 837 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 838 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 839 840 /* 64 */ 841 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 842 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 843 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 844 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 845 846 /* 128 */ 847 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 848 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 849 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 850 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 851 852 /* 192 */ 853 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 854 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 855 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 856 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 857}; 858 859 860svn_boolean_t 861svn_path_is_uri_safe(const char *path) 862{ 863 apr_size_t i; 864 865 /* Skip the URI scheme. */ 866 path = skip_uri_scheme(path); 867 868 /* No scheme? Get outta here. */ 869 if (! path) 870 return FALSE; 871 872 /* Skip to the first slash that's after the URI scheme. */ 873 path = strchr(path, '/'); 874 875 /* If there's no first slash, then there's only a host portion; 876 therefore there couldn't be any uri-unsafe characters after the 877 host... so return true. */ 878 if (path == NULL) 879 return TRUE; 880 881 for (i = 0; path[i]; i++) 882 { 883 /* Allow '%XX' (where each X is a hex digit) */ 884 if (path[i] == '%') 885 { 886 if (svn_ctype_isxdigit(path[i + 1]) && 887 svn_ctype_isxdigit(path[i + 2])) 888 { 889 i += 2; 890 continue; 891 } 892 return FALSE; 893 } 894 else if (! svn_uri__char_validity[((unsigned char)path[i])]) 895 { 896 return FALSE; 897 } 898 } 899 900 return TRUE; 901} 902 903 904/* URI-encode each character c in PATH for which TABLE[c] is 0. 905 If no encoding was needed, return PATH, else return a new string allocated 906 in POOL. */ 907static const char * 908uri_escape(const char *path, const char table[], apr_pool_t *pool) 909{ 910 svn_stringbuf_t *retstr; 911 apr_size_t i, copied = 0; 912 int c; 913 914 retstr = svn_stringbuf_create_ensure(strlen(path), pool); 915 for (i = 0; path[i]; i++) 916 { 917 c = (unsigned char)path[i]; 918 if (table[c]) 919 continue; 920 921 /* If we got here, we're looking at a character that isn't 922 supported by the (or at least, our) URI encoding scheme. We 923 need to escape this character. */ 924 925 /* First things first, copy all the good stuff that we haven't 926 yet copied into our output buffer. */ 927 if (i - copied) 928 svn_stringbuf_appendbytes(retstr, path + copied, 929 i - copied); 930 931 /* Now, write in our escaped character, consisting of the 932 '%' and two digits. We cast the C to unsigned char here because 933 the 'X' format character will be tempted to treat it as an unsigned 934 int...which causes problem when messing with 0x80-0xFF chars. 935 We also need space for a null as apr_snprintf will write one. */ 936 svn_stringbuf_ensure(retstr, retstr->len + 4); 937 apr_snprintf(retstr->data + retstr->len, 4, "%%%02X", (unsigned char)c); 938 retstr->len += 3; 939 940 /* Finally, update our copy counter. */ 941 copied = i + 1; 942 } 943 944 /* If we didn't encode anything, we don't need to duplicate the string. */ 945 if (retstr->len == 0) 946 return path; 947 948 /* Anything left to copy? */ 949 if (i - copied) 950 svn_stringbuf_appendbytes(retstr, path + copied, i - copied); 951 952 /* retstr is null-terminated either by apr_snprintf or the svn_stringbuf 953 functions. */ 954 955 return retstr->data; 956} 957 958 959const char * 960svn_path_uri_encode(const char *path, apr_pool_t *pool) 961{ 962 const char *ret; 963 964 ret = uri_escape(path, svn_uri__char_validity, pool); 965 966 /* Our interface guarantees a copy. */ 967 if (ret == path) 968 return apr_pstrdup(pool, path); 969 else 970 return ret; 971} 972 973static const char iri_escape_chars[256] = { 974 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 975 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 976 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 977 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 978 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 979 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 980 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 981 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 982 983 /* 128 */ 984 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 985 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 986 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 987 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 988 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 989 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 990 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 991 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 992}; 993 994const char * 995svn_path_uri_from_iri(const char *iri, apr_pool_t *pool) 996{ 997 return uri_escape(iri, iri_escape_chars, pool); 998} 999 1000static const char uri_autoescape_chars[256] = { 1001 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1002 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1003 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1004 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1005 1006 /* 64 */ 1007 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1008 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1009 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1010 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1011 1012 /* 128 */ 1013 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1014 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1015 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1016 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1017 1018 /* 192 */ 1019 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1020 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1021 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1022 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1023}; 1024 1025const char * 1026svn_path_uri_autoescape(const char *uri, apr_pool_t *pool) 1027{ 1028 return uri_escape(uri, uri_autoescape_chars, pool); 1029} 1030 1031const char * 1032svn_path_uri_decode(const char *path, apr_pool_t *pool) 1033{ 1034 svn_stringbuf_t *retstr; 1035 apr_size_t i; 1036 svn_boolean_t query_start = FALSE; 1037 1038 /* avoid repeated realloc */ 1039 retstr = svn_stringbuf_create_ensure(strlen(path) + 1, pool); 1040 1041 retstr->len = 0; 1042 for (i = 0; path[i]; i++) 1043 { 1044 char c = path[i]; 1045 1046 if (c == '?') 1047 { 1048 /* Mark the start of the query string, if it exists. */ 1049 query_start = TRUE; 1050 } 1051 else if (c == '+' && query_start) 1052 { 1053 /* Only do this if we are into the query string. 1054 * RFC 2396, section 3.3 */ 1055 c = ' '; 1056 } 1057 else if (c == '%' && svn_ctype_isxdigit(path[i + 1]) 1058 && svn_ctype_isxdigit(path[i+2])) 1059 { 1060 char digitz[3]; 1061 digitz[0] = path[++i]; 1062 digitz[1] = path[++i]; 1063 digitz[2] = '\0'; 1064 c = (char)(strtol(digitz, NULL, 16)); 1065 } 1066 1067 retstr->data[retstr->len++] = c; 1068 } 1069 1070 /* Null-terminate this bad-boy. */ 1071 retstr->data[retstr->len] = 0; 1072 1073 return retstr->data; 1074} 1075 1076 1077const char * 1078svn_path_url_add_component2(const char *url, 1079 const char *component, 1080 apr_pool_t *pool) 1081{ 1082 /* = svn_path_uri_encode() but without always copying */ 1083 component = uri_escape(component, svn_uri__char_validity, pool); 1084 1085 return svn_path_join_internal(url, component, pool); 1086} 1087 1088svn_error_t * 1089svn_path_get_absolute(const char **pabsolute, 1090 const char *relative, 1091 apr_pool_t *pool) 1092{ 1093 if (svn_path_is_url(relative)) 1094 { 1095 *pabsolute = apr_pstrdup(pool, relative); 1096 return SVN_NO_ERROR; 1097 } 1098 1099 return svn_dirent_get_absolute(pabsolute, relative, pool); 1100} 1101 1102 1103#if !defined(WIN32) && !defined(DARWIN) 1104/** Get APR's internal path encoding. */ 1105static svn_error_t * 1106get_path_encoding(svn_boolean_t *path_is_utf8, apr_pool_t *pool) 1107{ 1108 apr_status_t apr_err; 1109 int encoding_style; 1110 1111 apr_err = apr_filepath_encoding(&encoding_style, pool); 1112 if (apr_err) 1113 return svn_error_wrap_apr(apr_err, 1114 _("Can't determine the native path encoding")); 1115 1116 /* ### What to do about APR_FILEPATH_ENCODING_UNKNOWN? 1117 Well, for now we'll just punt to the svn_utf_ functions; 1118 those will at least do the ASCII-subset check. */ 1119 *path_is_utf8 = (encoding_style == APR_FILEPATH_ENCODING_UTF8); 1120 return SVN_NO_ERROR; 1121} 1122#endif 1123 1124 1125svn_error_t * 1126svn_path_cstring_from_utf8(const char **path_apr, 1127 const char *path_utf8, 1128 apr_pool_t *pool) 1129{ 1130#if !defined(WIN32) && !defined(DARWIN) 1131 svn_boolean_t path_is_utf8; 1132 SVN_ERR(get_path_encoding(&path_is_utf8, pool)); 1133 if (path_is_utf8) 1134#endif 1135 { 1136 *path_apr = apr_pstrdup(pool, path_utf8); 1137 return SVN_NO_ERROR; 1138 } 1139#if !defined(WIN32) && !defined(DARWIN) 1140 else 1141 return svn_utf_cstring_from_utf8(path_apr, path_utf8, pool); 1142#endif 1143} 1144 1145 1146svn_error_t * 1147svn_path_cstring_to_utf8(const char **path_utf8, 1148 const char *path_apr, 1149 apr_pool_t *pool) 1150{ 1151#if !defined(WIN32) && !defined(DARWIN) 1152 svn_boolean_t path_is_utf8; 1153 SVN_ERR(get_path_encoding(&path_is_utf8, pool)); 1154 if (path_is_utf8) 1155#endif 1156 { 1157 *path_utf8 = apr_pstrdup(pool, path_apr); 1158 return SVN_NO_ERROR; 1159 } 1160#if !defined(WIN32) && !defined(DARWIN) 1161 else 1162 return svn_utf_cstring_to_utf8(path_utf8, path_apr, pool); 1163#endif 1164} 1165 1166 1167/* Return a copy of PATH, allocated from POOL, for which control 1168 characters have been escaped using the form \NNN (where NNN is the 1169 octal representation of the byte's ordinal value). */ 1170const char * 1171svn_path_illegal_path_escape(const char *path, apr_pool_t *pool) 1172{ 1173 svn_stringbuf_t *retstr; 1174 apr_size_t i, copied = 0; 1175 int c; 1176 1177 /* At least one control character: 1178 strlen - 1 (control) + \ + N + N + N + null . */ 1179 retstr = svn_stringbuf_create_ensure(strlen(path) + 4, pool); 1180 for (i = 0; path[i]; i++) 1181 { 1182 c = (unsigned char)path[i]; 1183 if (! svn_ctype_iscntrl(c)) 1184 continue; 1185 1186 /* If we got here, we're looking at a character that isn't 1187 supported by the (or at least, our) URI encoding scheme. We 1188 need to escape this character. */ 1189 1190 /* First things first, copy all the good stuff that we haven't 1191 yet copied into our output buffer. */ 1192 if (i - copied) 1193 svn_stringbuf_appendbytes(retstr, path + copied, 1194 i - copied); 1195 1196 /* Make sure buffer is big enough for '\' 'N' 'N' 'N' (and NUL) */ 1197 svn_stringbuf_ensure(retstr, retstr->len + 5); 1198 /*### The backslash separator doesn't work too great with Windows, 1199 but it's what we'll use for consistency with invalid utf8 1200 formatting (until someone has a better idea) */ 1201 apr_snprintf(retstr->data + retstr->len, 5, "\\%03o", (unsigned char)c); 1202 retstr->len += 4; 1203 1204 /* Finally, update our copy counter. */ 1205 copied = i + 1; 1206 } 1207 1208 /* If we didn't encode anything, we don't need to duplicate the string. */ 1209 if (retstr->len == 0) 1210 return path; 1211 1212 /* Anything left to copy? */ 1213 if (i - copied) 1214 svn_stringbuf_appendbytes(retstr, path + copied, i - copied); 1215 1216 /* retstr is null-terminated either by apr_snprintf or the svn_stringbuf 1217 functions. */ 1218 1219 return retstr->data; 1220} 1221 1222svn_error_t * 1223svn_path_check_valid(const char *path, apr_pool_t *pool) 1224{ 1225 const char *c; 1226 1227 for (c = path; *c; c++) 1228 { 1229 if (svn_ctype_iscntrl(*c)) 1230 { 1231 return svn_error_createf 1232 (SVN_ERR_FS_PATH_SYNTAX, NULL, 1233 _("Invalid control character '0x%02x' in path '%s'"), 1234 (unsigned char)*c, 1235 svn_path_illegal_path_escape(svn_dirent_local_style(path, pool), 1236 pool)); 1237 } 1238 } 1239 1240 return SVN_NO_ERROR; 1241} 1242 1243void 1244svn_path_splitext(const char **path_root, 1245 const char **path_ext, 1246 const char *path, 1247 apr_pool_t *pool) 1248{ 1249 const char *last_dot, *last_slash; 1250 1251 /* Easy out -- why do all the work when there's no way to report it? */ 1252 if (! (path_root || path_ext)) 1253 return; 1254 1255 /* Do we even have a period in this thing? And if so, is there 1256 anything after it? We look for the "rightmost" period in the 1257 string. */ 1258 last_dot = strrchr(path, '.'); 1259 if (last_dot && (last_dot + 1 != '\0')) 1260 { 1261 /* If we have a period, we need to make sure it occurs in the 1262 final path component -- that there's no path separator 1263 between the last period and the end of the PATH -- otherwise, 1264 it doesn't count. Also, we want to make sure that our period 1265 isn't the first character of the last component. */ 1266 last_slash = strrchr(path, '/'); 1267 if ((last_slash && (last_dot > (last_slash + 1))) 1268 || ((! last_slash) && (last_dot > path))) 1269 { 1270 if (path_root) 1271 *path_root = apr_pstrmemdup(pool, path, 1272 (last_dot - path + 1) * sizeof(*path)); 1273 if (path_ext) 1274 *path_ext = apr_pstrdup(pool, last_dot + 1); 1275 return; 1276 } 1277 } 1278 /* If we get here, we never found a suitable separator character, so 1279 there's no split. */ 1280 if (path_root) 1281 *path_root = apr_pstrdup(pool, path); 1282 if (path_ext) 1283 *path_ext = ""; 1284} 1285 1286 1287/* Repository relative URLs (^/). */ 1288 1289svn_boolean_t 1290svn_path_is_repos_relative_url(const char *path) 1291{ 1292 return (0 == strncmp("^/", path, 2)); 1293} 1294 1295svn_error_t * 1296svn_path_resolve_repos_relative_url(const char **absolute_url, 1297 const char *relative_url, 1298 const char *repos_root_url, 1299 apr_pool_t *pool) 1300{ 1301 if (! svn_path_is_repos_relative_url(relative_url)) 1302 return svn_error_createf(SVN_ERR_BAD_URL, NULL, 1303 _("Improper relative URL '%s'"), 1304 relative_url); 1305 1306 /* No assumptions are made about the canonicalization of the inut 1307 * arguments, it is presumed that the output will be canonicalized after 1308 * this function, which will remove any duplicate path separator. 1309 */ 1310 *absolute_url = apr_pstrcat(pool, repos_root_url, relative_url + 1, 1311 (char *)NULL); 1312 1313 return SVN_NO_ERROR; 1314} 1315 1316