path.c revision 362181
1/*
2 * paths.c:   a path manipulation library using svn_stringbuf_t
3 *
4 * ====================================================================
5 *    Licensed to the Apache Software Foundation (ASF) under one
6 *    or more contributor license agreements.  See the NOTICE file
7 *    distributed with this work for additional information
8 *    regarding copyright ownership.  The ASF licenses this file
9 *    to you under the Apache License, Version 2.0 (the
10 *    "License"); you may not use this file except in compliance
11 *    with the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 *    Unless required by applicable law or agreed to in writing,
16 *    software distributed under the License is distributed on an
17 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 *    KIND, either express or implied.  See the License for the
19 *    specific language governing permissions and limitations
20 *    under the License.
21 * ====================================================================
22 */
23
24
25
26#include <string.h>
27#include <assert.h>
28
29#include <apr_file_info.h>
30#include <apr_lib.h>
31#include <apr_uri.h>
32
33#include "svn_string.h"
34#include "svn_dirent_uri.h"
35#include "svn_path.h"
36#include "svn_private_config.h"         /* for SVN_PATH_LOCAL_SEPARATOR */
37#include "svn_utf.h"
38#include "svn_io.h"                     /* for svn_io_stat() */
39#include "svn_ctype.h"
40
41#include "dirent_uri.h"
42
43
/* The canonical empty path, spelled as the empty string.  Can this be
   changed?  Well, change the empty test below and the path library will
   work, not so sure about the fs/wc libraries. */
#define SVN_EMPTY_PATH ""

/* TRUE if S is the canonical empty path, FALSE otherwise.  Only the first
   byte of S is examined. */
#define SVN_PATH_IS_EMPTY(s) ((s)[0] == '\0')

/* TRUE if the N-byte string S is the platform's empty path ("."), FALSE
   otherwise.  Can this be changed?  Well, the path library will work, not
   so sure about the OS! */
#define SVN_PATH_IS_PLATFORM_EMPTY(s,n) ((n) == 1 && (s)[0] == '.')
56
57
58
59
60#ifndef NDEBUG
61/* This function is an approximation of svn_path_is_canonical.
62 * It is supposed to be used in functions that do not have access
63 * to a pool, but still want to assert that a path is canonical.
64 *
65 * PATH with length LEN is assumed to be canonical if it isn't
66 * the platform's empty path (see definition of SVN_PATH_IS_PLATFORM_EMPTY),
67 * and does not contain "/./", and any one of the following
68 * conditions is also met:
69 *
70 *  1. PATH has zero length
71 *  2. PATH is the root directory (what exactly a root directory is
72 *                                depends on the platform)
73 *  3. PATH is not a root directory and does not end with '/'
74 *
75 * If possible, please use svn_path_is_canonical instead.
76 */
77static svn_boolean_t
78is_canonical(const char *path,
79             apr_size_t len)
80{
81  return (! SVN_PATH_IS_PLATFORM_EMPTY(path, len)
82          && strstr(path, "/./") == NULL
83          && (len == 0
84              || (len == 1 && path[0] == '/')
85              || (path[len-1] != '/')
86#if defined(WIN32) || defined(__CYGWIN__)
87              || svn_dirent_is_root(path, len)
88#endif
89              ));
90}
91#endif
92
93
94/* functionality of svn_path_is_canonical but without the deprecation */
95static svn_boolean_t
96svn_path_is_canonical_internal(const char *path, apr_pool_t *pool)
97{
98  return svn_uri_is_canonical(path, pool) ||
99      svn_dirent_is_canonical(path, pool) ||
100      svn_relpath_is_canonical(path);
101}
102
/* Public entry point: report whether PATH is canonical by delegating to
   svn_path_is_canonical_internal(), passing POOL through unchanged. */
svn_boolean_t
svn_path_is_canonical(const char *path, apr_pool_t *pool)
{
  return svn_path_is_canonical_internal(path, pool);
}
108
109/* functionality of svn_path_join but without the deprecation */
110static char *
111svn_path_join_internal(const char *base,
112                       const char *component,
113                       apr_pool_t *pool)
114{
115  apr_size_t blen = strlen(base);
116  apr_size_t clen = strlen(component);
117  char *path;
118
119  assert(svn_path_is_canonical_internal(base, pool));
120  assert(svn_path_is_canonical_internal(component, pool));
121
122  /* If the component is absolute, then return it.  */
123  if (*component == '/')
124    return apr_pmemdup(pool, component, clen + 1);
125
126  /* If either is empty return the other */
127  if (SVN_PATH_IS_EMPTY(base))
128    return apr_pmemdup(pool, component, clen + 1);
129  if (SVN_PATH_IS_EMPTY(component))
130    return apr_pmemdup(pool, base, blen + 1);
131
132  if (blen == 1 && base[0] == '/')
133    blen = 0; /* Ignore base, just return separator + component */
134
135  /* Construct the new, combined path. */
136  path = apr_palloc(pool, blen + 1 + clen + 1);
137  memcpy(path, base, blen);
138  path[blen] = '/';
139  memcpy(path + blen + 1, component, clen + 1);
140
141  return path;
142}
143
/* Public entry point: join BASE and COMPONENT by delegating to
   svn_path_join_internal(); the result is allocated in POOL. */
char *svn_path_join(const char *base,
                    const char *component,
                    apr_pool_t *pool)
{
  return svn_path_join_internal(base, component, pool);
}
150
151char *svn_path_join_many(apr_pool_t *pool, const char *base, ...)
152{
153#define MAX_SAVED_LENGTHS 10
154  apr_size_t saved_lengths[MAX_SAVED_LENGTHS];
155  apr_size_t total_len;
156  int nargs;
157  va_list va;
158  const char *s;
159  apr_size_t len;
160  char *path;
161  char *p;
162  svn_boolean_t base_is_empty = FALSE, base_is_root = FALSE;
163  int base_arg = 0;
164
165  total_len = strlen(base);
166
167  assert(svn_path_is_canonical_internal(base, pool));
168
169  if (total_len == 1 && *base == '/')
170    base_is_root = TRUE;
171  else if (SVN_PATH_IS_EMPTY(base))
172    {
173      total_len = sizeof(SVN_EMPTY_PATH) - 1;
174      base_is_empty = TRUE;
175    }
176
177  saved_lengths[0] = total_len;
178
179  /* Compute the length of the resulting string. */
180
181  nargs = 0;
182  va_start(va, base);
183  while ((s = va_arg(va, const char *)) != NULL)
184    {
185      len = strlen(s);
186
187      assert(svn_path_is_canonical_internal(s, pool));
188
189      if (SVN_PATH_IS_EMPTY(s))
190        continue;
191
192      if (nargs++ < MAX_SAVED_LENGTHS)
193        saved_lengths[nargs] = len;
194
195      if (*s == '/')
196        {
197          /* an absolute path. skip all components to this point and reset
198             the total length. */
199          total_len = len;
200          base_arg = nargs;
201          base_is_root = len == 1;
202          base_is_empty = FALSE;
203        }
204      else if (nargs == base_arg
205               || (nargs == base_arg + 1 && base_is_root)
206               || base_is_empty)
207        {
208          /* if we have skipped everything up to this arg, then the base
209             and all prior components are empty. just set the length to
210             this component; do not add a separator.  If the base is empty
211             we can now ignore it. */
212          if (base_is_empty)
213            {
214              base_is_empty = FALSE;
215              total_len = 0;
216            }
217          total_len += len;
218        }
219      else
220        {
221          total_len += 1 + len;
222        }
223    }
224  va_end(va);
225
226  /* base == "/" and no further components. just return that. */
227  if (base_is_root && total_len == 1)
228    return apr_pmemdup(pool, "/", 2);
229
230  /* we got the total size. allocate it, with room for a NULL character. */
231  path = p = apr_palloc(pool, total_len + 1);
232
233  /* if we aren't supposed to skip forward to an absolute component, and if
234     this is not an empty base that we are skipping, then copy the base
235     into the output. */
236  if (base_arg == 0 && ! (SVN_PATH_IS_EMPTY(base) && ! base_is_empty))
237    {
238      if (SVN_PATH_IS_EMPTY(base))
239        memcpy(p, SVN_EMPTY_PATH, len = saved_lengths[0]);
240      else
241        memcpy(p, base, len = saved_lengths[0]);
242      p += len;
243    }
244
245  nargs = 0;
246  va_start(va, base);
247  while ((s = va_arg(va, const char *)) != NULL)
248    {
249      if (SVN_PATH_IS_EMPTY(s))
250        continue;
251
252      if (++nargs < base_arg)
253        continue;
254
255      if (nargs < MAX_SAVED_LENGTHS)
256        len = saved_lengths[nargs];
257      else
258        len = strlen(s);
259
260      /* insert a separator if we aren't copying in the first component
261         (which can happen when base_arg is set). also, don't put in a slash
262         if the prior character is a slash (occurs when prior component
263         is "/"). */
264      if (p != path && p[-1] != '/')
265        *p++ = '/';
266
267      /* copy the new component and advance the pointer */
268      memcpy(p, s, len);
269      p += len;
270    }
271  va_end(va);
272
273  *p = '\0';
274  assert((apr_size_t)(p - path) == total_len);
275
276  return path;
277}
278
279
280
281apr_size_t
282svn_path_component_count(const char *path)
283{
284  apr_size_t count = 0;
285
286  assert(is_canonical(path, strlen(path)));
287
288  while (*path)
289    {
290      const char *start;
291
292      while (*path == '/')
293        ++path;
294
295      start = path;
296
297      while (*path && *path != '/')
298        ++path;
299
300      if (path != start)
301        ++count;
302    }
303
304  return count;
305}
306
307
308/* Return the length of substring necessary to encompass the entire
309 * previous path segment in PATH, which should be a LEN byte string.
310 *
311 * A trailing slash will not be included in the returned length except
312 * in the case in which PATH is absolute and there are no more
313 * previous segments.
314 */
315static apr_size_t
316previous_segment(const char *path,
317                 apr_size_t len)
318{
319  if (len == 0)
320    return 0;
321
322  while (len > 0 && path[--len] != '/')
323    ;
324
325  if (len == 0 && path[0] == '/')
326    return 1;
327  else
328    return len;
329}
330
331
332void
333svn_path_add_component(svn_stringbuf_t *path,
334                       const char *component)
335{
336  apr_size_t len = strlen(component);
337
338  assert(is_canonical(path->data, path->len));
339  assert(is_canonical(component, strlen(component)));
340
341  /* Append a dir separator, but only if this path is neither empty
342     nor consists of a single dir separator already. */
343  if ((! SVN_PATH_IS_EMPTY(path->data))
344      && (! ((path->len == 1) && (*(path->data) == '/'))))
345    {
346      char dirsep = '/';
347      svn_stringbuf_appendbytes(path, &dirsep, sizeof(dirsep));
348    }
349
350  svn_stringbuf_appendbytes(path, component, len);
351}
352
353
354void
355svn_path_remove_component(svn_stringbuf_t *path)
356{
357  assert(is_canonical(path->data, path->len));
358
359  path->len = previous_segment(path->data, path->len);
360  path->data[path->len] = '\0';
361}
362
363
364void
365svn_path_remove_components(svn_stringbuf_t *path, apr_size_t n)
366{
367  while (n > 0)
368    {
369      svn_path_remove_component(path);
370      n--;
371    }
372}
373
374
375char *
376svn_path_dirname(const char *path, apr_pool_t *pool)
377{
378  apr_size_t len = strlen(path);
379
380  assert(svn_path_is_canonical_internal(path, pool));
381
382  return apr_pstrmemdup(pool, path, previous_segment(path, len));
383}
384
385
386char *
387svn_path_basename(const char *path, apr_pool_t *pool)
388{
389  apr_size_t len = strlen(path);
390  apr_size_t start;
391
392  assert(svn_path_is_canonical_internal(path, pool));
393
394  if (len == 1 && path[0] == '/')
395    start = 0;
396  else
397    {
398      start = len;
399      while (start > 0 && path[start - 1] != '/')
400        --start;
401    }
402
403  return apr_pstrmemdup(pool, path + start, len - start);
404}
405
/* Return 1 if the canonical path PATH is the empty path, 0 otherwise. */
int
svn_path_is_empty(const char *path)
{
  assert(is_canonical(path, strlen(path)));

  return SVN_PATH_IS_EMPTY(path) ? 1 : 0;
}
416
417int
418svn_path_compare_paths(const char *path1,
419                       const char *path2)
420{
421  apr_size_t path1_len = strlen(path1);
422  apr_size_t path2_len = strlen(path2);
423  apr_size_t min_len = ((path1_len < path2_len) ? path1_len : path2_len);
424  apr_size_t i = 0;
425
426  assert(is_canonical(path1, path1_len));
427  assert(is_canonical(path2, path2_len));
428
429  /* Skip past common prefix. */
430  while (i < min_len && path1[i] == path2[i])
431    ++i;
432
433  /* Are the paths exactly the same? */
434  if ((path1_len == path2_len) && (i >= min_len))
435    return 0;
436
437  /* Children of paths are greater than their parents, but less than
438     greater siblings of their parents. */
439  if ((path1[i] == '/') && (path2[i] == 0))
440    return 1;
441  if ((path2[i] == '/') && (path1[i] == 0))
442    return -1;
443  if (path1[i] == '/')
444    return -1;
445  if (path2[i] == '/')
446    return 1;
447
448  /* Common prefix was skipped above, next character is compared to
449     determine order.  We need to use an unsigned comparison, though,
450     so a "next character" of NULL (0x00) sorts numerically
451     smallest. */
452  return (unsigned char)(path1[i]) < (unsigned char)(path2[i]) ? -1 : 1;
453}
454
455/* Return the string length of the longest common ancestor of PATH1 and PATH2.
456 *
457 * This function handles everything except the URL-handling logic
458 * of svn_path_get_longest_ancestor, and assumes that PATH1 and
459 * PATH2 are *not* URLs.
460 *
461 * If the two paths do not share a common ancestor, return 0.
462 *
463 * New strings are allocated in POOL.
464 */
465static apr_size_t
466get_path_ancestor_length(const char *path1,
467                         const char *path2,
468                         apr_pool_t *pool)
469{
470  apr_size_t path1_len, path2_len;
471  apr_size_t i = 0;
472  apr_size_t last_dirsep = 0;
473
474  path1_len = strlen(path1);
475  path2_len = strlen(path2);
476
477  if (SVN_PATH_IS_EMPTY(path1) || SVN_PATH_IS_EMPTY(path2))
478    return 0;
479
480  while (path1[i] == path2[i])
481    {
482      /* Keep track of the last directory separator we hit. */
483      if (path1[i] == '/')
484        last_dirsep = i;
485
486      i++;
487
488      /* If we get to the end of either path, break out. */
489      if ((i == path1_len) || (i == path2_len))
490        break;
491    }
492
493  /* two special cases:
494     1. '/' is the longest common ancestor of '/' and '/foo'
495     2. '/' is the longest common ancestor of '/rif' and '/raf' */
496  if (i == 1 && path1[0] == '/' && path2[0] == '/')
497    return 1;
498
499  /* last_dirsep is now the offset of the last directory separator we
500     crossed before reaching a non-matching byte.  i is the offset of
501     that non-matching byte. */
502  if (((i == path1_len) && (path2[i] == '/'))
503           || ((i == path2_len) && (path1[i] == '/'))
504           || ((i == path1_len) && (i == path2_len)))
505    return i;
506  else
507    if (last_dirsep == 0 && path1[0] == '/' && path2[0] == '/')
508      return 1;
509  return last_dirsep;
510}
511
512
513char *
514svn_path_get_longest_ancestor(const char *path1,
515                              const char *path2,
516                              apr_pool_t *pool)
517{
518  svn_boolean_t path1_is_url = svn_path_is_url(path1);
519  svn_boolean_t path2_is_url = svn_path_is_url(path2);
520
521  /* Are we messing with URLs?  If we have a mix of URLs and non-URLs,
522     there's nothing common between them.  */
523  if (path1_is_url && path2_is_url)
524    {
525      return svn_uri_get_longest_ancestor(path1, path2, pool);
526    }
527  else if ((! path1_is_url) && (! path2_is_url))
528    {
529      return apr_pstrndup(pool, path1,
530                          get_path_ancestor_length(path1, path2, pool));
531    }
532  else
533    {
534      /* A URL and a non-URL => no common prefix */
535      return apr_pmemdup(pool, SVN_EMPTY_PATH, sizeof(SVN_EMPTY_PATH));
536    }
537}
538
539const char *
540svn_path_is_child(const char *path1,
541                  const char *path2,
542                  apr_pool_t *pool)
543{
544  apr_size_t i;
545
546  /* assert (is_canonical (path1, strlen (path1)));  ### Expensive strlen */
547  /* assert (is_canonical (path2, strlen (path2)));  ### Expensive strlen */
548
549  /* Allow "" and "foo" to be parent/child */
550  if (SVN_PATH_IS_EMPTY(path1))               /* "" is the parent  */
551    {
552      if (SVN_PATH_IS_EMPTY(path2)            /* "" not a child    */
553          || path2[0] == '/')                  /* "/foo" not a child */
554        return NULL;
555      else
556        /* everything else is child */
557        return pool ? apr_pstrdup(pool, path2) : path2;
558    }
559
560  /* Reach the end of at least one of the paths.  How should we handle
561     things like path1:"foo///bar" and path2:"foo/bar/baz"?  It doesn't
562     appear to arise in the current Subversion code, it's not clear to me
563     if they should be parent/child or not. */
564  for (i = 0; path1[i] && path2[i]; i++)
565    if (path1[i] != path2[i])
566      return NULL;
567
568  /* There are two cases that are parent/child
569          ...      path1[i] == '\0'
570          .../foo  path2[i] == '/'
571      or
572          /        path1[i] == '\0'
573          /foo     path2[i] != '/'
574  */
575  if (path1[i] == '\0' && path2[i])
576    {
577      if (path2[i] == '/')
578        return pool ? apr_pstrdup(pool, path2 + i + 1) : path2 + i + 1;
579      else if (i == 1 && path1[0] == '/')
580        return pool ? apr_pstrdup(pool, path2 + 1) : path2 + 1;
581    }
582
583  /* Otherwise, path2 isn't a child. */
584  return NULL;
585}
586
587
588svn_boolean_t
589svn_path_is_ancestor(const char *path1, const char *path2)
590{
591  apr_size_t path1_len = strlen(path1);
592
593  /* If path1 is empty and path2 is not absoulte, then path1 is an ancestor. */
594  if (SVN_PATH_IS_EMPTY(path1))
595    return *path2 != '/';
596
597  /* If path1 is a prefix of path2, then:
598     - If path1 ends in a path separator,
599     - If the paths are of the same length
600     OR
601     - path2 starts a new path component after the common prefix,
602     then path1 is an ancestor. */
603  if (strncmp(path1, path2, path1_len) == 0)
604    return path1[path1_len - 1] == '/'
605      || (path2[path1_len] == '/' || path2[path1_len] == '\0');
606
607  return FALSE;
608}
609
610
611apr_array_header_t *
612svn_path_decompose(const char *path,
613                   apr_pool_t *pool)
614{
615  apr_size_t i, oldi;
616
617  apr_array_header_t *components =
618    apr_array_make(pool, 1, sizeof(const char *));
619
620  assert(svn_path_is_canonical_internal(path, pool));
621
622  if (SVN_PATH_IS_EMPTY(path))
623    return components;  /* ### Should we return a "" component? */
624
625  /* If PATH is absolute, store the '/' as the first component. */
626  i = oldi = 0;
627  if (path[i] == '/')
628    {
629      char dirsep = '/';
630
631      APR_ARRAY_PUSH(components, const char *)
632        = apr_pstrmemdup(pool, &dirsep, sizeof(dirsep));
633
634      i++;
635      oldi++;
636      if (path[i] == '\0') /* path is a single '/' */
637        return components;
638    }
639
640  do
641    {
642      if ((path[i] == '/') || (path[i] == '\0'))
643        {
644          if (SVN_PATH_IS_PLATFORM_EMPTY(path + oldi, i - oldi))
645            APR_ARRAY_PUSH(components, const char *) = SVN_EMPTY_PATH;
646          else
647            APR_ARRAY_PUSH(components, const char *)
648              = apr_pstrmemdup(pool, path + oldi, i - oldi);
649
650          i++;
651          oldi = i;  /* skipping past the dirsep */
652          continue;
653        }
654      i++;
655    }
656  while (path[i-1]);
657
658  return components;
659}
660
661
662const char *
663svn_path_compose(const apr_array_header_t *components,
664                 apr_pool_t *pool)
665{
666  apr_size_t *lengths = apr_palloc(pool, components->nelts*sizeof(*lengths));
667  apr_size_t max_length = components->nelts;
668  char *path;
669  char *p;
670  int i;
671
672  /* Get the length of each component so a total length can be
673     calculated. */
674  for (i = 0; i < components->nelts; ++i)
675    {
676      apr_size_t l = strlen(APR_ARRAY_IDX(components, i, const char *));
677      lengths[i] = l;
678      max_length += l;
679    }
680
681  path = apr_palloc(pool, max_length + 1);
682  p = path;
683
684  for (i = 0; i < components->nelts; ++i)
685    {
686      /* Append a '/' to the path.  Handle the case with an absolute
687         path where a '/' appears in the first component.  Only append
688         a '/' if the component is the second component that does not
689         follow a "/" first component; or it is the third or later
690         component. */
691      if (i > 1 ||
692          (i == 1 && strcmp("/", APR_ARRAY_IDX(components,
693                                               0,
694                                               const char *)) != 0))
695        {
696          *p++ = '/';
697        }
698
699      memcpy(p, APR_ARRAY_IDX(components, i, const char *), lengths[i]);
700      p += lengths[i];
701    }
702
703  *p = '\0';
704
705  return path;
706}
707
708
709svn_boolean_t
710svn_path_is_single_path_component(const char *name)
711{
712  assert(is_canonical(name, strlen(name)));
713
714  /* Can't be empty or `..'  */
715  if (SVN_PATH_IS_EMPTY(name)
716      || (name[0] == '.' && name[1] == '.' && name[2] == '\0'))
717    return FALSE;
718
719  /* Slashes are bad, m'kay... */
720  if (strchr(name, '/') != NULL)
721    return FALSE;
722
723  /* It is valid.  */
724  return TRUE;
725}
726
727
728svn_boolean_t
729svn_path_is_dotpath_present(const char *path)
730{
731  size_t len;
732
733  /* The empty string does not have a dotpath */
734  if (path[0] == '\0')
735    return FALSE;
736
737  /* Handle "." or a leading "./" */
738  if (path[0] == '.' && (path[1] == '\0' || path[1] == '/'))
739    return TRUE;
740
741  /* Paths of length 1 (at this point) have no dotpath present. */
742  if (path[1] == '\0')
743    return FALSE;
744
745  /* If any segment is "/./", then a dotpath is present. */
746  if (strstr(path, "/./") != NULL)
747    return TRUE;
748
749  /* Does the path end in "/." ? */
750  len = strlen(path);
751  return path[len - 2] == '/' && path[len - 1] == '.';
752}
753
754svn_boolean_t
755svn_path_is_backpath_present(const char *path)
756{
757  size_t len;
758
759  /* 0 and 1-length paths do not have a backpath */
760  if (path[0] == '\0' || path[1] == '\0')
761    return FALSE;
762
763  /* Handle ".." or a leading "../" */
764  if (path[0] == '.' && path[1] == '.' && (path[2] == '\0' || path[2] == '/'))
765    return TRUE;
766
767  /* Paths of length 2 (at this point) have no backpath present. */
768  if (path[2] == '\0')
769    return FALSE;
770
771  /* If any segment is "..", then a backpath is present. */
772  if (strstr(path, "/../") != NULL)
773    return TRUE;
774
775  /* Does the path end in "/.." ? */
776  len = strlen(path);
777  return path[len - 3] == '/' && path[len - 2] == '.' && path[len - 1] == '.';
778}
779
780
781/*** URI Stuff ***/
782
/* Examine PATH as a potential URI.  If it starts with a non-empty
   scheme (a run of characters containing neither ':' nor '/') followed
   by "://", return a pointer to the character right after "://";
   otherwise return NULL.  The returned value is not alloced -- it
   shares memory with PATH. */
static const char *
skip_uri_scheme(const char *path)
{
  const char *p = path;

  /* A scheme is terminated by a : and cannot contain any /'s. */
  while (*p != '\0' && *p != ':')
    {
      if (*p == '/')
        return NULL;
      ++p;
    }

  /* The scheme must not be empty ... */
  if (p == path)
    return NULL;

  /* ... and must be followed by "://". */
  if (strncmp(p, "://", 3) != 0)
    return NULL;

  return p + 3;
}
802
803
/* Return TRUE if PATH looks like a URL, i.e. if skip_uri_scheme() finds
   a "(scheme)://" prefix on it; return FALSE otherwise. */
svn_boolean_t
svn_path_is_url(const char *path)
{
  /* ### This function is reaaaaaaaaaaaaaally stupid right now.
     We're just going to look for:

        (scheme)://(optional_stuff)

     Where (scheme) has no ':' or '/' characters.

     Someday it might be nice to have an actual URI parser here.
  */
  return skip_uri_scheme(path) != NULL;
}
818
819
820
/* Here is the BNF for path components in a URI. "pchar" is a
   character in a path component.

      pchar       = unreserved | escaped |
                    ":" | "@" | "&" | "=" | "+" | "$" | ","
      unreserved  = alphanum | mark
      mark        = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"

   Note that "escaped" doesn't really apply to what users can put in
   their paths, so that really means the set of characters is:

      alphanum | mark | ":" | "@" | "&" | "=" | "+" | "$" | ","

   The table below is indexed by unsigned byte value; a 1 marks a byte
   that may appear unescaped, a 0 a byte that must be escaped.  In
   addition to the set above, '/' (47) is marked 1.  Entry 0 (NUL) is 0,
   which uri_escape() relies on to terminate its scan.  Each row holds 16
   entries.
*/
const char svn_uri__char_validity[256] = {
  /* 0: control characters */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  /* 32: ' ' through '/' */
  0, 1, 0, 0, 1, 0, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /* 48: '0' through '?' */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 0, 0, 1, 0, 0,

  /* 64 */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 0, 0, 0, 0, 1,
  0, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 0, 0, 0, 1, 0,

  /* 128: no byte with the high bit set is valid unescaped */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,

  /* 192 */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
};
858
859
860svn_boolean_t
861svn_path_is_uri_safe(const char *path)
862{
863  apr_size_t i;
864
865  /* Skip the URI scheme. */
866  path = skip_uri_scheme(path);
867
868  /* No scheme?  Get outta here. */
869  if (! path)
870    return FALSE;
871
872  /* Skip to the first slash that's after the URI scheme. */
873  path = strchr(path, '/');
874
875  /* If there's no first slash, then there's only a host portion;
876     therefore there couldn't be any uri-unsafe characters after the
877     host... so return true. */
878  if (path == NULL)
879    return TRUE;
880
881  for (i = 0; path[i]; i++)
882    {
883      /* Allow '%XX' (where each X is a hex digit) */
884      if (path[i] == '%')
885        {
886          if (svn_ctype_isxdigit(path[i + 1]) &&
887              svn_ctype_isxdigit(path[i + 2]))
888            {
889              i += 2;
890              continue;
891            }
892          return FALSE;
893        }
894      else if (! svn_uri__char_validity[((unsigned char)path[i])])
895        {
896          return FALSE;
897        }
898    }
899
900  return TRUE;
901}
902
903
904/* URI-encode each character c in PATH for which TABLE[c] is 0.
905   If no encoding was needed, return PATH, else return a new string allocated
906   in POOL. */
907static const char *
908uri_escape(const char *path, const char table[], apr_pool_t *pool)
909{
910  svn_stringbuf_t *retstr;
911  apr_size_t i, copied = 0;
912  int c;
913  apr_size_t len;
914  const char *p;
915
916  /* To terminate our scanning loop, table[NUL] must report "invalid". */
917  assert(table[0] == 0);
918
919  /* Quick check: Does any character need escaping? */
920  for (p = path; table[(unsigned char)*p]; ++p)
921    {}
922
923  /* No char to escape before EOS? */
924  if (*p == '\0')
925    return path;
926
927  /* We need to escape at least one character. */
928  len = strlen(p) + (p - path);
929  retstr = svn_stringbuf_create_ensure(len, pool);
930  for (i = p - path; i < len; i++)
931    {
932      c = (unsigned char)path[i];
933      if (table[c])
934        continue;
935
936      /* If we got here, we're looking at a character that isn't
937         supported by the (or at least, our) URI encoding scheme.  We
938         need to escape this character.  */
939
940      /* First things first, copy all the good stuff that we haven't
941         yet copied into our output buffer. */
942      if (i - copied)
943        svn_stringbuf_appendbytes(retstr, path + copied,
944                                  i - copied);
945
946      /* Now, write in our escaped character, consisting of the
947         '%' and two digits.  We cast the C to unsigned char here because
948         the 'X' format character will be tempted to treat it as an unsigned
949         int...which causes problem when messing with 0x80-0xFF chars.
950         We also need space for a null as apr_snprintf will write one. */
951      svn_stringbuf_ensure(retstr, retstr->len + 4);
952      apr_snprintf(retstr->data + retstr->len, 4, "%%%02X", (unsigned char)c);
953      retstr->len += 3;
954
955      /* Finally, update our copy counter. */
956      copied = i + 1;
957    }
958
959  /* Anything left to copy? */
960  if (i - copied)
961    svn_stringbuf_appendbytes(retstr, path + copied, i - copied);
962
963  /* retstr is null-terminated either by apr_snprintf or the svn_stringbuf
964     functions. */
965
966  return retstr->data;
967}
968
969
970const char *
971svn_path_uri_encode(const char *path, apr_pool_t *pool)
972{
973  const char *ret;
974
975  ret = uri_escape(path, svn_uri__char_validity, pool);
976
977  /* Our interface guarantees a copy. */
978  if (ret == path)
979    return apr_pstrdup(pool, path);
980  else
981    return ret;
982}
983
/* Escape table for svn_path_uri_from_iri(), indexed by unsigned byte
   value: 1 means "leave as is", 0 means "escape".  Bytes 1 through 127
   are left alone and bytes 128 through 255 are escaped.  Entry 0 (NUL)
   is 0, as uri_escape() requires.  Each row holds 16 entries. */
static const char iri_escape_chars[256] = {
  0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,

  /* 128 */
  0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0
};
1004
/* Convert IRI to a URI by "%XX"-escaping every byte that
   iri_escape_chars marks with 0 (the bytes 128..255).  Returns IRI
   itself if nothing needed escaping, otherwise a new string allocated
   in POOL (see uri_escape()). */
const char *
svn_path_uri_from_iri(const char *iri, apr_pool_t *pool)
{
  return uri_escape(iri, iri_escape_chars, pool);
}
1010
/* Escape table for svn_path_uri_autoescape(), indexed by unsigned byte
   value: 1 means "leave as is", 0 means "escape".  The escaped bytes are
   space (32), '"' (34), '<' (60), '>' (62), '\' (92), '^' (94),
   '`' (96), '{' (123), '|' (124) and '}' (125).  Entry 0 (NUL) is 0, as
   uri_escape() requires.  Each row holds 16 entries. */
static const char uri_autoescape_chars[256] = {
  0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  0, 1, 0, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 0, 1, 0, 1,

  /* 64 */
  1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 0, 1, 0, 1,
  0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 1, 1,

  /* 128 */
  1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,

  /* 192 */
  1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
};
1035
/* "%XX"-escape every byte of URI that uri_autoescape_chars marks with 0.
   Returns URI itself if nothing needed escaping, otherwise a new string
   allocated in POOL (see uri_escape()). */
const char *
svn_path_uri_autoescape(const char *uri, apr_pool_t *pool)
{
  return uri_escape(uri, uri_autoescape_chars, pool);
}
1041
1042const char *
1043svn_path_uri_decode(const char *path, apr_pool_t *pool)
1044{
1045  svn_stringbuf_t *retstr;
1046  apr_size_t i;
1047  svn_boolean_t query_start = FALSE;
1048
1049  /* avoid repeated realloc */
1050  retstr = svn_stringbuf_create_ensure(strlen(path) + 1, pool);
1051
1052  retstr->len = 0;
1053  for (i = 0; path[i]; i++)
1054    {
1055      char c = path[i];
1056
1057      if (c == '?')
1058        {
1059          /* Mark the start of the query string, if it exists. */
1060          query_start = TRUE;
1061        }
1062      else if (c == '+' && query_start)
1063        {
1064          /* Only do this if we are into the query string.
1065           * RFC 2396, section 3.3  */
1066          c = ' ';
1067        }
1068      else if (c == '%' && svn_ctype_isxdigit(path[i + 1])
1069               && svn_ctype_isxdigit(path[i+2]))
1070        {
1071          char digitz[3];
1072          digitz[0] = path[++i];
1073          digitz[1] = path[++i];
1074          digitz[2] = '\0';
1075          c = (char)(strtol(digitz, NULL, 16));
1076        }
1077
1078      retstr->data[retstr->len++] = c;
1079    }
1080
1081  /* Null-terminate this bad-boy. */
1082  retstr->data[retstr->len] = 0;
1083
1084  return retstr->data;
1085}
1086
1087
1088const char *
1089svn_path_url_add_component2(const char *url,
1090                            const char *component,
1091                            apr_pool_t *pool)
1092{
1093  /* = svn_path_uri_encode() but without always copying */
1094  component = uri_escape(component, svn_uri__char_validity, pool);
1095
1096  return svn_path_join_internal(url, component, pool);
1097}
1098
1099svn_error_t *
1100svn_path_get_absolute(const char **pabsolute,
1101                      const char *relative,
1102                      apr_pool_t *pool)
1103{
1104  if (svn_path_is_url(relative))
1105    {
1106      *pabsolute = apr_pstrdup(pool, relative);
1107      return SVN_NO_ERROR;
1108    }
1109
1110  return svn_dirent_get_absolute(pabsolute, relative, pool);
1111}
1112
1113
1114#if !defined(WIN32) && !defined(DARWIN)
1115/** Get APR's internal path encoding. */
1116static svn_error_t *
1117get_path_encoding(svn_boolean_t *path_is_utf8, apr_pool_t *pool)
1118{
1119  apr_status_t apr_err;
1120  int encoding_style;
1121
1122  apr_err = apr_filepath_encoding(&encoding_style, pool);
1123  if (apr_err)
1124    return svn_error_wrap_apr(apr_err,
1125                              _("Can't determine the native path encoding"));
1126
1127  /* ### What to do about APR_FILEPATH_ENCODING_UNKNOWN?
1128     Well, for now we'll just punt to the svn_utf_ functions;
1129     those will at least do the ASCII-subset check. */
1130  *path_is_utf8 = (encoding_style == APR_FILEPATH_ENCODING_UTF8);
1131  return SVN_NO_ERROR;
1132}
1133#endif
1134
1135
1136svn_error_t *
1137svn_path_cstring_from_utf8(const char **path_apr,
1138                           const char *path_utf8,
1139                           apr_pool_t *pool)
1140{
1141#if !defined(WIN32) && !defined(DARWIN)
1142  svn_boolean_t path_is_utf8;
1143  SVN_ERR(get_path_encoding(&path_is_utf8, pool));
1144  if (path_is_utf8)
1145#endif
1146    {
1147      *path_apr = apr_pstrdup(pool, path_utf8);
1148      return SVN_NO_ERROR;
1149    }
1150#if !defined(WIN32) && !defined(DARWIN)
1151  else
1152    return svn_utf_cstring_from_utf8(path_apr, path_utf8, pool);
1153#endif
1154}
1155
1156
1157svn_error_t *
1158svn_path_cstring_to_utf8(const char **path_utf8,
1159                         const char *path_apr,
1160                         apr_pool_t *pool)
1161{
1162#if !defined(WIN32) && !defined(DARWIN)
1163  svn_boolean_t path_is_utf8;
1164  SVN_ERR(get_path_encoding(&path_is_utf8, pool));
1165  if (path_is_utf8)
1166#endif
1167    {
1168      *path_utf8 = apr_pstrdup(pool, path_apr);
1169      return SVN_NO_ERROR;
1170    }
1171#if !defined(WIN32) && !defined(DARWIN)
1172  else
1173    return svn_utf_cstring_to_utf8(path_utf8, path_apr, pool);
1174#endif
1175}
1176
1177
1178const char *
1179svn_path_illegal_path_escape(const char *path, apr_pool_t *pool)
1180{
1181  svn_stringbuf_t *retstr;
1182  apr_size_t i, copied = 0;
1183  int c;
1184
1185  /* At least one control character:
1186      strlen - 1 (control) + \ + N + N + N + null . */
1187  retstr = svn_stringbuf_create_ensure(strlen(path) + 4, pool);
1188  for (i = 0; path[i]; i++)
1189    {
1190      c = (unsigned char)path[i];
1191      if (! svn_ctype_iscntrl(c))
1192        continue;
1193
1194      /* If we got here, we're looking at a character that isn't
1195         supported by the (or at least, our) URI encoding scheme.  We
1196         need to escape this character.  */
1197
1198      /* First things first, copy all the good stuff that we haven't
1199         yet copied into our output buffer. */
1200      if (i - copied)
1201        svn_stringbuf_appendbytes(retstr, path + copied,
1202                                  i - copied);
1203
1204      /* Make sure buffer is big enough for '\' 'N' 'N' 'N' (and NUL) */
1205      svn_stringbuf_ensure(retstr, retstr->len + 5);
1206      /*### The backslash separator doesn't work too great with Windows,
1207         but it's what we'll use for consistency with invalid utf8
1208         formatting (until someone has a better idea) */
1209      apr_snprintf(retstr->data + retstr->len, 5, "\\%03o", (unsigned char)c);
1210      retstr->len += 4;
1211
1212      /* Finally, update our copy counter. */
1213      copied = i + 1;
1214    }
1215
1216  /* If we didn't encode anything, we don't need to duplicate the string. */
1217  if (retstr->len == 0)
1218    return path;
1219
1220  /* Anything left to copy? */
1221  if (i - copied)
1222    svn_stringbuf_appendbytes(retstr, path + copied, i - copied);
1223
1224  /* retstr is null-terminated either by apr_snprintf or the svn_stringbuf
1225     functions. */
1226
1227  return retstr->data;
1228}
1229
1230svn_error_t *
1231svn_path_check_valid(const char *path, apr_pool_t *pool)
1232{
1233  const char *c;
1234
1235  for (c = path; *c; c++)
1236    {
1237      if (svn_ctype_iscntrl(*c))
1238        {
1239          return svn_error_createf(SVN_ERR_FS_PATH_SYNTAX, NULL,
1240             _("Invalid control character '0x%02x' in path '%s'"),
1241             (unsigned char)*c,
1242             svn_path_illegal_path_escape(svn_dirent_local_style(path, pool),
1243                                          pool));
1244        }
1245    }
1246
1247  return SVN_NO_ERROR;
1248}
1249
1250void
1251svn_path_splitext(const char **path_root,
1252                  const char **path_ext,
1253                  const char *path,
1254                  apr_pool_t *pool)
1255{
1256  const char *last_dot, *last_slash;
1257
1258  /* Easy out -- why do all the work when there's no way to report it? */
1259  if (! (path_root || path_ext))
1260    return;
1261
1262  /* Do we even have a period in this thing?  And if so, is there
1263     anything after it?  We look for the "rightmost" period in the
1264     string. */
1265  last_dot = strrchr(path, '.');
1266  if (last_dot && (*(last_dot + 1) != '\0'))
1267    {
1268      /* If we have a period, we need to make sure it occurs in the
1269         final path component -- that there's no path separator
1270         between the last period and the end of the PATH -- otherwise,
1271         it doesn't count.  Also, we want to make sure that our period
1272         isn't the first character of the last component. */
1273      last_slash = strrchr(path, '/');
1274      if ((last_slash && (last_dot > (last_slash + 1)))
1275          || ((! last_slash) && (last_dot > path)))
1276        {
1277          if (path_root)
1278            *path_root = apr_pstrmemdup(pool, path,
1279                                        (last_dot - path + 1) * sizeof(*path));
1280          if (path_ext)
1281            *path_ext = apr_pstrdup(pool, last_dot + 1);
1282          return;
1283        }
1284    }
1285  /* If we get here, we never found a suitable separator character, so
1286     there's no split. */
1287  if (path_root)
1288    *path_root = apr_pstrdup(pool, path);
1289  if (path_ext)
1290    *path_ext = "";
1291}
1292
1293
1294/* Repository relative URLs (^/). */
1295
1296svn_boolean_t
1297svn_path_is_repos_relative_url(const char *path)
1298{
1299  return (0 == strncmp("^/", path, 2));
1300}
1301
1302svn_error_t *
1303svn_path_resolve_repos_relative_url(const char **absolute_url,
1304                                    const char *relative_url,
1305                                    const char *repos_root_url,
1306                                    apr_pool_t *pool)
1307{
1308  if (! svn_path_is_repos_relative_url(relative_url))
1309    return svn_error_createf(SVN_ERR_BAD_URL, NULL,
1310                             _("Improper relative URL '%s'"),
1311                             relative_url);
1312
1313  /* No assumptions are made about the canonicalization of the input
1314   * arguments, it is presumed that the output will be canonicalized after
1315   * this function, which will remove any duplicate path separator.
1316   */
1317  *absolute_url = apr_pstrcat(pool, repos_root_url, relative_url + 1,
1318                              SVN_VA_NULL);
1319
1320  return SVN_NO_ERROR;
1321}
1322
1323