1/*
2 * dirent_uri.c:   a library to manipulate URIs and directory entries.
3 *
4 * ====================================================================
5 *    Licensed to the Apache Software Foundation (ASF) under one
6 *    or more contributor license agreements.  See the NOTICE file
7 *    distributed with this work for additional information
8 *    regarding copyright ownership.  The ASF licenses this file
9 *    to you under the Apache License, Version 2.0 (the
10 *    "License"); you may not use this file except in compliance
11 *    with the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 *    Unless required by applicable law or agreed to in writing,
16 *    software distributed under the License is distributed on an
17 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 *    KIND, either express or implied.  See the License for the
19 *    specific language governing permissions and limitations
20 *    under the License.
21 * ====================================================================
22 */
23
24
25
26#include <string.h>
27#include <assert.h>
28#include <ctype.h>
29
30#include <apr_uri.h>
31#include <apr_lib.h>
32
33#include "svn_private_config.h"
34#include "svn_string.h"
35#include "svn_dirent_uri.h"
36#include "svn_path.h"
37#include "svn_ctype.h"
38
39#include "dirent_uri.h"
40#include "private/svn_fspath.h"
41#include "private/svn_cert.h"
42
/* The canonical empty path.  Can this be changed?  Changing it requires
   adjusting the SVN_PATH_IS_EMPTY test below; the path library would then
   still work, but it is not certain that the fs/wc libraries would. */
#define SVN_EMPTY_PATH ""

/* TRUE if S is the canonical empty path (i.e. its first byte is the NUL
   terminator), FALSE otherwise.  S is evaluated exactly once. */
#define SVN_PATH_IS_EMPTY(s) ((s)[0] == '\0')

/* TRUE if the N-byte string S is the platform's empty path ("."), FALSE
   otherwise.  Can this be changed?  The path library would still work,
   but the operating system may not agree. */
#define SVN_PATH_IS_PLATFORM_EMPTY(s,n) ((n) == 1 && (s)[0] == '.')

/* Define SVN_USE_DOS_PATHS on platforms that use drive letters and UNC
   paths.  This check must match the check on top of dirent_uri-tests.c
   and path-tests.c */
#if defined(WIN32) || defined(__CYGWIN__) || defined(__OS2__)
#define SVN_USE_DOS_PATHS
#endif

/* Path type definition. Used only by internal functions to select which
   set of rules (URI, dirent or relpath) a shared helper applies. */
typedef enum path_type_t {
  type_uri,
  type_dirent,
  type_relpath
} path_type_t;


/**** Forward declarations *****/

/* Declared here so it can be referenced before its definition later in
   this file. */
static svn_boolean_t
relpath_is_canonical(const char *relpath);
74
75
76/**** Internal implementation functions *****/
77
78/* Return an internal-style new path based on PATH, allocated in POOL.
79 *
80 * "Internal-style" means that separators are all '/'.
81 */
82static const char *
83internal_style(const char *path, apr_pool_t *pool)
84{
85#if '/' != SVN_PATH_LOCAL_SEPARATOR
86    {
87      char *p = apr_pstrdup(pool, path);
88      path = p;
89
90      /* Convert all local-style separators to the canonical ones. */
91      for (; *p != '\0'; ++p)
92        if (*p == SVN_PATH_LOCAL_SEPARATOR)
93          *p = '/';
94    }
95#endif
96
97  return path;
98}
99
/* Locale-independent replacement for tolower(), used while
   canonicalizing parts of dirents and URLs: the letters 'A'..'Z' map to
   'a'..'z', every other character is returned unchanged. */
static char
canonicalize_to_lower(char c)
{
  return (c >= 'A' && c <= 'Z') ? (char)(c - 'A' + 'a') : c;
}
110
/* Locale-independent replacement for toupper(), used while
   canonicalizing parts of dirents and URLs: the letters 'a'..'z' map to
   'A'..'Z', every other character is returned unchanged. */
static char
canonicalize_to_upper(char c)
{
  return (c >= 'a' && c <= 'z') ? (char)(c - 'a' + 'A') : c;
}
121
122/* Calculates the length of the dirent absolute or non absolute root in
123   DIRENT, return 0 if dirent is not rooted  */
124static apr_size_t
125dirent_root_length(const char *dirent, apr_size_t len)
126{
127#ifdef SVN_USE_DOS_PATHS
128  if (len >= 2 && dirent[1] == ':' &&
129      ((dirent[0] >= 'A' && dirent[0] <= 'Z') ||
130       (dirent[0] >= 'a' && dirent[0] <= 'z')))
131    {
132      return (len > 2 && dirent[2] == '/') ? 3 : 2;
133    }
134
135  if (len > 2 && dirent[0] == '/' && dirent[1] == '/')
136    {
137      apr_size_t i = 2;
138
139      while (i < len && dirent[i] != '/')
140        i++;
141
142      if (i == len)
143        return len; /* Cygwin drive alias, invalid path on WIN32 */
144
145      i++; /* Skip '/' */
146
147      while (i < len && dirent[i] != '/')
148        i++;
149
150      return i;
151    }
152#endif /* SVN_USE_DOS_PATHS */
153  if (len >= 1 && dirent[0] == '/')
154    return 1;
155
156  return 0;
157}
158
159
160/* Return the length of substring necessary to encompass the entire
161 * previous dirent segment in DIRENT, which should be a LEN byte string.
162 *
163 * A trailing slash will not be included in the returned length except
164 * in the case in which DIRENT is absolute and there are no more
165 * previous segments.
166 */
167static apr_size_t
168dirent_previous_segment(const char *dirent,
169                        apr_size_t len)
170{
171  if (len == 0)
172    return 0;
173
174  --len;
175  while (len > 0 && dirent[len] != '/'
176#ifdef SVN_USE_DOS_PATHS
177                 && (dirent[len] != ':' || len != 1)
178#endif /* SVN_USE_DOS_PATHS */
179        )
180    --len;
181
182  /* check if the remaining segment including trailing '/' is a root dirent */
183  if (dirent_root_length(dirent, len+1) == len + 1)
184    return len + 1;
185  else
186    return len;
187}
188
189/* Calculates the length occupied by the schema defined root of URI */
190static apr_size_t
191uri_schema_root_length(const char *uri, apr_size_t len)
192{
193  apr_size_t i;
194
195  for (i = 0; i < len; i++)
196    {
197      if (uri[i] == '/')
198        {
199          if (i > 0 && uri[i-1] == ':' && i < len-1 && uri[i+1] == '/')
200            {
201              /* We have an absolute uri */
202              if (i == 5 && strncmp("file", uri, 4) == 0)
203                return 7; /* file:// */
204              else
205                {
206                  for (i += 2; i < len; i++)
207                    if (uri[i] == '/')
208                      return i;
209
210                  return len; /* Only a hostname is found */
211                }
212            }
213          else
214            return 0;
215        }
216    }
217
218  return 0;
219}
220
221/* Returns TRUE if svn_dirent_is_absolute(dirent) or when dirent has
222   a non absolute root. (E.g. '/' or 'F:' on Windows) */
223static svn_boolean_t
224dirent_is_rooted(const char *dirent)
225{
226  if (! dirent)
227    return FALSE;
228
229  /* Root on all systems */
230  if (dirent[0] == '/')
231    return TRUE;
232
233  /* On Windows, dirent is also absolute when it starts with 'H:' or 'H:/'
234     where 'H' is any letter. */
235#ifdef SVN_USE_DOS_PATHS
236  if (((dirent[0] >= 'A' && dirent[0] <= 'Z') ||
237       (dirent[0] >= 'a' && dirent[0] <= 'z')) &&
238      (dirent[1] == ':'))
239     return TRUE;
240#endif /* SVN_USE_DOS_PATHS */
241
242  return FALSE;
243}
244
245/* Return the length of substring necessary to encompass the entire
246 * previous relpath segment in RELPATH, which should be a LEN byte string.
247 *
248 * A trailing slash will not be included in the returned length.
249 */
250static apr_size_t
251relpath_previous_segment(const char *relpath,
252                         apr_size_t len)
253{
254  if (len == 0)
255    return 0;
256
257  --len;
258  while (len > 0 && relpath[len] != '/')
259    --len;
260
261  return len;
262}
263
264/* Return the length of substring necessary to encompass the entire
265 * previous uri segment in URI, which should be a LEN byte string.
266 *
267 * A trailing slash will not be included in the returned length except
268 * in the case in which URI is absolute and there are no more
269 * previous segments.
270 */
271static apr_size_t
272uri_previous_segment(const char *uri,
273                     apr_size_t len)
274{
275  apr_size_t root_length;
276  apr_size_t i = len;
277  if (len == 0)
278    return 0;
279
280  root_length = uri_schema_root_length(uri, len);
281
282  --i;
283  while (len > root_length && uri[i] != '/')
284    --i;
285
286  if (i == 0 && len > 1 && *uri == '/')
287    return 1;
288
289  return i;
290}
291
/* Return the canonicalized version of PATH, of type TYPE, allocated in
 * POOL.
 *
 * An empty PATH is returned as the string literal "" (and must not be of
 * type_uri).  Otherwise the result is built in a buffer of
 * strlen(PATH) + 1 bytes taken from POOL, in these steps:
 *
 *  1. For type_uri inputs that start with "scheme://": the scheme is
 *     lower-cased, any "user[:password]@" part is copied verbatim, the
 *     host name (or bracketed IPv6 literal) is lower-cased, and a port is
 *     dropped when it is empty or is the default for the scheme
 *     (80 for http, 443 for https, 3690 for svn).
 *  2. For other non-relpath inputs: a leading '/' is copied; with
 *     SVN_USE_DOS_PATHS a second leading '/' (UNC) is kept for dirents
 *     and a leading drive letter is upper-cased.
 *  3. The remaining segments are copied one by one.  Empty segments and
 *     "." segments are dropped; for URIs "%2E" counts as "." and "%2F"
 *     counts as a separator (and is emitted as '/').
 *  4. A trailing '/' is removed unless the input is nothing but
 *     "scheme://".
 *  5. With SVN_USE_DOS_PATHS, a dirent starting with "//" that has fewer
 *     than two segments loses one of its leading slashes; otherwise its
 *     server name is lower-cased.
 *  6. For URIs with data after the authority part, that data is
 *     re-encoded: percent-escapes of characters that
 *     svn_uri__char_validity marks as valid are decoded, other escapes
 *     get upper-case hex digits, characters marked invalid are escaped,
 *     and a '%' not followed by two hex digits becomes "%25".  If this
 *     needs more room than the original buffer, a new buffer is
 *     allocated from POOL.
 */
static const char *
canonicalize(path_type_t type, const char *path, apr_pool_t *pool)
{
  char *canon, *dst;
  const char *src;
  apr_size_t seglen;
  apr_size_t schemelen = 0;
  apr_size_t canon_segments = 0;
  svn_boolean_t url = FALSE;
  char *schema_data = NULL;

  /* "" is already canonical, so just return it; note that later code
     depends on path not being zero-length.  */
  if (SVN_PATH_IS_EMPTY(path))
    {
      assert(type != type_uri);
      return "";
    }

  /* Steps 1-5 never emit more bytes than they consume, so a buffer the
     size of the input is sufficient for them. */
  dst = canon = apr_pcalloc(pool, strlen(path) + 1);

  /* If this is supposed to be an URI, it should start with
     "scheme://".  We'll copy the scheme, host name, etc. to DST and
     set URL = TRUE. */
  src = path;
  if (type == type_uri)
    {
      assert(*src != '/');

      /* Look ahead for the end of a potential scheme name. */
      while (*src && (*src != '/') && (*src != ':'))
        src++;

      if (*src == ':' && *(src+1) == '/' && *(src+2) == '/')
        {
          const char *seg;

          url = TRUE;

          /* Found a scheme, convert to lowercase and copy to dst. */
          src = path;
          while (*src != ':')
            {
              *(dst++) = canonicalize_to_lower((*src++));
              schemelen++;
            }
          *(dst++) = ':';
          *(dst++) = '/';
          *(dst++) = '/';
          src += 3;
          schemelen += 3;

          /* This might be the hostname */
          seg = src;
          while (*src && (*src != '/') && (*src != '@'))
            src++;

          if (*src == '@')
            {
              /* Copy the username & password. */
              seglen = src - seg + 1;
              memcpy(dst, seg, seglen);
              dst += seglen;
              src++;
            }
          else
            src = seg;  /* No user info: rewind to the start of the host. */

          /* Found a hostname, convert to lowercase and copy to dst. */
          if (*src == '[')
            {
             *(dst++) = *(src++); /* Copy '[' */

              /* Copy the characters of a bracketed IPv6 literal
                 (hex digits and ':'), lower-casing the hex digits. */
              while (*src == ':'
                     || (*src >= '0' && (*src <= '9'))
                     || (*src >= 'a' && (*src <= 'f'))
                     || (*src >= 'A' && (*src <= 'F')))
                {
                  *(dst++) = canonicalize_to_lower((*src++));
                }

              if (*src == ']')
                *(dst++) = *(src++); /* Copy ']' */
            }
          else
            while (*src && (*src != '/') && (*src != ':'))
              *(dst++) = canonicalize_to_lower((*src++));

          if (*src == ':')
            {
              /* We probably have a port number: Is it a default portnumber
                 which doesn't belong in a canonical url? */
              if (src[1] == '8' && src[2] == '0'
                  && (src[3]== '/'|| !src[3])
                  && !strncmp(canon, "http:", 5))
                {
                  src += 3;
                }
              else if (src[1] == '4' && src[2] == '4' && src[3] == '3'
                       && (src[4]== '/'|| !src[4])
                       && !strncmp(canon, "https:", 6))
                {
                  src += 4;
                }
              else if (src[1] == '3' && src[2] == '6'
                       && src[3] == '9' && src[4] == '0'
                       && (src[5]== '/'|| !src[5])
                       && !strncmp(canon, "svn:", 4))
                {
                  src += 5;
                }
              else if (src[1] == '/' || !src[1])
                {
                  /* A ':' without any port digits: drop it. */
                  src += 1;
                }

              /* Copy whatever is left of the authority part (a
                 non-default port, if it was not skipped above). */
              while (*src && (*src != '/'))
                *(dst++) = canonicalize_to_lower((*src++));
            }

          /* Copy trailing slash, or null-terminator. */
          *(dst) = *(src);

          /* Move src and dst forward only if we are not
           * at null-terminator yet. */
          if (*src)
            {
              src++;
              dst++;
              /* Everything from here on is subject to the character
                 normalization pass at the end of this function. */
              schema_data = dst;
            }

          /* The "scheme://authority" part counts as one segment. */
          canon_segments = 1;
        }
    }

  /* Copy to DST any separator or drive letter that must come before the
     first regular path segment. */
  if (! url && type != type_relpath)
    {
      src = path;
      /* If this is an absolute path, then just copy over the initial
         separator character. */
      if (*src == '/')
        {
          *(dst++) = *(src++);

#ifdef SVN_USE_DOS_PATHS
          /* On Windows permit two leading separator characters which means an
           * UNC path. */
          if ((type == type_dirent) && *src == '/')
            *(dst++) = *(src++);
#endif /* SVN_USE_DOS_PATHS */
        }
#ifdef SVN_USE_DOS_PATHS
      /* On Windows the first segment can be a drive letter, which we normalize
         to upper case. */
      else if (type == type_dirent &&
               ((*src >= 'a' && *src <= 'z') ||
                (*src >= 'A' && *src <= 'Z')) &&
               (src[1] == ':'))
        {
          *(dst++) = canonicalize_to_upper(*(src++));
          /* Leave the ':' to be processed as (or as part of) a path segment
             by the following code block, so we need not care whether it has
             a slash after it. */
        }
#endif /* SVN_USE_DOS_PATHS */
    }

  while (*src)
    {
      /* Parse each segment, finding the closing '/' (which might look
         like '%2F' for URIs).  */
      const char *next = src;
      apr_size_t slash_len = 0;

      while (*next
             && (next[0] != '/')
             && (! (type == type_uri && next[0] == '%' && next[1] == '2' &&
                    canonicalize_to_upper(next[2]) == 'F')))
        {
          ++next;
        }

      /* Record how long our "slash" is. */
      if (next[0] == '/')
        slash_len = 1;
      else if (type == type_uri && next[0] == '%')
        slash_len = 3;

      seglen = next - src;

      if (seglen == 0
          || (seglen == 1 && src[0] == '.')
          || (type == type_uri && seglen == 3 && src[0] == '%' && src[1] == '2'
              && canonicalize_to_upper(src[2]) == 'E'))
        {
          /* Empty or noop segment, so do nothing.  (For URIs, '%2E'
             is equivalent to '.').  */
        }
#ifdef SVN_USE_DOS_PATHS
      /* If this is the first path segment of a file:// URI and it contains a
         windows drive letter, convert the drive letter to upper case. */
      else if (url && canon_segments == 1 && seglen == 2 &&
               (strncmp(canon, "file:", 5) == 0) &&
               src[0] >= 'a' && src[0] <= 'z' && src[1] == ':')
        {
          *(dst++) = canonicalize_to_upper(src[0]);
          *(dst++) = ':';
          if (*next)
            *(dst++) = *next;
          canon_segments++;
        }
#endif /* SVN_USE_DOS_PATHS */
      else
        {
          /* An actual segment, append it to the destination path */
          memcpy(dst, src, seglen);
          dst += seglen;
          if (slash_len)
            *(dst++) = '/';
          canon_segments++;
        }

      /* Skip over trailing slash to the next segment. */
      src = next + slash_len;
    }

  /* Remove the trailing slash if there was at least one
   * canonical segment and the last segment ends with a slash.
   *
   * But keep in mind that, for URLs, the scheme counts as a
   * canonical segment -- so if path is ONLY a scheme (such
   * as "https://") we should NOT remove the trailing slash. */
  if ((canon_segments > 0 && *(dst - 1) == '/')
      && ! (url && path[schemelen] == '\0'))
    {
      dst --;
    }

  *dst = '\0';

#ifdef SVN_USE_DOS_PATHS
  /* Skip leading double slashes when there are less than 2
   * canon segments. UNC paths *MUST* have two segments. */
  if ((type == type_dirent) && canon[0] == '/' && canon[1] == '/')
    {
      if (canon_segments < 2)
        return canon + 1;
      else
        {
          /* Now we're sure this is a valid UNC path, convert the server name
             (the first path segment) to lowercase as Windows treats it as case
             insensitive.
             Note: normally the share name is treated as case insensitive too,
             but it seems to be possible to configure Samba to treat those as
             case sensitive, so better leave that alone. */
          for (dst = canon + 2; *dst && *dst != '/'; dst++)
            *dst = canonicalize_to_lower(*dst);
        }
    }
#endif /* SVN_USE_DOS_PATHS */

  /* Check the normalization of characters in a uri */
  if (schema_data)
    {
      int need_extra = 0;
      src = schema_data;

      /* First pass: count how many extra bytes escaping will require.
         Each character that has to be turned into a three byte escape
         sequence needs two additional bytes. */
      while (*src)
        {
          switch (*src)
            {
              case '/':
                break;
              case '%':
                if (!svn_ctype_isxdigit(*(src+1)) ||
                    !svn_ctype_isxdigit(*(src+2)))
                  need_extra += 2;
                else
                  src += 2;
                break;
              default:
                if (!svn_uri__char_validity[(unsigned char)*src])
                  need_extra += 2;
                break;
            }
          src++;
        }

      if (need_extra > 0)
        {
          apr_size_t pre_schema_size = (apr_size_t)(schema_data - canon);

          /* The result will be longer than the current buffer: allocate
             a new one and carry over the part in front of SCHEMA_DATA. */
          dst = apr_palloc(pool, (apr_size_t)(src - canon) + need_extra + 1);
          memcpy(dst, canon, pre_schema_size);
          canon = dst;

          dst += pre_schema_size;
        }
      else
        /* Nothing grows, so the rewrite below is done in place. */
        dst = schema_data;

      src = schema_data;

      /* Second pass: write the normalized characters. */
      while (*src)
        {
          switch (*src)
            {
              case '/':
                *(dst++) = '/';
                break;
              case '%':
                if (!svn_ctype_isxdigit(*(src+1)) ||
                    !svn_ctype_isxdigit(*(src+2)))
                  {
                    /* A '%' that does not start a valid escape sequence
                       is itself escaped. */
                    *(dst++) = '%';
                    *(dst++) = '2';
                    *(dst++) = '5';
                  }
                else
                  {
                    char digitz[3];
                    int val;

                    digitz[0] = *(++src);
                    digitz[1] = *(++src);
                    digitz[2] = 0;

                    val = (int)strtol(digitz, NULL, 16);

                    /* Decode escapes of characters that are valid as-is;
                       keep all others escaped, with upper-case digits. */
                    if (svn_uri__char_validity[(unsigned char)val])
                      *(dst++) = (char)val;
                    else
                      {
                        *(dst++) = '%';
                        *(dst++) = canonicalize_to_upper(digitz[0]);
                        *(dst++) = canonicalize_to_upper(digitz[1]);
                      }
                  }
                break;
              default:
                if (!svn_uri__char_validity[(unsigned char)*src])
                  {
                    apr_snprintf(dst, 4, "%%%02X", (unsigned char)*src);
                    dst += 3;
                  }
                else
                  *(dst++) = *src;
                break;
            }
          src++;
        }
      *dst = '\0';
    }

  return canon;
}
653
/* Return the string length of the longest common ancestor of PATH1 and PATH2.
 * Pass type_uri for TYPES if PATH1 and PATH2 are URIs, and type_dirent if
 * PATH1 and PATH2 are regular paths.
 *
 * If the two paths do not share a common ancestor, or if either of them
 * is empty, return 0.
 *
 * The returned value is a length only; the body below does not allocate
 * anything and does not reference POOL.
 */
static apr_size_t
get_longest_ancestor_length(path_type_t types,
                            const char *path1,
                            const char *path2,
                            apr_pool_t *pool)
{
  apr_size_t path1_len, path2_len;
  apr_size_t i = 0;
  apr_size_t last_dirsep = 0;
#ifdef SVN_USE_DOS_PATHS
  svn_boolean_t unc = FALSE;
#endif

  path1_len = strlen(path1);
  path2_len = strlen(path2);

  if (SVN_PATH_IS_EMPTY(path1) || SVN_PATH_IS_EMPTY(path2))
    return 0;

  /* Walk over the common prefix of both paths, byte by byte. */
  while (path1[i] == path2[i])
    {
      /* Keep track of the last directory separator we hit. */
      if (path1[i] == '/')
        last_dirsep = i;

      i++;

      /* If we get to the end of either path, break out. */
      if ((i == path1_len) || (i == path2_len))
        break;
    }

  /* two special cases:
     1. '/' is the longest common ancestor of '/' and '/foo' */
  if (i == 1 && path1[0] == '/' && path2[0] == '/')
    return 1;
  /* 2. '' is the longest common ancestor of any non-matching
   * strings 'foo' and 'bar' */
  if (types == type_dirent && i == 0)
    return 0;

  /* Handle some windows specific cases */
#ifdef SVN_USE_DOS_PATHS
  if (types == type_dirent)
    {
      /* don't count the '//' from UNC paths */
      if (last_dirsep == 1 && path1[0] == '/' && path1[1] == '/')
        {
          last_dirsep = 0;
          unc = TRUE;
        }

      /* X:/ and X:/foo */
      if (i == 3 && path1[2] == '/' && path1[1] == ':')
        return i;

      /* Cannot use SVN_ERR_ASSERT here, so we'll have to crash, sorry.
       * Note that this assertion triggers only if the code above has
       * been broken. The code below relies on this assertion, because
       * it uses [i - 1] as index. */
      assert(i > 0);

      /* X: and X:/ */
      if ((path1[i - 1] == ':' && path2[i] == '/') ||
          (path2[i - 1] == ':' && path1[i] == '/'))
          return 0;
      /* X: and X:foo */
      if (path1[i - 1] == ':' || path2[i - 1] == ':')
          return i;
    }
#endif /* SVN_USE_DOS_PATHS */

  /* last_dirsep is now the offset of the last directory separator we
     crossed before reaching a non-matching byte.  i is the offset of
     that non-matching byte, and is guaranteed to be <= the length of
     whichever path is shorter.
     If one of the paths is the common part return that. */
  if (((i == path1_len) && (path2[i] == '/'))
           || ((i == path2_len) && (path1[i] == '/'))
           || ((i == path1_len) && (i == path2_len)))
    return i;
  else
    {
      /* Nothing in common but the root folder '/' or 'X:/' for Windows
         dirents. */
#ifdef SVN_USE_DOS_PATHS
      /* NOTE: the brace opened by this 'if' is closed inside the
         matching #ifdef further down; on other platforms only the
         '/' check in between is compiled. */
      if (! unc)
        {
          /* X:/foo and X:/bar returns X:/ */
          if ((types == type_dirent) &&
              last_dirsep == 2 && path1[1] == ':' && path1[2] == '/'
                               && path2[1] == ':' && path2[2] == '/')
            return 3;
#endif /* SVN_USE_DOS_PATHS */
          if (last_dirsep == 0 && path1[0] == '/' && path2[0] == '/')
            return 1;
#ifdef SVN_USE_DOS_PATHS
        }
#endif
    }

  /* Otherwise the common ancestor ends right before the last separator
     both paths have in common. */
  return last_dirsep;
}
765
766/* Determine whether PATH2 is a child of PATH1.
767 *
768 * PATH2 is a child of PATH1 if
769 * 1) PATH1 is empty, and PATH2 is not empty and not an absolute path.
770 * or
771 * 2) PATH2 is has n components, PATH1 has x < n components,
772 *    and PATH1 matches PATH2 in all its x components.
773 *    Components are separated by a slash, '/'.
774 *
775 * Pass type_uri for TYPE if PATH1 and PATH2 are URIs, and type_dirent if
776 * PATH1 and PATH2 are regular paths.
777 *
778 * If PATH2 is not a child of PATH1, return NULL.
779 *
780 * If PATH2 is a child of PATH1, and POOL is not NULL, allocate a copy
781 * of the child part of PATH2 in POOL and return a pointer to the
782 * newly allocated child part.
783 *
784 * If PATH2 is a child of PATH1, and POOL is NULL, return a pointer
785 * pointing to the child part of PATH2.
786 * */
787static const char *
788is_child(path_type_t type, const char *path1, const char *path2,
789         apr_pool_t *pool)
790{
791  apr_size_t i;
792
793  /* Allow "" and "foo" or "H:foo" to be parent/child */
794  if (SVN_PATH_IS_EMPTY(path1))               /* "" is the parent  */
795    {
796      if (SVN_PATH_IS_EMPTY(path2))            /* "" not a child    */
797        return NULL;
798
799      /* check if this is an absolute path */
800      if ((type == type_uri) ||
801          (type == type_dirent && dirent_is_rooted(path2)))
802        return NULL;
803      else
804        /* everything else is child */
805        return pool ? apr_pstrdup(pool, path2) : path2;
806    }
807
808  /* Reach the end of at least one of the paths.  How should we handle
809     things like path1:"foo///bar" and path2:"foo/bar/baz"?  It doesn't
810     appear to arise in the current Subversion code, it's not clear to me
811     if they should be parent/child or not. */
812  /* Hmmm... aren't paths assumed to be canonical in this function?
813   * How can "foo///bar" even happen if the paths are canonical? */
814  for (i = 0; path1[i] && path2[i]; i++)
815    if (path1[i] != path2[i])
816      return NULL;
817
818  /* FIXME: This comment does not really match
819   * the checks made in the code it refers to: */
820  /* There are two cases that are parent/child
821          ...      path1[i] == '\0'
822          .../foo  path2[i] == '/'
823      or
824          /        path1[i] == '\0'
825          /foo     path2[i] != '/'
826
827     Other root paths (like X:/) fall under the former case:
828          X:/        path1[i] == '\0'
829          X:/foo     path2[i] != '/'
830
831     Check for '//' to avoid matching '/' and '//srv'.
832  */
833  if (path1[i] == '\0' && path2[i])
834    {
835      if (path1[i - 1] == '/'
836#ifdef SVN_USE_DOS_PATHS
837          || ((type == type_dirent) && path1[i - 1] == ':')
838#endif
839           )
840        {
841          if (path2[i] == '/')
842            /* .../
843             * ..../
844             *     i   */
845            return NULL;
846          else
847            /* .../
848             * .../foo
849             *     i    */
850            return pool ? apr_pstrdup(pool, path2 + i) : path2 + i;
851        }
852      else if (path2[i] == '/')
853        {
854          if (path2[i + 1])
855            /* ...
856             * .../foo
857             *    i    */
858            return pool ? apr_pstrdup(pool, path2 + i + 1) : path2 + i + 1;
859          else
860            /* ...
861             * .../
862             *    i    */
863            return NULL;
864        }
865    }
866
867  /* Otherwise, path2 isn't a child. */
868  return NULL;
869}
870
871
872/**** Public API functions ****/
873
/* Convert DIRENT to internal style: first replace local separators with
   '/' via internal_style(), then pass the result through
   svn_dirent_canonicalize().  POOL is handed to both calls. */
const char *
svn_dirent_internal_style(const char *dirent, apr_pool_t *pool)
{
  return svn_dirent_canonicalize(internal_style(dirent, pool), pool);
}
879
880const char *
881svn_dirent_local_style(const char *dirent, apr_pool_t *pool)
882{
883  /* Internally, Subversion represents the current directory with the
884     empty string.  But users like to see "." . */
885  if (SVN_PATH_IS_EMPTY(dirent))
886    return ".";
887
888#if '/' != SVN_PATH_LOCAL_SEPARATOR
889    {
890      char *p = apr_pstrdup(pool, dirent);
891      dirent = p;
892
893      /* Convert all canonical separators to the local-style ones. */
894      for (; *p != '\0'; ++p)
895        if (*p == '/')
896          *p = SVN_PATH_LOCAL_SEPARATOR;
897    }
898#endif
899
900  return dirent;
901}
902
/* Convert RELPATH to internal style: first replace local separators with
   '/' via internal_style(), then pass the result through
   svn_relpath_canonicalize().  POOL is handed to both calls. */
const char *
svn_relpath__internal_style(const char *relpath,
                            apr_pool_t *pool)
{
  return svn_relpath_canonicalize(internal_style(relpath, pool), pool);
}
909
910
911/* We decided against using apr_filepath_root here because of the negative
912   performance impact (creating a pool and converting strings ). */
913svn_boolean_t
914svn_dirent_is_root(const char *dirent, apr_size_t len)
915{
916#ifdef SVN_USE_DOS_PATHS
917  /* On Windows and Cygwin, 'H:' or 'H:/' (where 'H' is any letter)
918     are also root directories */
919  if ((len == 2 || ((len == 3) && (dirent[2] == '/'))) &&
920      (dirent[1] == ':') &&
921      ((dirent[0] >= 'A' && dirent[0] <= 'Z') ||
922       (dirent[0] >= 'a' && dirent[0] <= 'z')))
923    return TRUE;
924
925  /* On Windows and Cygwin //server/share is a root directory,
926     and on Cygwin //drive is a drive alias */
927  if (len >= 2 && dirent[0] == '/' && dirent[1] == '/'
928      && dirent[len - 1] != '/')
929    {
930      int segments = 0;
931      apr_size_t i;
932      for (i = len; i >= 2; i--)
933        {
934          if (dirent[i] == '/')
935            {
936              segments ++;
937              if (segments > 1)
938                return FALSE;
939            }
940        }
941#ifdef __CYGWIN__
942      return (segments <= 1);
943#else
944      return (segments == 1); /* //drive is invalid on plain Windows */
945#endif
946    }
947#endif
948
949  /* directory is root if it's equal to '/' */
950  if (len == 1 && dirent[0] == '/')
951    return TRUE;
952
953  return FALSE;
954}
955
/* Return TRUE if the LEN-byte string URI consists of nothing but its
   scheme root as computed by uri_schema_root_length(), FALSE otherwise.
   URI is asserted to be canonical. */
svn_boolean_t
svn_uri_is_root(const char *uri, apr_size_t len)
{
  assert(svn_uri_is_canonical(uri, NULL));
  return (len == uri_schema_root_length(uri, len));
}
962
963char *svn_dirent_join(const char *base,
964                      const char *component,
965                      apr_pool_t *pool)
966{
967  apr_size_t blen = strlen(base);
968  apr_size_t clen = strlen(component);
969  char *dirent;
970  int add_separator;
971
972  assert(svn_dirent_is_canonical(base, pool));
973  assert(svn_dirent_is_canonical(component, pool));
974
975  /* If the component is absolute, then return it.  */
976  if (svn_dirent_is_absolute(component))
977    return apr_pmemdup(pool, component, clen + 1);
978
979  /* If either is empty return the other */
980  if (SVN_PATH_IS_EMPTY(base))
981    return apr_pmemdup(pool, component, clen + 1);
982  if (SVN_PATH_IS_EMPTY(component))
983    return apr_pmemdup(pool, base, blen + 1);
984
985#ifdef SVN_USE_DOS_PATHS
986  if (component[0] == '/')
987    {
988      /* '/' is drive relative on Windows, not absolute like on Posix */
989      if (dirent_is_rooted(base))
990        {
991          /* Join component without '/' to root-of(base) */
992          blen = dirent_root_length(base, blen);
993          component++;
994          clen--;
995
996          if (blen == 2 && base[1] == ':') /* "C:" case */
997            {
998              char *root = apr_pmemdup(pool, base, 3);
999              root[2] = '/'; /* We don't need the final '\0' */
1000
1001              base = root;
1002              blen = 3;
1003            }
1004
1005          if (clen == 0)
1006            return apr_pstrndup(pool, base, blen);
1007        }
1008      else
1009        return apr_pmemdup(pool, component, clen + 1);
1010    }
1011  else if (dirent_is_rooted(component))
1012    return apr_pmemdup(pool, component, clen + 1);
1013#endif /* SVN_USE_DOS_PATHS */
1014
1015  /* if last character of base is already a separator, don't add a '/' */
1016  add_separator = 1;
1017  if (base[blen - 1] == '/'
1018#ifdef SVN_USE_DOS_PATHS
1019       || base[blen - 1] == ':'
1020#endif
1021        )
1022          add_separator = 0;
1023
1024  /* Construct the new, combined dirent. */
1025  dirent = apr_palloc(pool, blen + add_separator + clen + 1);
1026  memcpy(dirent, base, blen);
1027  if (add_separator)
1028    dirent[blen] = '/';
1029  memcpy(dirent + blen + add_separator, component, clen + 1);
1030
1031  return dirent;
1032}
1033
1034char *svn_dirent_join_many(apr_pool_t *pool, const char *base, ...)
1035{
1036#define MAX_SAVED_LENGTHS 10
1037  apr_size_t saved_lengths[MAX_SAVED_LENGTHS];
1038  apr_size_t total_len;
1039  int nargs;
1040  va_list va;
1041  const char *s;
1042  apr_size_t len;
1043  char *dirent;
1044  char *p;
1045  int add_separator;
1046  int base_arg = 0;
1047
1048  total_len = strlen(base);
1049
1050  assert(svn_dirent_is_canonical(base, pool));
1051
1052  /* if last character of base is already a separator, don't add a '/' */
1053  add_separator = 1;
1054  if (total_len == 0
1055       || base[total_len - 1] == '/'
1056#ifdef SVN_USE_DOS_PATHS
1057       || base[total_len - 1] == ':'
1058#endif
1059        )
1060          add_separator = 0;
1061
1062  saved_lengths[0] = total_len;
1063
1064  /* Compute the length of the resulting string. */
1065
1066  nargs = 0;
1067  va_start(va, base);
1068  while ((s = va_arg(va, const char *)) != NULL)
1069    {
1070      len = strlen(s);
1071
1072      assert(svn_dirent_is_canonical(s, pool));
1073
1074      if (SVN_PATH_IS_EMPTY(s))
1075        continue;
1076
1077      if (nargs++ < MAX_SAVED_LENGTHS)
1078        saved_lengths[nargs] = len;
1079
1080      if (dirent_is_rooted(s))
1081        {
1082          total_len = len;
1083          base_arg = nargs;
1084
1085#ifdef SVN_USE_DOS_PATHS
1086          if (!svn_dirent_is_absolute(s)) /* Handle non absolute roots */
1087            {
1088              /* Set new base and skip the current argument */
1089              base = s = svn_dirent_join(base, s, pool);
1090              base_arg++;
1091              saved_lengths[0] = total_len = len = strlen(s);
1092            }
1093          else
1094#endif /* SVN_USE_DOS_PATHS */
1095            {
1096              base = ""; /* Don't add base */
1097              saved_lengths[0] = 0;
1098            }
1099
1100          add_separator = 1;
1101          if (s[len - 1] == '/'
1102#ifdef SVN_USE_DOS_PATHS
1103             || s[len - 1] == ':'
1104#endif
1105              )
1106             add_separator = 0;
1107        }
1108      else if (nargs <= base_arg + 1)
1109        {
1110          total_len += add_separator + len;
1111        }
1112      else
1113        {
1114          total_len += 1 + len;
1115        }
1116    }
1117  va_end(va);
1118
1119  /* base == "/" and no further components. just return that. */
1120  if (add_separator == 0 && total_len == 1)
1121    return apr_pmemdup(pool, "/", 2);
1122
1123  /* we got the total size. allocate it, with room for a NULL character. */
1124  dirent = p = apr_palloc(pool, total_len + 1);
1125
1126  /* if we aren't supposed to skip forward to an absolute component, and if
1127     this is not an empty base that we are skipping, then copy the base
1128     into the output. */
1129  if (! SVN_PATH_IS_EMPTY(base))
1130    {
1131      memcpy(p, base, len = saved_lengths[0]);
1132      p += len;
1133    }
1134
1135  nargs = 0;
1136  va_start(va, base);
1137  while ((s = va_arg(va, const char *)) != NULL)
1138    {
1139      if (SVN_PATH_IS_EMPTY(s))
1140        continue;
1141
1142      if (++nargs < base_arg)
1143        continue;
1144
1145      if (nargs < MAX_SAVED_LENGTHS)
1146        len = saved_lengths[nargs];
1147      else
1148        len = strlen(s);
1149
1150      /* insert a separator if we aren't copying in the first component
1151         (which can happen when base_arg is set). also, don't put in a slash
1152         if the prior character is a slash (occurs when prior component
1153         is "/"). */
1154      if (p != dirent &&
1155          ( ! (nargs - 1 <= base_arg) || add_separator))
1156        *p++ = '/';
1157
1158      /* copy the new component and advance the pointer */
1159      memcpy(p, s, len);
1160      p += len;
1161    }
1162  va_end(va);
1163
1164  *p = '\0';
1165  assert((apr_size_t)(p - dirent) == total_len);
1166
1167  return dirent;
1168}
1169
1170char *
1171svn_relpath_join(const char *base,
1172                 const char *component,
1173                 apr_pool_t *pool)
1174{
1175  apr_size_t blen = strlen(base);
1176  apr_size_t clen = strlen(component);
1177  char *path;
1178
1179  assert(relpath_is_canonical(base));
1180  assert(relpath_is_canonical(component));
1181
1182  /* If either is empty return the other */
1183  if (blen == 0)
1184    return apr_pmemdup(pool, component, clen + 1);
1185  if (clen == 0)
1186    return apr_pmemdup(pool, base, blen + 1);
1187
1188  path = apr_palloc(pool, blen + 1 + clen + 1);
1189  memcpy(path, base, blen);
1190  path[blen] = '/';
1191  memcpy(path + blen + 1, component, clen + 1);
1192
1193  return path;
1194}
1195
1196char *
1197svn_dirent_dirname(const char *dirent, apr_pool_t *pool)
1198{
1199  apr_size_t len = strlen(dirent);
1200
1201  assert(svn_dirent_is_canonical(dirent, pool));
1202
1203  if (len == dirent_root_length(dirent, len))
1204    return apr_pstrmemdup(pool, dirent, len);
1205  else
1206    return apr_pstrmemdup(pool, dirent, dirent_previous_segment(dirent, len));
1207}
1208
1209const char *
1210svn_dirent_basename(const char *dirent, apr_pool_t *pool)
1211{
1212  apr_size_t len = strlen(dirent);
1213  apr_size_t start;
1214
1215  assert(!pool || svn_dirent_is_canonical(dirent, pool));
1216
1217  if (svn_dirent_is_root(dirent, len))
1218    return "";
1219  else
1220    {
1221      start = len;
1222      while (start > 0 && dirent[start - 1] != '/'
1223#ifdef SVN_USE_DOS_PATHS
1224             && dirent[start - 1] != ':'
1225#endif
1226            )
1227        --start;
1228    }
1229
1230  if (pool)
1231    return apr_pstrmemdup(pool, dirent + start, len - start);
1232  else
1233    return dirent + start;
1234}
1235
1236void
1237svn_dirent_split(const char **dirpath,
1238                 const char **base_name,
1239                 const char *dirent,
1240                 apr_pool_t *pool)
1241{
1242  assert(dirpath != base_name);
1243
1244  if (dirpath)
1245    *dirpath = svn_dirent_dirname(dirent, pool);
1246
1247  if (base_name)
1248    *base_name = svn_dirent_basename(dirent, pool);
1249}
1250
1251char *
1252svn_relpath_dirname(const char *relpath,
1253                    apr_pool_t *pool)
1254{
1255  apr_size_t len = strlen(relpath);
1256
1257  assert(relpath_is_canonical(relpath));
1258
1259  return apr_pstrmemdup(pool, relpath,
1260                        relpath_previous_segment(relpath, len));
1261}
1262
1263const char *
1264svn_relpath_basename(const char *relpath,
1265                     apr_pool_t *pool)
1266{
1267  apr_size_t len = strlen(relpath);
1268  apr_size_t start;
1269
1270  assert(relpath_is_canonical(relpath));
1271
1272  start = len;
1273  while (start > 0 && relpath[start - 1] != '/')
1274    --start;
1275
1276  if (pool)
1277    return apr_pstrmemdup(pool, relpath + start, len - start);
1278  else
1279    return relpath + start;
1280}
1281
1282void
1283svn_relpath_split(const char **dirpath,
1284                  const char **base_name,
1285                  const char *relpath,
1286                  apr_pool_t *pool)
1287{
1288  assert(dirpath != base_name);
1289
1290  if (dirpath)
1291    *dirpath = svn_relpath_dirname(relpath, pool);
1292
1293  if (base_name)
1294    *base_name = svn_relpath_basename(relpath, pool);
1295}
1296
1297const char *
1298svn_relpath_prefix(const char *relpath,
1299                   int max_components,
1300                   apr_pool_t *result_pool)
1301{
1302  const char *end;
1303  assert(relpath_is_canonical(relpath));
1304
1305  if (max_components <= 0)
1306    return "";
1307
1308  for (end = relpath; *end; end++)
1309    {
1310      if (*end == '/')
1311        {
1312          if (!--max_components)
1313            break;
1314        }
1315    }
1316
1317  return apr_pstrmemdup(result_pool, relpath, end-relpath);
1318}
1319
1320char *
1321svn_uri_dirname(const char *uri, apr_pool_t *pool)
1322{
1323  apr_size_t len = strlen(uri);
1324
1325  assert(svn_uri_is_canonical(uri, pool));
1326
1327  if (svn_uri_is_root(uri, len))
1328    return apr_pstrmemdup(pool, uri, len);
1329  else
1330    return apr_pstrmemdup(pool, uri, uri_previous_segment(uri, len));
1331}
1332
1333const char *
1334svn_uri_basename(const char *uri, apr_pool_t *pool)
1335{
1336  apr_size_t len = strlen(uri);
1337  apr_size_t start;
1338
1339  assert(svn_uri_is_canonical(uri, NULL));
1340
1341  if (svn_uri_is_root(uri, len))
1342    return "";
1343
1344  start = len;
1345  while (start > 0 && uri[start - 1] != '/')
1346    --start;
1347
1348  return svn_path_uri_decode(uri + start, pool);
1349}
1350
1351void
1352svn_uri_split(const char **dirpath,
1353              const char **base_name,
1354              const char *uri,
1355              apr_pool_t *pool)
1356{
1357  assert(dirpath != base_name);
1358
1359  if (dirpath)
1360    *dirpath = svn_uri_dirname(uri, pool);
1361
1362  if (base_name)
1363    *base_name = svn_uri_basename(uri, pool);
1364}
1365
1366char *
1367svn_dirent_get_longest_ancestor(const char *dirent1,
1368                                const char *dirent2,
1369                                apr_pool_t *pool)
1370{
1371  return apr_pstrndup(pool, dirent1,
1372                      get_longest_ancestor_length(type_dirent, dirent1,
1373                                                  dirent2, pool));
1374}
1375
1376char *
1377svn_relpath_get_longest_ancestor(const char *relpath1,
1378                                 const char *relpath2,
1379                                 apr_pool_t *pool)
1380{
1381  assert(relpath_is_canonical(relpath1));
1382  assert(relpath_is_canonical(relpath2));
1383
1384  return apr_pstrndup(pool, relpath1,
1385                      get_longest_ancestor_length(type_relpath, relpath1,
1386                                                  relpath2, pool));
1387}
1388
1389char *
1390svn_uri_get_longest_ancestor(const char *uri1,
1391                             const char *uri2,
1392                             apr_pool_t *pool)
1393{
1394  apr_size_t uri_ancestor_len;
1395  apr_size_t i = 0;
1396
1397  assert(svn_uri_is_canonical(uri1, NULL));
1398  assert(svn_uri_is_canonical(uri2, NULL));
1399
1400  /* Find ':' */
1401  while (1)
1402    {
1403      /* No shared protocol => no common prefix */
1404      if (uri1[i] != uri2[i])
1405        return apr_pmemdup(pool, SVN_EMPTY_PATH,
1406                           sizeof(SVN_EMPTY_PATH));
1407
1408      if (uri1[i] == ':')
1409        break;
1410
1411      /* They're both URLs, so EOS can't come before ':' */
1412      assert((uri1[i] != '\0') && (uri2[i] != '\0'));
1413
1414      i++;
1415    }
1416
1417  i += 3;  /* Advance past '://' */
1418
1419  uri_ancestor_len = get_longest_ancestor_length(type_uri, uri1 + i,
1420                                                 uri2 + i, pool);
1421
1422  if (uri_ancestor_len == 0 ||
1423      (uri_ancestor_len == 1 && (uri1 + i)[0] == '/'))
1424    return apr_pmemdup(pool, SVN_EMPTY_PATH, sizeof(SVN_EMPTY_PATH));
1425  else
1426    return apr_pstrndup(pool, uri1, uri_ancestor_len + i);
1427}
1428
1429const char *
1430svn_dirent_is_child(const char *parent_dirent,
1431                    const char *child_dirent,
1432                    apr_pool_t *pool)
1433{
1434  return is_child(type_dirent, parent_dirent, child_dirent, pool);
1435}
1436
1437const char *
1438svn_dirent_skip_ancestor(const char *parent_dirent,
1439                         const char *child_dirent)
1440{
1441  apr_size_t len = strlen(parent_dirent);
1442  apr_size_t root_len;
1443
1444  if (0 != strncmp(parent_dirent, child_dirent, len))
1445    return NULL; /* parent_dirent is no ancestor of child_dirent */
1446
1447  if (child_dirent[len] == 0)
1448    return ""; /* parent_dirent == child_dirent */
1449
1450  /* Child == parent + more-characters */
1451
1452  root_len = dirent_root_length(child_dirent, strlen(child_dirent));
1453  if (root_len > len)
1454    /* Different root, e.g. ("" "/...") or ("//z" "//z/share") */
1455    return NULL;
1456
1457  /* Now, child == [root-of-parent] + [rest-of-parent] + more-characters.
1458   * It must be one of the following forms.
1459   *
1460   * rlen parent    child       bad?  rlen=len? c[len]=/?
1461   *  0   ""        "foo"               *
1462   *  0   "b"       "bad"         !
1463   *  0   "b"       "b/foo"                       *
1464   *  1   "/"       "/foo"              *
1465   *  1   "/b"      "/bad"        !
1466   *  1   "/b"      "/b/foo"                      *
1467   *  2   "a:"      "a:foo"             *
1468   *  2   "a:b"     "a:bad"       !
1469   *  2   "a:b"     "a:b/foo"                     *
1470   *  3   "a:/"     "a:/foo"            *
1471   *  3   "a:/b"    "a:/bad"      !
1472   *  3   "a:/b"    "a:/b/foo"                    *
1473   *  5   "//s/s"   "//s/s/foo"         *         *
1474   *  5   "//s/s/b" "//s/s/bad"   !
1475   *  5   "//s/s/b" "//s/s/b/foo"                 *
1476   */
1477
1478  if (child_dirent[len] == '/')
1479    /* "parent|child" is one of:
1480     * "[a:]b|/foo" "[a:]/b|/foo" "//s/s|/foo" "//s/s/b|/foo" */
1481    return child_dirent + len + 1;
1482
1483  if (root_len == len)
1484    /* "parent|child" is "|foo" "/|foo" "a:|foo" "a:/|foo" "//s/s|/foo" */
1485    return child_dirent + len;
1486
1487  return NULL;
1488}
1489
1490const char *
1491svn_relpath_skip_ancestor(const char *parent_relpath,
1492                          const char *child_relpath)
1493{
1494  apr_size_t len = strlen(parent_relpath);
1495
1496  assert(relpath_is_canonical(parent_relpath));
1497  assert(relpath_is_canonical(child_relpath));
1498
1499  if (len == 0)
1500    return child_relpath;
1501
1502  if (0 != strncmp(parent_relpath, child_relpath, len))
1503    return NULL; /* parent_relpath is no ancestor of child_relpath */
1504
1505  if (child_relpath[len] == 0)
1506    return ""; /* parent_relpath == child_relpath */
1507
1508  if (child_relpath[len] == '/')
1509    return child_relpath + len + 1;
1510
1511  return NULL;
1512}
1513
1514
1515/* */
1516static const char *
1517uri_skip_ancestor(const char *parent_uri,
1518                  const char *child_uri)
1519{
1520  apr_size_t len = strlen(parent_uri);
1521
1522  assert(svn_uri_is_canonical(parent_uri, NULL));
1523  assert(svn_uri_is_canonical(child_uri, NULL));
1524
1525  if (0 != strncmp(parent_uri, child_uri, len))
1526    return NULL; /* parent_uri is no ancestor of child_uri */
1527
1528  if (child_uri[len] == 0)
1529    return ""; /* parent_uri == child_uri */
1530
1531  if (child_uri[len] == '/')
1532    return child_uri + len + 1;
1533
1534  return NULL;
1535}
1536
1537const char *
1538svn_uri_skip_ancestor(const char *parent_uri,
1539                      const char *child_uri,
1540                      apr_pool_t *result_pool)
1541{
1542  const char *result = uri_skip_ancestor(parent_uri, child_uri);
1543
1544  return result ? svn_path_uri_decode(result, result_pool) : NULL;
1545}
1546
1547svn_boolean_t
1548svn_dirent_is_ancestor(const char *parent_dirent, const char *child_dirent)
1549{
1550  return svn_dirent_skip_ancestor(parent_dirent, child_dirent) != NULL;
1551}
1552
1553svn_boolean_t
1554svn_uri__is_ancestor(const char *parent_uri, const char *child_uri)
1555{
1556  return uri_skip_ancestor(parent_uri, child_uri) != NULL;
1557}
1558
1559
1560svn_boolean_t
1561svn_dirent_is_absolute(const char *dirent)
1562{
1563  if (! dirent)
1564    return FALSE;
1565
1566  /* dirent is absolute if it starts with '/' on non-Windows platforms
1567     or with '//' on Windows platforms */
1568  if (dirent[0] == '/'
1569#ifdef SVN_USE_DOS_PATHS
1570      && dirent[1] == '/' /* Single '/' depends on current drive */
1571#endif
1572      )
1573    return TRUE;
1574
1575  /* On Windows, dirent is also absolute when it starts with 'H:/'
1576     where 'H' is any letter. */
1577#ifdef SVN_USE_DOS_PATHS
1578  if (((dirent[0] >= 'A' && dirent[0] <= 'Z')) &&
1579      (dirent[1] == ':') && (dirent[2] == '/'))
1580     return TRUE;
1581#endif /* SVN_USE_DOS_PATHS */
1582
1583  return FALSE;
1584}
1585
1586svn_error_t *
1587svn_dirent_get_absolute(const char **pabsolute,
1588                        const char *relative,
1589                        apr_pool_t *pool)
1590{
1591  char *buffer;
1592  apr_status_t apr_err;
1593  const char *path_apr;
1594
1595  SVN_ERR_ASSERT(! svn_path_is_url(relative));
1596
1597  /* Merge the current working directory with the relative dirent. */
1598  SVN_ERR(svn_path_cstring_from_utf8(&path_apr, relative, pool));
1599
1600  apr_err = apr_filepath_merge(&buffer, NULL,
1601                               path_apr,
1602                               APR_FILEPATH_NOTRELATIVE,
1603                               pool);
1604  if (apr_err)
1605    {
1606      /* In some cases when the passed path or its ancestor(s) do not exist
1607         or no longer exist apr returns an error.
1608
1609         In many of these cases we would like to return a path anyway, when the
1610         passed path was already a safe absolute path. So check for that now to
1611         avoid an error.
1612
1613         svn_dirent_is_absolute() doesn't perform the necessary checks to see
1614         if the path doesn't need post processing to be in the canonical absolute
1615         format.
1616         */
1617
1618      if (svn_dirent_is_absolute(relative)
1619          && svn_dirent_is_canonical(relative, pool)
1620          && !svn_path_is_backpath_present(relative))
1621        {
1622          *pabsolute = apr_pstrdup(pool, relative);
1623          return SVN_NO_ERROR;
1624        }
1625
1626      return svn_error_createf(SVN_ERR_BAD_FILENAME,
1627                               svn_error_create(apr_err, NULL, NULL),
1628                               _("Couldn't determine absolute path of '%s'"),
1629                               svn_dirent_local_style(relative, pool));
1630    }
1631
1632  SVN_ERR(svn_path_cstring_to_utf8(pabsolute, buffer, pool));
1633  *pabsolute = svn_dirent_canonicalize(*pabsolute, pool);
1634  return SVN_NO_ERROR;
1635}
1636
1637const char *
1638svn_uri_canonicalize(const char *uri, apr_pool_t *pool)
1639{
1640  return canonicalize(type_uri, uri, pool);
1641}
1642
1643const char *
1644svn_relpath_canonicalize(const char *relpath, apr_pool_t *pool)
1645{
1646  return canonicalize(type_relpath, relpath, pool);
1647}
1648
1649const char *
1650svn_dirent_canonicalize(const char *dirent, apr_pool_t *pool)
1651{
1652  const char *dst = canonicalize(type_dirent, dirent, pool);
1653
1654#ifdef SVN_USE_DOS_PATHS
1655  /* Handle a specific case on Windows where path == "X:/". Here we have to
1656     append the final '/', as svn_path_canonicalize will chop this of. */
1657  if (((dirent[0] >= 'A' && dirent[0] <= 'Z') ||
1658        (dirent[0] >= 'a' && dirent[0] <= 'z')) &&
1659        dirent[1] == ':' && dirent[2] == '/' &&
1660        dst[3] == '\0')
1661    {
1662      char *dst_slash = apr_pcalloc(pool, 4);
1663      dst_slash[0] = canonicalize_to_upper(dirent[0]);
1664      dst_slash[1] = ':';
1665      dst_slash[2] = '/';
1666      dst_slash[3] = '\0';
1667
1668      return dst_slash;
1669    }
1670#endif /* SVN_USE_DOS_PATHS */
1671
1672  return dst;
1673}
1674
1675svn_boolean_t
1676svn_dirent_is_canonical(const char *dirent, apr_pool_t *scratch_pool)
1677{
1678  const char *ptr = dirent;
1679  if (*ptr == '/')
1680    {
1681      ptr++;
1682#ifdef SVN_USE_DOS_PATHS
1683      /* Check for UNC paths */
1684      if (*ptr == '/')
1685        {
1686          /* TODO: Scan hostname and sharename and fall back to part code */
1687
1688          /* ### Fall back to old implementation */
1689          return (strcmp(dirent, svn_dirent_canonicalize(dirent, scratch_pool))
1690                  == 0);
1691        }
1692#endif /* SVN_USE_DOS_PATHS */
1693    }
1694#ifdef SVN_USE_DOS_PATHS
1695  else if (((*ptr >= 'a' && *ptr <= 'z') || (*ptr >= 'A' && *ptr <= 'Z')) &&
1696           (ptr[1] == ':'))
1697    {
1698      /* The only canonical drive names are "A:"..."Z:", no lower case */
1699      if (*ptr < 'A' || *ptr > 'Z')
1700        return FALSE;
1701
1702      ptr += 2;
1703
1704      if (*ptr == '/')
1705        ptr++;
1706    }
1707#endif /* SVN_USE_DOS_PATHS */
1708
1709  return relpath_is_canonical(ptr);
1710}
1711
1712static svn_boolean_t
1713relpath_is_canonical(const char *relpath)
1714{
1715  const char *dot_pos, *ptr = relpath;
1716  apr_size_t i, len;
1717  unsigned pattern = 0;
1718
1719  /* RELPATH is canonical if it has:
1720   *  - no '.' segments
1721   *  - no start and closing '/'
1722   *  - no '//'
1723   */
1724
1725  /* invalid beginnings */
1726  if (*ptr == '/')
1727    return FALSE;
1728
1729  if (ptr[0] == '.' && (ptr[1] == '/' || ptr[1] == '\0'))
1730    return FALSE;
1731
1732  /* valid special cases */
1733  len = strlen(ptr);
1734  if (len < 2)
1735    return TRUE;
1736
1737  /* invalid endings */
1738  if (ptr[len-1] == '/' || (ptr[len-1] == '.' && ptr[len-2] == '/'))
1739    return FALSE;
1740
1741  /* '.' are rare. So, search for them globally. There will often be no
1742   * more than one hit.  Also note that we already checked for invalid
1743   * starts and endings, i.e. we only need to check for "/./"
1744   */
1745  for (dot_pos = memchr(ptr, '.', len);
1746       dot_pos;
1747       dot_pos = strchr(dot_pos+1, '.'))
1748    if (dot_pos > ptr && dot_pos[-1] == '/' && dot_pos[1] == '/')
1749      return FALSE;
1750
1751  /* Now validate the rest of the path. */
1752  for (i = 0; i < len - 1; ++i)
1753    {
1754      pattern = ((pattern & 0xff) << 8) + (unsigned char)ptr[i];
1755      if (pattern == 0x101 * (unsigned char)('/'))
1756        return FALSE;
1757    }
1758
1759  return TRUE;
1760}
1761
1762svn_boolean_t
1763svn_relpath_is_canonical(const char *relpath)
1764{
1765  return relpath_is_canonical(relpath);
1766}
1767
1768svn_boolean_t
1769svn_uri_is_canonical(const char *uri, apr_pool_t *scratch_pool)
1770{
1771  const char *ptr = uri, *seg = uri;
1772  const char *schema_data = NULL;
1773
1774  /* URI is canonical if it has:
1775   *  - lowercase URL scheme
1776   *  - lowercase URL hostname
1777   *  - no '.' segments
1778   *  - no closing '/'
1779   *  - no '//'
1780   *  - uppercase hex-encoded pair digits ("%AB", not "%ab")
1781   */
1782
1783  if (*uri == '\0')
1784    return FALSE;
1785
1786  if (! svn_path_is_url(uri))
1787    return FALSE;
1788
1789  /* Skip the scheme. */
1790  while (*ptr && (*ptr != '/') && (*ptr != ':'))
1791    ptr++;
1792
1793  /* No scheme?  No good. */
1794  if (! (*ptr == ':' && *(ptr+1) == '/' && *(ptr+2) == '/'))
1795    return FALSE;
1796
1797  /* Found a scheme, check that it's all lowercase. */
1798  ptr = uri;
1799  while (*ptr != ':')
1800    {
1801      if (*ptr >= 'A' && *ptr <= 'Z')
1802        return FALSE;
1803      ptr++;
1804    }
1805  /* Skip :// */
1806  ptr += 3;
1807
1808  /* Scheme only?  That works. */
1809  if (! *ptr)
1810    return TRUE;
1811
1812  /* This might be the hostname */
1813  seg = ptr;
1814  while (*ptr && (*ptr != '/') && (*ptr != '@'))
1815    ptr++;
1816
1817  if (*ptr == '@')
1818    seg = ptr + 1;
1819
1820  /* Found a hostname, check that it's all lowercase. */
1821  ptr = seg;
1822
1823  if (*ptr == '[')
1824    {
1825      ptr++;
1826      while (*ptr == ':'
1827             || (*ptr >= '0' && *ptr <= '9')
1828             || (*ptr >= 'a' && *ptr <= 'f'))
1829        {
1830          ptr++;
1831        }
1832
1833      if (*ptr != ']')
1834        return FALSE;
1835      ptr++;
1836    }
1837  else
1838    while (*ptr && *ptr != '/' && *ptr != ':')
1839      {
1840        if (*ptr >= 'A' && *ptr <= 'Z')
1841          return FALSE;
1842        ptr++;
1843      }
1844
1845  /* Found a portnumber */
1846  if (*ptr == ':')
1847    {
1848      apr_int64_t port = 0;
1849
1850      ptr++;
1851      schema_data = ptr;
1852
1853      while (*ptr >= '0' && *ptr <= '9')
1854        {
1855          port = 10 * port + (*ptr - '0');
1856          ptr++;
1857        }
1858
1859      if (ptr == schema_data)
1860        return FALSE; /* Fail on "http://host:" */
1861
1862      if (*ptr && *ptr != '/')
1863        return FALSE; /* Not a port number */
1864
1865      if (port == 80 && strncmp(uri, "http:", 5) == 0)
1866        return FALSE;
1867      else if (port == 443 && strncmp(uri, "https:", 6) == 0)
1868        return FALSE;
1869      else if (port == 3690 && strncmp(uri, "svn:", 4) == 0)
1870        return FALSE;
1871    }
1872
1873  schema_data = ptr;
1874
1875#ifdef SVN_USE_DOS_PATHS
1876  if (schema_data && *ptr == '/')
1877    {
1878      /* If this is a file url, ptr now points to the third '/' in
1879         file:///C:/path. Check that if we have such a URL the drive
1880         letter is in uppercase. */
1881      if (strncmp(uri, "file:", 5) == 0 &&
1882          ! (*(ptr+1) >= 'A' && *(ptr+1) <= 'Z') &&
1883          *(ptr+2) == ':')
1884        return FALSE;
1885    }
1886#endif /* SVN_USE_DOS_PATHS */
1887
1888  /* Now validate the rest of the URI. */
1889  seg = ptr;
1890  while (*ptr && (*ptr != '/'))
1891    ptr++;
1892  while(1)
1893    {
1894      apr_size_t seglen = ptr - seg;
1895
1896      if (seglen == 1 && *seg == '.')
1897        return FALSE;  /*  /./   */
1898
1899      if (*ptr == '/' && *(ptr+1) == '/')
1900        return FALSE;  /*  //    */
1901
1902      if (! *ptr && *(ptr - 1) == '/' && ptr - 1 != uri)
1903        return FALSE;  /* foo/  */
1904
1905      if (! *ptr)
1906        break;
1907
1908      if (*ptr == '/')
1909        ptr++;
1910
1911      seg = ptr;
1912      while (*ptr && (*ptr != '/'))
1913        ptr++;
1914    }
1915
1916  ptr = schema_data;
1917
1918  while (*ptr)
1919    {
1920      if (*ptr == '%')
1921        {
1922          char digitz[3];
1923          int val;
1924
1925          /* Can't usesvn_ctype_isxdigit() because lower case letters are
1926             not in our canonical format */
1927          if (((*(ptr+1) < '0' || *(ptr+1) > '9'))
1928              && (*(ptr+1) < 'A' || *(ptr+1) > 'F'))
1929            return FALSE;
1930          else if (((*(ptr+2) < '0' || *(ptr+2) > '9'))
1931                   && (*(ptr+2) < 'A' || *(ptr+2) > 'F'))
1932            return FALSE;
1933
1934          digitz[0] = *(++ptr);
1935          digitz[1] = *(++ptr);
1936          digitz[2] = '\0';
1937          val = (int)strtol(digitz, NULL, 16);
1938
1939          if (svn_uri__char_validity[val])
1940            return FALSE; /* Should not have been escaped */
1941        }
1942      else if (*ptr != '/' && !svn_uri__char_validity[(unsigned char)*ptr])
1943        return FALSE; /* Character should have been escaped */
1944      ptr++;
1945    }
1946
1947  return TRUE;
1948}
1949
/* Compute the common ancestor of the dirents in TARGETS and, optionally,
   the list of targets expressed relative to that ancestor.

   *PCOMMON is set to the longest common ancestor of the absolute forms of
   all TARGETS, allocated in RESULT_POOL (NULL if TARGETS is empty).

   If PCONDENSED_TARGETS is non-NULL, *PCONDENSED_TARGETS is set to an
   array (in RESULT_POOL) of the targets with the common prefix stripped;
   it is NULL for an empty TARGETS and an empty array for a single target.
   When REMOVE_REDUNDANCIES is true, targets that are children of other
   targets, and targets equal to *PCOMMON, are left out.

   SCRATCH_POOL is used for the intermediate absolute paths and the
   bookkeeping arrays.  Errors from svn_dirent_get_absolute() are
   propagated. */
svn_error_t *
svn_dirent_condense_targets(const char **pcommon,
                            apr_array_header_t **pcondensed_targets,
                            const apr_array_header_t *targets,
                            svn_boolean_t remove_redundancies,
                            apr_pool_t *result_pool,
                            apr_pool_t *scratch_pool)
{
  /* NUM_CONDENSED starts at the full count and is decremented for every
     target that gets marked as removed. */
  int i, num_condensed = targets->nelts;
  svn_boolean_t *removed;
  apr_array_header_t *abs_targets;

  /* Early exit when there's no data to work on. */
  if (targets->nelts <= 0)
    {
      *pcommon = NULL;
      if (pcondensed_targets)
        *pcondensed_targets = NULL;
      return SVN_NO_ERROR;
    }

  /* Get the absolute path of the first target. */
  SVN_ERR(svn_dirent_get_absolute(pcommon,
                                  APR_ARRAY_IDX(targets, 0, const char *),
                                  scratch_pool));

  /* Early exit when there's only one dirent to work on. */
  if (targets->nelts == 1)
    {
      /* Move the result out of the scratch pool before returning. */
      *pcommon = apr_pstrdup(result_pool, *pcommon);
      if (pcondensed_targets)
        *pcondensed_targets = apr_array_make(result_pool, 0,
                                             sizeof(const char *));
      return SVN_NO_ERROR;
    }

  /* Copy the targets array, but with absolute dirents instead of
     relative.  Also, find the pcommon argument by finding what is
     common in all of the absolute dirents. NOTE: This is not as
     efficient as it could be.  The calculation of the basedir could
     be done in the loop below, which would save some calls to
     svn_dirent_get_longest_ancestor.  I decided to do it this way
     because I thought it would be simpler, since this way, we don't
     even do the loop if we don't need to condense the targets. */

  /* REMOVED[i] is zero-initialized, i.e. no target is removed yet. */
  removed = apr_pcalloc(scratch_pool, (targets->nelts *
                                          sizeof(svn_boolean_t)));
  abs_targets = apr_array_make(scratch_pool, targets->nelts,
                               sizeof(const char *));

  /* The first target was already made absolute above. */
  APR_ARRAY_PUSH(abs_targets, const char *) = *pcommon;

  for (i = 1; i < targets->nelts; ++i)
    {
      const char *rel = APR_ARRAY_IDX(targets, i, const char *);
      const char *absolute;
      SVN_ERR(svn_dirent_get_absolute(&absolute, rel, scratch_pool));
      APR_ARRAY_PUSH(abs_targets, const char *) = absolute;
      /* Narrow the running common ancestor with each new target. */
      *pcommon = svn_dirent_get_longest_ancestor(*pcommon, absolute,
                                                 scratch_pool);
    }

  /* The final common ancestor must outlive the scratch pool. */
  *pcommon = apr_pstrdup(result_pool, *pcommon);

  if (pcondensed_targets != NULL)
    {
      size_t basedir_len;

      if (remove_redundancies)
        {
          /* Find the common part of each pair of targets.  If
             common part is equal to one of the dirents, the other
             is a child of it, and can be removed.  If a target is
             equal to *pcommon, it can also be removed. */

          /* First pass: when one non-removed target is a child of
             another non-removed target, remove the child. */
          for (i = 0; i < abs_targets->nelts; ++i)
            {
              int j;

              if (removed[i])
                continue;

              for (j = i + 1; j < abs_targets->nelts; ++j)
                {
                  const char *abs_targets_i;
                  const char *abs_targets_j;
                  const char *ancestor;

                  if (removed[j])
                    continue;

                  abs_targets_i = APR_ARRAY_IDX(abs_targets, i, const char *);
                  abs_targets_j = APR_ARRAY_IDX(abs_targets, j, const char *);

                  ancestor = svn_dirent_get_longest_ancestor
                    (abs_targets_i, abs_targets_j, scratch_pool);

                  /* Nothing in common: neither contains the other. */
                  if (*ancestor == '\0')
                    continue;

                  if (strcmp(ancestor, abs_targets_i) == 0)
                    {
                      /* Target i is an ancestor of (or equal to) j. */
                      removed[j] = TRUE;
                      num_condensed--;
                    }
                  else if (strcmp(ancestor, abs_targets_j) == 0)
                    {
                      /* Target j is an ancestor of i. */
                      removed[i] = TRUE;
                      num_condensed--;
                    }
                }
            }

          /* Second pass: when a target is the same as *pcommon,
             remove the target. */
          for (i = 0; i < abs_targets->nelts; ++i)
            {
              const char *abs_targets_i = APR_ARRAY_IDX(abs_targets, i,
                                                        const char *);

              if ((strcmp(abs_targets_i, *pcommon) == 0) && (! removed[i]))
                {
                  removed[i] = TRUE;
                  num_condensed--;
                }
            }
        }

      /* Now create the return array, and copy the non-removed items */
      basedir_len = strlen(*pcommon);
      *pcondensed_targets = apr_array_make(result_pool, num_condensed,
                                           sizeof(const char *));

      for (i = 0; i < abs_targets->nelts; ++i)
        {
          const char *rel_item = APR_ARRAY_IDX(abs_targets, i, const char *);

          /* Skip this if it's been removed. */
          if (removed[i])
            continue;

          /* If a common prefix was found, condensed_targets are given
             relative to that prefix.  */
          if (basedir_len > 0)
            {
              /* Only advance our pointer past a dirent separator if
                 REL_ITEM isn't the same as *PCOMMON.

                 If *PCOMMON is a root dirent, basedir_len will already
                 include the closing '/', so never advance the pointer
                 here.
                 */
              rel_item += basedir_len;
              if (rel_item[0] &&
                  ! svn_dirent_is_root(*pcommon, basedir_len))
                rel_item++;
            }

          /* Store a copy that lives in RESULT_POOL, since REL_ITEM points
             into scratch-pool memory. */
          APR_ARRAY_PUSH(*pcondensed_targets, const char *)
            = apr_pstrdup(result_pool, rel_item);
        }
    }

  return SVN_NO_ERROR;
}
2117
2118svn_error_t *
2119svn_uri_condense_targets(const char **pcommon,
2120                         apr_array_header_t **pcondensed_targets,
2121                         const apr_array_header_t *targets,
2122                         svn_boolean_t remove_redundancies,
2123                         apr_pool_t *result_pool,
2124                         apr_pool_t *scratch_pool)
2125{
2126  int i, num_condensed = targets->nelts;
2127  apr_array_header_t *uri_targets;
2128  svn_boolean_t *removed;
2129
2130  /* Early exit when there's no data to work on. */
2131  if (targets->nelts <= 0)
2132    {
2133      *pcommon = NULL;
2134      if (pcondensed_targets)
2135        *pcondensed_targets = NULL;
2136      return SVN_NO_ERROR;
2137    }
2138
2139  *pcommon = svn_uri_canonicalize(APR_ARRAY_IDX(targets, 0, const char *),
2140                                  scratch_pool);
2141
2142  /* Early exit when there's only one uri to work on. */
2143  if (targets->nelts == 1)
2144    {
2145      *pcommon = apr_pstrdup(result_pool, *pcommon);
2146      if (pcondensed_targets)
2147        *pcondensed_targets = apr_array_make(result_pool, 0,
2148                                             sizeof(const char *));
2149      return SVN_NO_ERROR;
2150    }
2151
2152  /* Find the pcommon argument by finding what is common in all of the
2153     uris. NOTE: This is not as efficient as it could be.  The calculation
2154     of the basedir could be done in the loop below, which would
2155     save some calls to svn_uri_get_longest_ancestor.  I decided to do it
2156     this way because I thought it would be simpler, since this way, we don't
2157     even do the loop if we don't need to condense the targets. */
2158
2159  removed = apr_pcalloc(scratch_pool, (targets->nelts *
2160                                          sizeof(svn_boolean_t)));
2161  uri_targets = apr_array_make(scratch_pool, targets->nelts,
2162                               sizeof(const char *));
2163
2164  APR_ARRAY_PUSH(uri_targets, const char *) = *pcommon;
2165
2166  for (i = 1; i < targets->nelts; ++i)
2167    {
2168      const char *uri = svn_uri_canonicalize(
2169                           APR_ARRAY_IDX(targets, i, const char *),
2170                           scratch_pool);
2171      APR_ARRAY_PUSH(uri_targets, const char *) = uri;
2172
2173      /* If the commonmost ancestor so far is empty, there's no point
2174         in continuing to search for a common ancestor at all.  But
2175         we'll keep looping for the sake of canonicalizing the
2176         targets, I suppose.  */
2177      if (**pcommon != '\0')
2178        *pcommon = svn_uri_get_longest_ancestor(*pcommon, uri,
2179                                                scratch_pool);
2180    }
2181
2182  *pcommon = apr_pstrdup(result_pool, *pcommon);
2183
2184  if (pcondensed_targets != NULL)
2185    {
2186      size_t basedir_len;
2187
2188      if (remove_redundancies)
2189        {
2190          /* Find the common part of each pair of targets.  If
2191             common part is equal to one of the dirents, the other
2192             is a child of it, and can be removed.  If a target is
2193             equal to *pcommon, it can also be removed. */
2194
2195          /* First pass: when one non-removed target is a child of
2196             another non-removed target, remove the child. */
2197          for (i = 0; i < uri_targets->nelts; ++i)
2198            {
2199              int j;
2200
2201              if (removed[i])
2202                continue;
2203
2204              for (j = i + 1; j < uri_targets->nelts; ++j)
2205                {
2206                  const char *uri_i;
2207                  const char *uri_j;
2208                  const char *ancestor;
2209
2210                  if (removed[j])
2211                    continue;
2212
2213                  uri_i = APR_ARRAY_IDX(uri_targets, i, const char *);
2214                  uri_j = APR_ARRAY_IDX(uri_targets, j, const char *);
2215
2216                  ancestor = svn_uri_get_longest_ancestor(uri_i,
2217                                                          uri_j,
2218                                                          scratch_pool);
2219
2220                  if (*ancestor == '\0')
2221                    continue;
2222
2223                  if (strcmp(ancestor, uri_i) == 0)
2224                    {
2225                      removed[j] = TRUE;
2226                      num_condensed--;
2227                    }
2228                  else if (strcmp(ancestor, uri_j) == 0)
2229                    {
2230                      removed[i] = TRUE;
2231                      num_condensed--;
2232                    }
2233                }
2234            }
2235
2236          /* Second pass: when a target is the same as *pcommon,
2237             remove the target. */
2238          for (i = 0; i < uri_targets->nelts; ++i)
2239            {
2240              const char *uri_targets_i = APR_ARRAY_IDX(uri_targets, i,
2241                                                        const char *);
2242
2243              if ((strcmp(uri_targets_i, *pcommon) == 0) && (! removed[i]))
2244                {
2245                  removed[i] = TRUE;
2246                  num_condensed--;
2247                }
2248            }
2249        }
2250
2251      /* Now create the return array, and copy the non-removed items */
2252      basedir_len = strlen(*pcommon);
2253      *pcondensed_targets = apr_array_make(result_pool, num_condensed,
2254                                           sizeof(const char *));
2255
2256      for (i = 0; i < uri_targets->nelts; ++i)
2257        {
2258          const char *rel_item = APR_ARRAY_IDX(uri_targets, i, const char *);
2259
2260          /* Skip this if it's been removed. */
2261          if (removed[i])
2262            continue;
2263
2264          /* If a common prefix was found, condensed_targets are given
2265             relative to that prefix.  */
2266          if (basedir_len > 0)
2267            {
2268              /* Only advance our pointer past a dirent separator if
2269                 REL_ITEM isn't the same as *PCOMMON.
2270
2271                 If *PCOMMON is a root dirent, basedir_len will already
2272                 include the closing '/', so never advance the pointer
2273                 here.
2274                 */
2275              rel_item += basedir_len;
2276              if ((rel_item[0] == '/') ||
2277                  (rel_item[0] && !svn_uri_is_root(*pcommon, basedir_len)))
2278                {
2279                  rel_item++;
2280                }
2281            }
2282
2283          APR_ARRAY_PUSH(*pcondensed_targets, const char *)
2284            = svn_path_uri_decode(rel_item, result_pool);
2285        }
2286    }
2287
2288  return SVN_NO_ERROR;
2289}
2290
2291svn_error_t *
2292svn_dirent_is_under_root(svn_boolean_t *under_root,
2293                         const char **result_path,
2294                         const char *base_path,
2295                         const char *path,
2296                         apr_pool_t *result_pool)
2297{
2298  apr_status_t status;
2299  char *full_path;
2300
2301  *under_root = FALSE;
2302  if (result_path)
2303    *result_path = NULL;
2304
2305  status = apr_filepath_merge(&full_path,
2306                              base_path,
2307                              path,
2308                              APR_FILEPATH_NOTABOVEROOT
2309                              | APR_FILEPATH_SECUREROOTTEST,
2310                              result_pool);
2311
2312  if (status == APR_SUCCESS)
2313    {
2314      if (result_path)
2315        *result_path = svn_dirent_canonicalize(full_path, result_pool);
2316      *under_root = TRUE;
2317      return SVN_NO_ERROR;
2318    }
2319  else if (status == APR_EABOVEROOT)
2320    {
2321      *under_root = FALSE;
2322      return SVN_NO_ERROR;
2323    }
2324
2325  return svn_error_wrap_apr(status, NULL);
2326}
2327
2328svn_error_t *
2329svn_uri_get_dirent_from_file_url(const char **dirent,
2330                                 const char *url,
2331                                 apr_pool_t *pool)
2332{
2333  const char *hostname, *path;
2334
2335  SVN_ERR_ASSERT(svn_uri_is_canonical(url, pool));
2336
2337  /* Verify that the URL is well-formed (loosely) */
2338
2339  /* First, check for the "file://" prefix. */
2340  if (strncmp(url, "file://", 7) != 0)
2341    return svn_error_createf(SVN_ERR_RA_ILLEGAL_URL, NULL,
2342                             _("Local URL '%s' does not contain 'file://' "
2343                               "prefix"), url);
2344
2345  /* Find the HOSTNAME portion and the PATH portion of the URL.  The host
2346     name is between the "file://" prefix and the next occurrence of '/'.  We
2347     are considering everything from that '/' until the end of the URL to be
2348     the absolute path portion of the URL.
2349     If we got just "file://", treat it the same as "file:///". */
2350  hostname = url + 7;
2351  path = strchr(hostname, '/');
2352  if (path)
2353    hostname = apr_pstrmemdup(pool, hostname, path - hostname);
2354  else
2355    path = "/";
2356
2357  /* URI-decode HOSTNAME, and set it to NULL if it is "" or "localhost". */
2358  if (*hostname == '\0')
2359    hostname = NULL;
2360  else
2361    {
2362      hostname = svn_path_uri_decode(hostname, pool);
2363      if (strcmp(hostname, "localhost") == 0)
2364        hostname = NULL;
2365    }
2366
2367  /* Duplicate the URL, starting at the top of the path.
2368     At the same time, we URI-decode the path. */
2369#ifdef SVN_USE_DOS_PATHS
2370  /* On Windows, we'll typically have to skip the leading / if the
2371     path starts with a drive letter.  Like most Web browsers, We
2372     support two variants of this scheme:
2373
2374         file:///X:/path    and
2375         file:///X|/path
2376
2377    Note that, at least on WinNT and above,  file:////./X:/path  will
2378    also work, so we must make sure the transformation doesn't break
2379    that, and  file:///path  (that looks within the current drive
2380    only) should also keep working.
2381    If we got a non-empty hostname other than localhost, we convert this
2382    into an UNC path.  In this case, we obviously don't strip the slash
2383    even if the path looks like it starts with a drive letter.
2384  */
2385  {
2386    static const char valid_drive_letters[] =
2387      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
2388    /* Casting away const! */
2389    char *dup_path = (char *)svn_path_uri_decode(path, pool);
2390
2391    /* This check assumes ':' and '|' are already decoded! */
2392    if (!hostname && dup_path[1] && strchr(valid_drive_letters, dup_path[1])
2393        && (dup_path[2] == ':' || dup_path[2] == '|'))
2394      {
2395        /* Skip the leading slash. */
2396        ++dup_path;
2397
2398        if (dup_path[1] == '|')
2399          dup_path[1] = ':';
2400
2401        if (dup_path[2] == '/' || dup_path[2] == '\0')
2402          {
2403            if (dup_path[2] == '\0')
2404              {
2405                /* A valid dirent for the driveroot must be like "C:/" instead of
2406                   just "C:" or svn_dirent_join() will use the current directory
2407                   on the drive instead */
2408                char *new_path = apr_pcalloc(pool, 4);
2409                new_path[0] = dup_path[0];
2410                new_path[1] = ':';
2411                new_path[2] = '/';
2412                new_path[3] = '\0';
2413                dup_path = new_path;
2414              }
2415          }
2416      }
2417    if (hostname)
2418      {
2419        if (dup_path[0] == '/' && dup_path[1] == '\0')
2420          return svn_error_createf(SVN_ERR_RA_ILLEGAL_URL, NULL,
2421                                   _("Local URL '%s' contains only a hostname, "
2422                                     "no path"), url);
2423
2424        /* We still know that the path starts with a slash. */
2425        *dirent = apr_pstrcat(pool, "//", hostname, dup_path, SVN_VA_NULL);
2426      }
2427    else
2428      *dirent = dup_path;
2429  }
2430#else /* !SVN_USE_DOS_PATHS */
2431  /* Currently, the only hostnames we are allowing on non-Win32 platforms
2432     are the empty string and 'localhost'. */
2433  if (hostname)
2434    return svn_error_createf(SVN_ERR_RA_ILLEGAL_URL, NULL,
2435                             _("Local URL '%s' contains unsupported hostname"),
2436                             url);
2437
2438  *dirent = svn_path_uri_decode(path, pool);
2439#endif /* SVN_USE_DOS_PATHS */
2440  return SVN_NO_ERROR;
2441}
2442
2443svn_error_t *
2444svn_uri_get_file_url_from_dirent(const char **url,
2445                                 const char *dirent,
2446                                 apr_pool_t *pool)
2447{
2448  assert(svn_dirent_is_canonical(dirent, pool));
2449
2450  SVN_ERR(svn_dirent_get_absolute(&dirent, dirent, pool));
2451
2452  dirent = svn_path_uri_encode(dirent, pool);
2453
2454#ifndef SVN_USE_DOS_PATHS
2455  if (dirent[0] == '/' && dirent[1] == '\0')
2456    dirent = NULL; /* "file://" is the canonical form of "file:///" */
2457
2458  *url = apr_pstrcat(pool, "file://", dirent, SVN_VA_NULL);
2459#else
2460  if (dirent[0] == '/')
2461    {
2462      /* Handle UNC paths //server/share -> file://server/share */
2463      assert(dirent[1] == '/'); /* Expect UNC, not non-absolute */
2464
2465      *url = apr_pstrcat(pool, "file:", dirent, SVN_VA_NULL);
2466    }
2467  else
2468    {
2469      char *uri = apr_pstrcat(pool, "file:///", dirent, SVN_VA_NULL);
2470      apr_size_t len = 8 /* strlen("file:///") */ + strlen(dirent);
2471
2472      /* "C:/" is a canonical dirent on Windows,
2473         but "file:///C:/" is not a canonical uri */
2474      if (uri[len-1] == '/')
2475        uri[len-1] = '\0';
2476
2477      *url = uri;
2478    }
2479#endif
2480
2481  return SVN_NO_ERROR;
2482}
2483
2484
2485
2486/* -------------- The fspath API (see private/svn_fspath.h) -------------- */
2487
2488svn_boolean_t
2489svn_fspath__is_canonical(const char *fspath)
2490{
2491  return fspath[0] == '/' && relpath_is_canonical(fspath + 1);
2492}
2493
2494
2495const char *
2496svn_fspath__canonicalize(const char *fspath,
2497                         apr_pool_t *pool)
2498{
2499  if ((fspath[0] == '/') && (fspath[1] == '\0'))
2500    return "/";
2501
2502  return apr_pstrcat(pool, "/", svn_relpath_canonicalize(fspath, pool),
2503                     SVN_VA_NULL);
2504}
2505
2506
2507svn_boolean_t
2508svn_fspath__is_root(const char *fspath, apr_size_t len)
2509{
2510  /* directory is root if it's equal to '/' */
2511  return (len == 1 && fspath[0] == '/');
2512}
2513
2514
/* Delegate to svn_relpath_skip_ancestor() after dropping the leading '/'
   from both canonical fspaths, and return whatever it returns. */
const char *
svn_fspath__skip_ancestor(const char *parent_fspath,
                          const char *child_fspath)
{
  const char *parent_relpath;
  const char *child_relpath;

  assert(svn_fspath__is_canonical(parent_fspath));
  assert(svn_fspath__is_canonical(child_fspath));

  parent_relpath = parent_fspath + 1;
  child_relpath = child_fspath + 1;

  return svn_relpath_skip_ancestor(parent_relpath, child_relpath);
}
2524
2525
2526const char *
2527svn_fspath__dirname(const char *fspath,
2528                    apr_pool_t *pool)
2529{
2530  assert(svn_fspath__is_canonical(fspath));
2531
2532  if (fspath[0] == '/' && fspath[1] == '\0')
2533    return apr_pstrdup(pool, fspath);
2534  else
2535    return apr_pstrcat(pool, "/", svn_relpath_dirname(fspath + 1, pool),
2536                       SVN_VA_NULL);
2537}
2538
2539
2540const char *
2541svn_fspath__basename(const char *fspath,
2542                     apr_pool_t *pool)
2543{
2544  const char *result;
2545  assert(svn_fspath__is_canonical(fspath));
2546
2547  result = svn_relpath_basename(fspath + 1, pool);
2548
2549  assert(strchr(result, '/') == NULL);
2550  return result;
2551}
2552
2553void
2554svn_fspath__split(const char **dirpath,
2555                  const char **base_name,
2556                  const char *fspath,
2557                  apr_pool_t *result_pool)
2558{
2559  assert(dirpath != base_name);
2560
2561  if (dirpath)
2562    *dirpath = svn_fspath__dirname(fspath, result_pool);
2563
2564  if (base_name)
2565    *base_name = svn_fspath__basename(fspath, result_pool);
2566}
2567
2568char *
2569svn_fspath__join(const char *fspath,
2570                 const char *relpath,
2571                 apr_pool_t *result_pool)
2572{
2573  char *result;
2574  assert(svn_fspath__is_canonical(fspath));
2575  assert(svn_relpath_is_canonical(relpath));
2576
2577  if (relpath[0] == '\0')
2578    result = apr_pstrdup(result_pool, fspath);
2579  else if (fspath[1] == '\0')
2580    result = apr_pstrcat(result_pool, "/", relpath, SVN_VA_NULL);
2581  else
2582    result = apr_pstrcat(result_pool, fspath, "/", relpath, SVN_VA_NULL);
2583
2584  assert(svn_fspath__is_canonical(result));
2585  return result;
2586}
2587
2588char *
2589svn_fspath__get_longest_ancestor(const char *fspath1,
2590                                 const char *fspath2,
2591                                 apr_pool_t *result_pool)
2592{
2593  char *result;
2594  assert(svn_fspath__is_canonical(fspath1));
2595  assert(svn_fspath__is_canonical(fspath2));
2596
2597  result = apr_pstrcat(result_pool, "/",
2598                       svn_relpath_get_longest_ancestor(fspath1 + 1,
2599                                                        fspath2 + 1,
2600                                                        result_pool),
2601                       SVN_VA_NULL);
2602
2603  assert(svn_fspath__is_canonical(result));
2604  return result;
2605}
2606
2607
2608
2609
2610/* -------------- The urlpath API (see private/svn_fspath.h) ------------- */
2611
2612const char *
2613svn_urlpath__canonicalize(const char *uri,
2614                          apr_pool_t *pool)
2615{
2616  if (svn_path_is_url(uri))
2617    {
2618      uri = svn_uri_canonicalize(uri, pool);
2619    }
2620  else
2621    {
2622      uri = svn_fspath__canonicalize(uri, pool);
2623      /* Do a little dance to normalize hex encoding. */
2624      uri = svn_path_uri_decode(uri, pool);
2625      uri = svn_path_uri_encode(uri, pool);
2626    }
2627  return uri;
2628}
2629
2630
2631/* -------------- The cert API (see private/svn_cert.h) ------------- */
2632
2633svn_boolean_t
2634svn_cert__match_dns_identity(svn_string_t *pattern, svn_string_t *hostname)
2635{
2636  apr_size_t pattern_pos = 0, hostname_pos = 0;
2637
2638  /* support leading wildcards that composed of the only character in the
2639   * left-most label. */
2640  if (pattern->len >= 2 &&
2641      pattern->data[pattern_pos] == '*' &&
2642      pattern->data[pattern_pos + 1] == '.')
2643    {
2644      while (hostname_pos < hostname->len &&
2645             hostname->data[hostname_pos] != '.')
2646        {
2647          hostname_pos++;
2648        }
2649      /* Assume that the wildcard must match something.  Rule 2 says
2650       * that *.example.com should not match example.com.  If the wildcard
2651       * ends up not matching anything then it matches .example.com which
2652       * seems to be essentially the same as just example.com */
2653      if (hostname_pos == 0)
2654        return FALSE;
2655
2656      pattern_pos++;
2657    }
2658
2659  while (pattern_pos < pattern->len && hostname_pos < hostname->len)
2660    {
2661      char pattern_c = pattern->data[pattern_pos];
2662      char hostname_c = hostname->data[hostname_pos];
2663
2664      /* fold case as described in RFC 4343.
2665       * Note: We actually convert to lowercase, since our URI
2666       * canonicalization code converts to lowercase and generally
2667       * most certs are issued with lowercase DNS names, meaning
2668       * this avoids the fold operation in most cases.  The RFC
2669       * suggests the opposite transformation, but doesn't require
2670       * any specific implementation in any case.  It is critical
2671       * that this folding be locale independent so you can't use
2672       * tolower(). */
2673      pattern_c = canonicalize_to_lower(pattern_c);
2674      hostname_c = canonicalize_to_lower(hostname_c);
2675
2676      if (pattern_c != hostname_c)
2677        {
2678          /* doesn't match */
2679          return FALSE;
2680        }
2681      else
2682        {
2683          /* characters match so skip both */
2684          pattern_pos++;
2685          hostname_pos++;
2686        }
2687    }
2688
2689  /* ignore a trailing period on the hostname since this has no effect on the
2690   * security of the matching.  See the following for the long explanation as
2691   * to why:
2692   * https://bugzilla.mozilla.org/show_bug.cgi?id=134402#c28
2693   */
2694  if (pattern_pos == pattern->len &&
2695      hostname_pos == hostname->len - 1 &&
2696      hostname->data[hostname_pos] == '.')
2697    hostname_pos++;
2698
2699  if (pattern_pos != pattern->len || hostname_pos != hostname->len)
2700    {
2701      /* end didn't match */
2702      return FALSE;
2703    }
2704
2705  return TRUE;
2706}
2707