1/*
2 * dirent_uri.c:   a library to manipulate URIs and directory entries.
3 *
4 * ====================================================================
5 *    Licensed to the Apache Software Foundation (ASF) under one
6 *    or more contributor license agreements.  See the NOTICE file
7 *    distributed with this work for additional information
8 *    regarding copyright ownership.  The ASF licenses this file
9 *    to you under the Apache License, Version 2.0 (the
10 *    "License"); you may not use this file except in compliance
11 *    with the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 *    Unless required by applicable law or agreed to in writing,
16 *    software distributed under the License is distributed on an
17 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 *    KIND, either express or implied.  See the License for the
19 *    specific language governing permissions and limitations
20 *    under the License.
21 * ====================================================================
22 */
23
24
25
26#include <string.h>
27#include <assert.h>
28#include <ctype.h>
29
30#include <apr_uri.h>
31#include <apr_lib.h>
32
33#include "svn_private_config.h"
34#include "svn_string.h"
35#include "svn_dirent_uri.h"
36#include "svn_path.h"
37#include "svn_ctype.h"
38
39#include "dirent_uri.h"
40#include "private/svn_dirent_uri_private.h"
41#include "private/svn_fspath.h"
42#include "private/svn_cert.h"
43
/* The canonical empty path.  Can this be changed?  Well, change the empty
   test below and the path library will work, not so sure about the fs/wc
   libraries. */
#define SVN_EMPTY_PATH ""

/* TRUE if s is the canonical empty path, FALSE otherwise.
   Only the first byte of S is inspected, so S must point to at least one
   readable character (typically a NUL-terminated string). */
#define SVN_PATH_IS_EMPTY(s) ((s)[0] == '\0')

/* TRUE if s,n is the platform's empty path ("."), FALSE otherwise. Can
   this be changed?  Well, the path library will work, not so sure about
   the OS!
   S is the string and N its length; S need not be NUL-terminated because
   only S[0] is read, and only when N is 1. */
#define SVN_PATH_IS_PLATFORM_EMPTY(s,n) ((n) == 1 && (s)[0] == '.')

/* This check must match the check on top of dirent_uri-tests.c and
   path-tests.c.
   When SVN_USE_DOS_PATHS is defined, the code in this file additionally
   recognizes drive letters ("X:", "X:/") and leading double-slash
   ("//server/share") roots in dirents. */
#if defined(WIN32) || defined(__CYGWIN__) || defined(__OS2__)
#define SVN_USE_DOS_PATHS
#endif
62
/* Path type definition. Used only by internal functions.
 *
 * Selects which set of rules the shared helpers (canonicalize(),
 * get_longest_ancestor_length(), is_child()) apply to their arguments. */
typedef enum path_type_t {
  type_uri,      /* "scheme://..." style URI */
  type_dirent,   /* local directory entry; may carry a root prefix */
  type_relpath   /* relative path; no root prefix is copied for it */
} path_type_t;
69
70
71/**** Forward declarations *****/
72
73static svn_boolean_t
74relpath_is_canonical(const char *relpath);
75
76
77/**** Internal implementation functions *****/
78
79/* Return an internal-style new path based on PATH, allocated in POOL.
80 *
81 * "Internal-style" means that separators are all '/'.
82 */
83static const char *
84internal_style(const char *path, apr_pool_t *pool)
85{
86#if '/' != SVN_PATH_LOCAL_SEPARATOR
87    {
88      char *p = apr_pstrdup(pool, path);
89      path = p;
90
91      /* Convert all local-style separators to the canonical ones. */
92      for (; *p != '\0'; ++p)
93        if (*p == SVN_PATH_LOCAL_SEPARATOR)
94          *p = '/';
95    }
96#endif
97
98  return path;
99}
100
/* Map the ASCII letters 'A'..'Z' to 'a'..'z' and return every other
   character unchanged.  Unlike tolower() this never consults the locale,
   which is what canonicalization of dirents and urls needs. */
static char
canonicalize_to_lower(char c)
{
  const int is_ascii_upper = (c >= 'A' && c <= 'Z');

  return is_ascii_upper ? (char)(c - 'A' + 'a') : c;
}
111
/* Map the ASCII letters 'a'..'z' to 'A'..'Z' and return every other
   character unchanged.  Unlike toupper() this never consults the locale,
   which is what canonicalization of dirents and urls needs. */
static char
canonicalize_to_upper(char c)
{
  const int is_ascii_lower = (c >= 'a' && c <= 'z');

  return is_ascii_lower ? (char)(c - 'a' + 'A') : c;
}
122
123/* Calculates the length of the dirent absolute or non absolute root in
124   DIRENT, return 0 if dirent is not rooted  */
125static apr_size_t
126dirent_root_length(const char *dirent, apr_size_t len)
127{
128#ifdef SVN_USE_DOS_PATHS
129  if (len >= 2 && dirent[1] == ':' &&
130      ((dirent[0] >= 'A' && dirent[0] <= 'Z') ||
131       (dirent[0] >= 'a' && dirent[0] <= 'z')))
132    {
133      return (len > 2 && dirent[2] == '/') ? 3 : 2;
134    }
135
136  if (len > 2 && dirent[0] == '/' && dirent[1] == '/')
137    {
138      apr_size_t i = 2;
139
140      while (i < len && dirent[i] != '/')
141        i++;
142
143      if (i == len)
144        return len; /* Cygwin drive alias, invalid path on WIN32 */
145
146      i++; /* Skip '/' */
147
148      while (i < len && dirent[i] != '/')
149        i++;
150
151      return i;
152    }
153#endif /* SVN_USE_DOS_PATHS */
154  if (len >= 1 && dirent[0] == '/')
155    return 1;
156
157  return 0;
158}
159
160
161/* Return the length of substring necessary to encompass the entire
162 * previous dirent segment in DIRENT, which should be a LEN byte string.
163 *
164 * A trailing slash will not be included in the returned length except
165 * in the case in which DIRENT is absolute and there are no more
166 * previous segments.
167 */
168static apr_size_t
169dirent_previous_segment(const char *dirent,
170                        apr_size_t len)
171{
172  if (len == 0)
173    return 0;
174
175  --len;
176  while (len > 0 && dirent[len] != '/'
177#ifdef SVN_USE_DOS_PATHS
178                 && (dirent[len] != ':' || len != 1)
179#endif /* SVN_USE_DOS_PATHS */
180        )
181    --len;
182
183  /* check if the remaining segment including trailing '/' is a root dirent */
184  if (dirent_root_length(dirent, len+1) == len + 1)
185    return len + 1;
186  else
187    return len;
188}
189
190/* Calculates the length occupied by the schema defined root of URI */
191static apr_size_t
192uri_schema_root_length(const char *uri, apr_size_t len)
193{
194  apr_size_t i;
195
196  for (i = 0; i < len; i++)
197    {
198      if (uri[i] == '/')
199        {
200          if (i > 0 && uri[i-1] == ':' && i < len-1 && uri[i+1] == '/')
201            {
202              /* We have an absolute uri */
203              if (i == 5 && strncmp("file", uri, 4) == 0)
204                return 7; /* file:// */
205              else
206                {
207                  for (i += 2; i < len; i++)
208                    if (uri[i] == '/')
209                      return i;
210
211                  return len; /* Only a hostname is found */
212                }
213            }
214          else
215            return 0;
216        }
217    }
218
219  return 0;
220}
221
222/* Returns TRUE if svn_dirent_is_absolute(dirent) or when dirent has
223   a non absolute root. (E.g. '/' or 'F:' on Windows) */
224static svn_boolean_t
225dirent_is_rooted(const char *dirent)
226{
227  if (! dirent)
228    return FALSE;
229
230  /* Root on all systems */
231  if (dirent[0] == '/')
232    return TRUE;
233
234  /* On Windows, dirent is also absolute when it starts with 'H:' or 'H:/'
235     where 'H' is any letter. */
236#ifdef SVN_USE_DOS_PATHS
237  if (((dirent[0] >= 'A' && dirent[0] <= 'Z') ||
238       (dirent[0] >= 'a' && dirent[0] <= 'z')) &&
239      (dirent[1] == ':'))
240     return TRUE;
241#endif /* SVN_USE_DOS_PATHS */
242
243  return FALSE;
244}
245
246/* Return the length of substring necessary to encompass the entire
247 * previous relpath segment in RELPATH, which should be a LEN byte string.
248 *
249 * A trailing slash will not be included in the returned length.
250 */
251static apr_size_t
252relpath_previous_segment(const char *relpath,
253                         apr_size_t len)
254{
255  if (len == 0)
256    return 0;
257
258  --len;
259  while (len > 0 && relpath[len] != '/')
260    --len;
261
262  return len;
263}
264
265/* Return the length of substring necessary to encompass the entire
266 * previous uri segment in URI, which should be a LEN byte string.
267 *
268 * A trailing slash will not be included in the returned length except
269 * in the case in which URI is absolute and there are no more
270 * previous segments.
271 */
272static apr_size_t
273uri_previous_segment(const char *uri,
274                     apr_size_t len)
275{
276  apr_size_t root_length;
277  apr_size_t i = len;
278  if (len == 0)
279    return 0;
280
281  root_length = uri_schema_root_length(uri, len);
282
283  --i;
284  while (len > root_length && uri[i] != '/')
285    --i;
286
287  if (i == 0 && len > 1 && *uri == '/')
288    return 1;
289
290  return i;
291}
292
/* Return the canonicalized version of PATH, of type TYPE, allocated in
 * POOL.
 *
 * The result is stored in *CANONICAL_PATH.  The function returns
 * SVN_NO_ERROR except for two TYPE_URI inputs, for which it returns an
 * SVN_ERR_CANONICALIZATION_FAILED error:
 *   - an empty PATH (*CANONICAL_PATH is set to ""), and
 *   - a PATH starting with '/' (*CANONICAL_PATH is set to PATH itself).
 *
 * Processing happens in these steps:
 *   1. For TYPE_URI inputs that start with "scheme://": copy the scheme
 *      in lower case, copy any "user[:password]@" part verbatim, copy the
 *      host name in lower case and drop an empty port or the default port
 *      of http (80), https (443) and svn (3690).
 *   2. Otherwise, unless TYPE is TYPE_RELPATH: copy a leading '/' (and,
 *      with SVN_USE_DOS_PATHS, a second '/' or an upper-cased drive letter
 *      for dirents).  A TYPE_URI input without "scheme://" is not rejected
 *      here; it goes through this step like a path.
 *   3. Copy the remaining segments, dropping empty segments and "."
 *      segments (for URIs also "%2E"), and treating "%2F" as a separator
 *      for URIs.  A trailing slash is removed afterwards.
 *   4. For URLs with data after the authority: normalize percent-escapes
 *      (decode escapes of characters that svn_uri__char_validity accepts,
 *      upper-case the hex digits of the others, and escape characters the
 *      table rejects).
 */
static svn_error_t *
canonicalize(const char **canonical_path,
             path_type_t type, const char *path, apr_pool_t *pool)
{
  char *canon, *dst;
  const char *src;
  apr_size_t seglen;
  apr_size_t schemelen = 0;       /* length of "scheme://" once known */
  apr_size_t canon_segments = 0;  /* number of segments written so far */
  svn_boolean_t url = FALSE;
  char *schema_data = NULL;       /* first byte after "scheme://host/" */

  /* "" is already canonical, so just return it; note that later code
     depends on path not being zero-length.  */
  if (SVN_PATH_IS_EMPTY(path))
    {
      *canonical_path = "";
      if (type == type_uri)
        return svn_error_create(SVN_ERR_CANONICALIZATION_FAILED, NULL,
                                _("An empty URI can not be canonicalized"));
      else
        return SVN_NO_ERROR;
    }

  /* Steps 1-3 below write at most one output byte per input byte consumed,
     so a buffer of the input's size is enough for them. */
  dst = canon = apr_pcalloc(pool, strlen(path) + 1);

  /* If this is supposed to be an URI, it should start with
     "scheme://".  We'll copy the scheme, host name, etc. to DST and
     set URL = TRUE. */
  src = path;
  if (type == type_uri)
    {
      if (*src == '/')
        {
          *canonical_path = src;
          return svn_error_create(SVN_ERR_CANONICALIZATION_FAILED, NULL,
                                  _("A URI can not start with '/'"));
        }

      /* Probe for the end of a possible scheme name. */
      while (*src && (*src != '/') && (*src != ':'))
        src++;

      if (*src == ':' && *(src+1) == '/' && *(src+2) == '/')
        {
          const char *seg;

          url = TRUE;

          /* Found a scheme, convert to lowercase and copy to dst. */
          src = path;
          while (*src != ':')
            {
              *(dst++) = canonicalize_to_lower((*src++));
              schemelen++;
            }
          *(dst++) = ':';
          *(dst++) = '/';
          *(dst++) = '/';
          src += 3;
          schemelen += 3;

          /* This might be the hostname */
          seg = src;
          while (*src && (*src != '/') && (*src != '@'))
            src++;

          if (*src == '@')
            {
              /* Copy the username & password. */
              seglen = src - seg + 1;
              memcpy(dst, seg, seglen);
              dst += seglen;
              src++;
            }
          else
            src = seg;  /* no user info: rewind to the start of the host */

          /* Found a hostname, convert to lowercase and copy to dst. */
          if (*src == '[')
            {
             *(dst++) = *(src++); /* Copy '[' */

              /* Bracketed literal: copy hex digits and ':' only. */
              while (*src == ':'
                     || (*src >= '0' && (*src <= '9'))
                     || (*src >= 'a' && (*src <= 'f'))
                     || (*src >= 'A' && (*src <= 'F')))
                {
                  *(dst++) = canonicalize_to_lower((*src++));
                }

              if (*src == ']')
                *(dst++) = *(src++); /* Copy ']' */
            }
          else
            while (*src && (*src != '/') && (*src != ':'))
              *(dst++) = canonicalize_to_lower((*src++));

          if (*src == ':')
            {
              /* We probably have a port number: Is it a default portnumber
                 which doesn't belong in a canonical url? */
              if (src[1] == '8' && src[2] == '0'
                  && (src[3]== '/'|| !src[3])
                  && !strncmp(canon, "http:", 5))
                {
                  src += 3;
                }
              else if (src[1] == '4' && src[2] == '4' && src[3] == '3'
                       && (src[4]== '/'|| !src[4])
                       && !strncmp(canon, "https:", 6))
                {
                  src += 4;
                }
              else if (src[1] == '3' && src[2] == '6'
                       && src[3] == '9' && src[4] == '0'
                       && (src[5]== '/'|| !src[5])
                       && !strncmp(canon, "svn:", 4))
                {
                  src += 5;
                }
              else if (src[1] == '/' || !src[1])
                {
                  /* A ':' with nothing after it: drop the colon. */
                  src += 1;
                }

              /* Whatever is left before the next '/' (a non-default port)
                 is copied through. */
              while (*src && (*src != '/'))
                *(dst++) = canonicalize_to_lower((*src++));
            }

          /* Copy trailing slash, or null-terminator. */
          *(dst) = *(src);

          /* Move src and dst forward only if we are not
           * at null-terminator yet. */
          if (*src)
            {
              src++;
              dst++;
              schema_data = dst;
            }

          /* The "scheme://authority" part counts as one segment. */
          canon_segments = 1;
        }
    }

  /* Copy to DST any separator or drive letter that must come before the
     first regular path segment. */
  if (! url && type != type_relpath)
    {
      src = path;
      /* If this is an absolute path, then just copy over the initial
         separator character. */
      if (*src == '/')
        {
          *(dst++) = *(src++);

#ifdef SVN_USE_DOS_PATHS
          /* On Windows permit two leading separator characters which means an
           * UNC path. */
          if ((type == type_dirent) && *src == '/')
            *(dst++) = *(src++);
#endif /* SVN_USE_DOS_PATHS */
        }
#ifdef SVN_USE_DOS_PATHS
      /* On Windows the first segment can be a drive letter, which we normalize
         to upper case. */
      else if (type == type_dirent &&
               ((*src >= 'a' && *src <= 'z') ||
                (*src >= 'A' && *src <= 'Z')) &&
               (src[1] == ':'))
        {
          *(dst++) = canonicalize_to_upper(*(src++));
          /* Leave the ':' to be processed as (or as part of) a path segment
             by the following code block, so we need not care whether it has
             a slash after it. */
        }
#endif /* SVN_USE_DOS_PATHS */
    }

  while (*src)
    {
      /* Parse each segment, finding the closing '/' (which might look
         like '%2F' for URIs).  */
      const char *next = src;
      apr_size_t slash_len = 0;

      while (*next
             && (next[0] != '/')
             && (! (type == type_uri && next[0] == '%' && next[1] == '2' &&
                    canonicalize_to_upper(next[2]) == 'F')))
        {
          ++next;
        }

      /* Record how long our "slash" is. */
      if (next[0] == '/')
        slash_len = 1;
      else if (type == type_uri && next[0] == '%')
        slash_len = 3;

      seglen = next - src;

      if (seglen == 0
          || (seglen == 1 && src[0] == '.')
          || (type == type_uri && seglen == 3 && src[0] == '%' && src[1] == '2'
              && canonicalize_to_upper(src[2]) == 'E'))
        {
          /* Empty or noop segment, so do nothing.  (For URIs, '%2E'
             is equivalent to '.').  */
        }
#ifdef SVN_USE_DOS_PATHS
      /* If this is the first path segment of a file:// URI and it contains a
         windows drive letter, convert the drive letter to upper case. */
      else if (url && canon_segments == 1 && seglen >= 2 &&
               (strncmp(canon, "file:", 5) == 0) &&
               src[0] >= 'a' && src[0] <= 'z' && src[1] == ':')
        {
          *(dst++) = canonicalize_to_upper(src[0]);
          *(dst++) = ':';
          if (seglen > 2) /* drive relative path */
            {
              memcpy(dst, src + 2, seglen - 2);
              dst += seglen - 2;
            }

          if (slash_len)
            *(dst++) = '/';
          canon_segments++;
        }
#endif /* SVN_USE_DOS_PATHS */
      else
        {
          /* An actual segment, append it to the destination path */
          memcpy(dst, src, seglen);
          dst += seglen;
          /* Whatever form the separator had ('/' or '%2F'), a single '/'
             is written. */
          if (slash_len)
            *(dst++) = '/';
          canon_segments++;
        }

      /* Skip over trailing slash to the next segment. */
      src = next + slash_len;
    }

  /* Remove the trailing slash if there was at least one
   * canonical segment and the last segment ends with a slash.
   *
   * But keep in mind that, for URLs, the scheme counts as a
   * canonical segment -- so if path is ONLY a scheme (such
   * as "https://") we should NOT remove the trailing slash. */
  if ((canon_segments > 0 && *(dst - 1) == '/')
      && ! (url && path[schemelen] == '\0'))
    {
      dst --;
    }

  *dst = '\0';

#ifdef SVN_USE_DOS_PATHS
  /* Skip leading double slashes when there are less than 2
   * canon segments. UNC paths *MUST* have two segments. */
  if ((type == type_dirent) && canon[0] == '/' && canon[1] == '/')
    {
      if (canon_segments < 2)
        {
          *canonical_path = canon + 1;
          return SVN_NO_ERROR;
        }
      else
        {
          /* Now we're sure this is a valid UNC path, convert the server name
             (the first path segment) to lowercase as Windows treats it as case
             insensitive.
             Note: normally the share name is treated as case insensitive too,
             but it seems to be possible to configure Samba to treat those as
             case sensitive, so better leave that alone. */
          for (dst = canon + 2; *dst && *dst != '/'; dst++)
            *dst = canonicalize_to_lower(*dst);
        }
    }
#endif /* SVN_USE_DOS_PATHS */

  /* Check the normalization of characters in a uri */
  if (schema_data)
    {
      /* First pass: count how many extra bytes escaping will need.  Each
         character that has to become "%XX" grows by two bytes. */
      int need_extra = 0;
      src = schema_data;

      while (*src)
        {
          switch (*src)
            {
              case '/':
                break;
              case '%':
                /* A '%' not followed by two hex digits is itself escaped
                   (as "%25"); a well-formed escape is skipped whole. */
                if (!svn_ctype_isxdigit(*(src+1)) ||
                    !svn_ctype_isxdigit(*(src+2)))
                  need_extra += 2;
                else
                  src += 2;
                break;
              default:
                if (!svn_uri__char_validity[(unsigned char)*src])
                  need_extra += 2;
                break;
            }
          src++;
        }

      if (need_extra > 0)
        {
          /* The result grows: build it in a new, larger buffer and carry
             over the already-final "scheme://authority/" prefix. */
          apr_size_t pre_schema_size = (apr_size_t)(schema_data - canon);

          dst = apr_palloc(pool, (apr_size_t)(src - canon) + need_extra + 1);
          memcpy(dst, canon, pre_schema_size);
          canon = dst;

          dst += pre_schema_size;
        }
      else
        /* Nothing grows, so the rewrite below can be done in place: DST
           never gets ahead of SRC. */
        dst = schema_data;

      src = schema_data;

      /* Second pass: write the normalized characters. */
      while (*src)
        {
          switch (*src)
            {
              case '/':
                *(dst++) = '/';
                break;
              case '%':
                if (!svn_ctype_isxdigit(*(src+1)) ||
                    !svn_ctype_isxdigit(*(src+2)))
                  {
                    /* Stray '%': emit its own escape. */
                    *(dst++) = '%';
                    *(dst++) = '2';
                    *(dst++) = '5';
                  }
                else
                  {
                    char digitz[3];
                    int val;

                    digitz[0] = *(++src);
                    digitz[1] = *(++src);
                    digitz[2] = 0;

                    val = (int)strtol(digitz, NULL, 16);

                    /* Decode the escape if the character is allowed
                       unescaped; otherwise keep it, with upper-case hex
                       digits. */
                    if (svn_uri__char_validity[(unsigned char)val])
                      *(dst++) = (char)val;
                    else
                      {
                        *(dst++) = '%';
                        *(dst++) = canonicalize_to_upper(digitz[0]);
                        *(dst++) = canonicalize_to_upper(digitz[1]);
                      }
                  }
                break;
              default:
                if (!svn_uri__char_validity[(unsigned char)*src])
                  {
                    /* Size 4 covers the three characters plus the NUL
                       that apr_snprintf writes; the NUL is overwritten by
                       the next character or the final terminator. */
                    apr_snprintf(dst, 4, "%%%02X", (unsigned char)*src);
                    dst += 3;
                  }
                else
                  *(dst++) = *src;
                break;
            }
          src++;
        }
      *dst = '\0';
    }

  *canonical_path = canon;
  return SVN_NO_ERROR;
}
674
/* Return the string length of the longest common ancestor of PATH1 and PATH2.
 * Pass type_uri for TYPES if PATH1 and PATH2 are URIs, and type_dirent if
 * PATH1 and PATH2 are regular paths.
 *
 * If the two paths do not share a common ancestor, return 0.  In
 * particular 0 is returned whenever either path is empty.
 *
 * The returned value is an offset into PATH1 / PATH2; no string is
 * created.  POOL is not used by this implementation.
 */
static apr_size_t
get_longest_ancestor_length(path_type_t types,
                            const char *path1,
                            const char *path2,
                            apr_pool_t *pool)
{
  apr_size_t path1_len, path2_len;
  apr_size_t i = 0;            /* number of leading bytes that match */
  apr_size_t last_dirsep = 0;  /* offset of the last '/' inside the match */
#ifdef SVN_USE_DOS_PATHS
  svn_boolean_t unc = FALSE;
#endif

  path1_len = strlen(path1);
  path2_len = strlen(path2);

  if (SVN_PATH_IS_EMPTY(path1) || SVN_PATH_IS_EMPTY(path2))
    return 0;

  /* Both paths are non-empty here, so the loop cannot run past either
     terminator: it stops at the first mismatch or at the end of the
     shorter path. */
  while (path1[i] == path2[i])
    {
      /* Keep track of the last directory separator we hit. */
      if (path1[i] == '/')
        last_dirsep = i;

      i++;

      /* If we get to the end of either path, break out. */
      if ((i == path1_len) || (i == path2_len))
        break;
    }

  /* two special cases:
     1. '/' is the longest common ancestor of '/' and '/foo' */
  if (i == 1 && path1[0] == '/' && path2[0] == '/')
    return 1;
  /* 2. '' is the longest common ancestor of any non-matching
   * strings 'foo' and 'bar' */
  if (types == type_dirent && i == 0)
    return 0;

  /* Handle some windows specific cases */
#ifdef SVN_USE_DOS_PATHS
  if (types == type_dirent)
    {
      /* don't count the '//' from UNC paths */
      if (last_dirsep == 1 && path1[0] == '/' && path1[1] == '/')
        {
          last_dirsep = 0;
          unc = TRUE;
        }

      /* X:/ and X:/foo */
      if (i == 3 && path1[2] == '/' && path1[1] == ':')
        return i;

      /* Cannot use SVN_ERR_ASSERT here, so we'll have to crash, sorry.
       * Note that this assertion triggers only if the code above has
       * been broken. The code below relies on this assertion, because
       * it uses [i - 1] as index. */
      assert(i > 0);

      /* X: and X:/ */
      if ((path1[i - 1] == ':' && path2[i] == '/') ||
          (path2[i - 1] == ':' && path1[i] == '/'))
          return 0;
      /* X: and X:foo */
      if (path1[i - 1] == ':' || path2[i - 1] == ':')
          return i;
    }
#endif /* SVN_USE_DOS_PATHS */

  /* last_dirsep is now the offset of the last directory separator we
     crossed before reaching a non-matching byte.  i is the offset of
     that non-matching byte, and is guaranteed to be <= the length of
     whichever path is shorter.
     If one of the paths is the common part return that. */
  if (((i == path1_len) && (path2[i] == '/'))
           || ((i == path2_len) && (path1[i] == '/'))
           || ((i == path1_len) && (i == path2_len)))
    return i;
  else
    {
      /* Nothing in common but the root folder '/' or 'X:/' for Windows
         dirents. */
#ifdef SVN_USE_DOS_PATHS
      /* Note: the braces opened and closed inside the #ifdef sections
         wrap the root checks in "if (! unc)" on DOS-path builds only. */
      if (! unc)
        {
          /* X:/foo and X:/bar returns X:/ */
          if ((types == type_dirent) &&
              last_dirsep == 2 && path1[1] == ':' && path1[2] == '/'
                               && path2[1] == ':' && path2[2] == '/')
            return 3;
#endif /* SVN_USE_DOS_PATHS */
          if (last_dirsep == 0 && path1[0] == '/' && path2[0] == '/')
            return 1;
#ifdef SVN_USE_DOS_PATHS
        }
#endif
    }

  /* Otherwise the common ancestor ends just before the last separator
     that both paths share. */
  return last_dirsep;
}
786
/* Determine whether PATH2 is a child of PATH1.
 *
 * PATH2 is a child of PATH1 if
 * 1) PATH1 is empty, and PATH2 is not empty and not an absolute path.
 *    (For type_uri an empty PATH1 never has children; for type_dirent
 *    "not absolute" means not rooted as per dirent_is_rooted().)
 * or
 * 2) PATH2 has n components, PATH1 has x < n components,
 *    and PATH1 matches PATH2 in all its x components.
 *    Components are separated by a slash, '/'.
 *
 * Pass type_uri for TYPE if PATH1 and PATH2 are URIs, and type_dirent if
 * PATH1 and PATH2 are regular paths.
 *
 * If PATH2 is not a child of PATH1, return NULL.
 *
 * If PATH2 is a child of PATH1, and POOL is not NULL, allocate a copy
 * of the child part of PATH2 in POOL and return a pointer to the
 * newly allocated child part.
 *
 * If PATH2 is a child of PATH1, and POOL is NULL, return a pointer
 * pointing to the child part of PATH2.
 * */
static const char *
is_child(path_type_t type, const char *path1, const char *path2,
         apr_pool_t *pool)
{
  apr_size_t i;

  /* Allow "" and "foo" or "H:foo" to be parent/child */
  if (SVN_PATH_IS_EMPTY(path1))               /* "" is the parent  */
    {
      if (SVN_PATH_IS_EMPTY(path2))            /* "" not a child    */
        return NULL;

      /* check if this is an absolute path */
      if ((type == type_uri) ||
          (type == type_dirent && dirent_is_rooted(path2)))
        return NULL;
      else
        /* everything else is child */
        return pool ? apr_pstrdup(pool, path2) : path2;
    }

  /* Reach the end of at least one of the paths.  How should we handle
     things like path1:"foo///bar" and path2:"foo/bar/baz"?  It doesn't
     appear to arise in the current Subversion code, it's not clear to me
     if they should be parent/child or not. */
  /* Hmmm... aren't paths assumed to be canonical in this function?
   * How can "foo///bar" even happen if the paths are canonical? */
  for (i = 0; path1[i] && path2[i]; i++)
    if (path1[i] != path2[i])
      return NULL;

  /* At this point the first I bytes of both paths are equal and at least
     one path has ended.  PATH2 can only be a child if PATH1 has ended and
     PATH2 has not.  PATH1 is non-empty here, so I >= 1 and path1[i - 1]
     is a valid index.  Two situations are then distinguished:

       a) PATH1 ends in a separator ('/', or ':' for dirents when
          SVN_USE_DOS_PATHS is defined), i.e. PATH1 is a root such as
          "/", "X:/" or "X:":
            - path2[i] == '/'  => not a child (this avoids matching
                                  '/' and '//srv');
            - otherwise        => the child part starts at path2 + i.

       b) PATH1 does not end in a separator:
            - path2[i] == '/' and more follows
                               => the child part starts at path2 + i + 1;
            - path2[i] == '/' and nothing follows, or path2[i] is any
              other character => not a child.
  */
  if (path1[i] == '\0' && path2[i])
    {
      if (path1[i - 1] == '/'
#ifdef SVN_USE_DOS_PATHS
          || ((type == type_dirent) && path1[i - 1] == ':')
#endif
           )
        {
          if (path2[i] == '/')
            /* .../
             * ..../
             *     i   */
            return NULL;
          else
            /* .../
             * .../foo
             *     i    */
            return pool ? apr_pstrdup(pool, path2 + i) : path2 + i;
        }
      else if (path2[i] == '/')
        {
          if (path2[i + 1])
            /* ...
             * .../foo
             *    i    */
            return pool ? apr_pstrdup(pool, path2 + i + 1) : path2 + i + 1;
          else
            /* ...
             * .../
             *    i    */
            return NULL;
        }
    }

  /* Otherwise, path2 isn't a child. */
  return NULL;
}
891
892
893/**** Public API functions ****/
894
895const char *
896svn_dirent_internal_style(const char *dirent, apr_pool_t *pool)
897{
898  return svn_dirent_canonicalize(internal_style(dirent, pool), pool);
899}
900
901svn_error_t *
902svn_dirent_internal_style_safe(const char **internal_style_dirent,
903                               const char **non_canonical_result,
904                               const char *dirent,
905                               apr_pool_t *result_pool,
906                               apr_pool_t *scratch_pool)
907{
908  return svn_error_trace(
909      svn_dirent_canonicalize_safe(internal_style_dirent,
910                                   non_canonical_result,
911                                   internal_style(dirent, scratch_pool),
912                                   result_pool, scratch_pool));
913}
914
915const char *
916svn_dirent_local_style(const char *dirent, apr_pool_t *pool)
917{
918  /* Internally, Subversion represents the current directory with the
919     empty string.  But users like to see "." . */
920  if (SVN_PATH_IS_EMPTY(dirent))
921    return ".";
922
923#if '/' != SVN_PATH_LOCAL_SEPARATOR
924    {
925      char *p = apr_pstrdup(pool, dirent);
926      dirent = p;
927
928      /* Convert all canonical separators to the local-style ones. */
929      for (; *p != '\0'; ++p)
930        if (*p == '/')
931          *p = SVN_PATH_LOCAL_SEPARATOR;
932    }
933#endif
934
935  return dirent;
936}
937
938svn_error_t *
939svn_relpath__make_internal(const char **internal_style_relpath,
940                           const char *relpath,
941                           apr_pool_t *result_pool,
942                           apr_pool_t *scratch_pool)
943{
944  return svn_error_trace(
945      svn_relpath_canonicalize_safe(internal_style_relpath, NULL,
946                                    internal_style(relpath, scratch_pool),
947                                    result_pool, scratch_pool));
948}
949
950/* We decided against using apr_filepath_root here because of the negative
951   performance impact (creating a pool and converting strings ). */
952svn_boolean_t
953svn_dirent_is_root(const char *dirent, apr_size_t len)
954{
955#ifdef SVN_USE_DOS_PATHS
956  /* On Windows and Cygwin, 'H:' or 'H:/' (where 'H' is any letter)
957     are also root directories */
958  if ((len == 2 || ((len == 3) && (dirent[2] == '/'))) &&
959      (dirent[1] == ':') &&
960      ((dirent[0] >= 'A' && dirent[0] <= 'Z') ||
961       (dirent[0] >= 'a' && dirent[0] <= 'z')))
962    return TRUE;
963
964  /* On Windows and Cygwin //server/share is a root directory,
965     and on Cygwin //drive is a drive alias */
966  if (len >= 2 && dirent[0] == '/' && dirent[1] == '/'
967      && dirent[len - 1] != '/')
968    {
969      int segments = 0;
970      apr_size_t i;
971      for (i = len; i >= 2; i--)
972        {
973          if (dirent[i] == '/')
974            {
975              segments ++;
976              if (segments > 1)
977                return FALSE;
978            }
979        }
980#ifdef __CYGWIN__
981      return (segments <= 1);
982#else
983      return (segments == 1); /* //drive is invalid on plain Windows */
984#endif
985    }
986#endif
987
988  /* directory is root if it's equal to '/' */
989  if (len == 1 && dirent[0] == '/')
990    return TRUE;
991
992  return FALSE;
993}
994
995svn_boolean_t
996svn_uri_is_root(const char *uri, apr_size_t len)
997{
998  assert(svn_uri_is_canonical(uri, NULL));
999  return (len == uri_schema_root_length(uri, len));
1000}
1001
1002char *svn_dirent_join(const char *base,
1003                      const char *component,
1004                      apr_pool_t *pool)
1005{
1006  apr_size_t blen = strlen(base);
1007  apr_size_t clen = strlen(component);
1008  char *dirent;
1009  int add_separator;
1010
1011  assert(svn_dirent_is_canonical(base, pool));
1012  assert(svn_dirent_is_canonical(component, pool));
1013
1014  /* If the component is absolute, then return it.  */
1015  if (svn_dirent_is_absolute(component))
1016    return apr_pmemdup(pool, component, clen + 1);
1017
1018  /* If either is empty return the other */
1019  if (SVN_PATH_IS_EMPTY(base))
1020    return apr_pmemdup(pool, component, clen + 1);
1021  if (SVN_PATH_IS_EMPTY(component))
1022    return apr_pmemdup(pool, base, blen + 1);
1023
1024#ifdef SVN_USE_DOS_PATHS
1025  if (component[0] == '/')
1026    {
1027      /* '/' is drive relative on Windows, not absolute like on Posix */
1028      if (dirent_is_rooted(base))
1029        {
1030          /* Join component without '/' to root-of(base) */
1031          blen = dirent_root_length(base, blen);
1032          component++;
1033          clen--;
1034
1035          if (blen == 2 && base[1] == ':') /* "C:" case */
1036            {
1037              char *root = apr_pmemdup(pool, base, 3);
1038              root[2] = '/'; /* We don't need the final '\0' */
1039
1040              base = root;
1041              blen = 3;
1042            }
1043
1044          if (clen == 0)
1045            return apr_pstrndup(pool, base, blen);
1046        }
1047      else
1048        return apr_pmemdup(pool, component, clen + 1);
1049    }
1050  else if (dirent_is_rooted(component))
1051    return apr_pmemdup(pool, component, clen + 1);
1052#endif /* SVN_USE_DOS_PATHS */
1053
1054  /* if last character of base is already a separator, don't add a '/' */
1055  add_separator = 1;
1056  if (base[blen - 1] == '/'
1057#ifdef SVN_USE_DOS_PATHS
1058       || base[blen - 1] == ':'
1059#endif
1060        )
1061          add_separator = 0;
1062
1063  /* Construct the new, combined dirent. */
1064  dirent = apr_palloc(pool, blen + add_separator + clen + 1);
1065  memcpy(dirent, base, blen);
1066  if (add_separator)
1067    dirent[blen] = '/';
1068  memcpy(dirent + blen + add_separator, component, clen + 1);
1069
1070  return dirent;
1071}
1072
1073char *svn_dirent_join_many(apr_pool_t *pool, const char *base, ...)
1074{
1075#define MAX_SAVED_LENGTHS 10
1076  apr_size_t saved_lengths[MAX_SAVED_LENGTHS];
1077  apr_size_t total_len;
1078  int nargs;
1079  va_list va;
1080  const char *s;
1081  apr_size_t len;
1082  char *dirent;
1083  char *p;
1084  int add_separator;
1085  int base_arg = 0;
1086
1087  total_len = strlen(base);
1088
1089  assert(svn_dirent_is_canonical(base, pool));
1090
1091  /* if last character of base is already a separator, don't add a '/' */
1092  add_separator = 1;
1093  if (total_len == 0
1094       || base[total_len - 1] == '/'
1095#ifdef SVN_USE_DOS_PATHS
1096       || base[total_len - 1] == ':'
1097#endif
1098        )
1099          add_separator = 0;
1100
1101  saved_lengths[0] = total_len;
1102
1103  /* Compute the length of the resulting string. */
1104
1105  nargs = 0;
1106  va_start(va, base);
1107  while ((s = va_arg(va, const char *)) != NULL)
1108    {
1109      len = strlen(s);
1110
1111      assert(svn_dirent_is_canonical(s, pool));
1112
1113      if (SVN_PATH_IS_EMPTY(s))
1114        continue;
1115
1116      if (nargs++ < MAX_SAVED_LENGTHS)
1117        saved_lengths[nargs] = len;
1118
1119      if (dirent_is_rooted(s))
1120        {
1121          total_len = len;
1122          base_arg = nargs;
1123
1124#ifdef SVN_USE_DOS_PATHS
1125          if (!svn_dirent_is_absolute(s)) /* Handle non absolute roots */
1126            {
1127              /* Set new base and skip the current argument */
1128              base = s = svn_dirent_join(base, s, pool);
1129              base_arg++;
1130              saved_lengths[0] = total_len = len = strlen(s);
1131            }
1132          else
1133#endif /* SVN_USE_DOS_PATHS */
1134            {
1135              base = ""; /* Don't add base */
1136              saved_lengths[0] = 0;
1137            }
1138
1139          add_separator = 1;
1140          if (s[len - 1] == '/'
1141#ifdef SVN_USE_DOS_PATHS
1142             || s[len - 1] == ':'
1143#endif
1144              )
1145             add_separator = 0;
1146        }
1147      else if (nargs <= base_arg + 1)
1148        {
1149          total_len += add_separator + len;
1150        }
1151      else
1152        {
1153          total_len += 1 + len;
1154        }
1155    }
1156  va_end(va);
1157
1158  /* base == "/" and no further components. just return that. */
1159  if (add_separator == 0 && total_len == 1)
1160    return apr_pmemdup(pool, "/", 2);
1161
1162  /* we got the total size. allocate it, with room for a NULL character. */
1163  dirent = p = apr_palloc(pool, total_len + 1);
1164
1165  /* if we aren't supposed to skip forward to an absolute component, and if
1166     this is not an empty base that we are skipping, then copy the base
1167     into the output. */
1168  if (! SVN_PATH_IS_EMPTY(base))
1169    {
1170      memcpy(p, base, len = saved_lengths[0]);
1171      p += len;
1172    }
1173
1174  nargs = 0;
1175  va_start(va, base);
1176  while ((s = va_arg(va, const char *)) != NULL)
1177    {
1178      if (SVN_PATH_IS_EMPTY(s))
1179        continue;
1180
1181      if (++nargs < base_arg)
1182        continue;
1183
1184      if (nargs < MAX_SAVED_LENGTHS)
1185        len = saved_lengths[nargs];
1186      else
1187        len = strlen(s);
1188
1189      /* insert a separator if we aren't copying in the first component
1190         (which can happen when base_arg is set). also, don't put in a slash
1191         if the prior character is a slash (occurs when prior component
1192         is "/"). */
1193      if (p != dirent &&
1194          ( ! (nargs - 1 <= base_arg) || add_separator))
1195        *p++ = '/';
1196
1197      /* copy the new component and advance the pointer */
1198      memcpy(p, s, len);
1199      p += len;
1200    }
1201  va_end(va);
1202
1203  *p = '\0';
1204  assert((apr_size_t)(p - dirent) == total_len);
1205
1206  return dirent;
1207}
1208
1209char *
1210svn_relpath_join(const char *base,
1211                 const char *component,
1212                 apr_pool_t *pool)
1213{
1214  apr_size_t blen = strlen(base);
1215  apr_size_t clen = strlen(component);
1216  char *path;
1217
1218  assert(relpath_is_canonical(base));
1219  assert(relpath_is_canonical(component));
1220
1221  /* If either is empty return the other */
1222  if (blen == 0)
1223    return apr_pmemdup(pool, component, clen + 1);
1224  if (clen == 0)
1225    return apr_pmemdup(pool, base, blen + 1);
1226
1227  path = apr_palloc(pool, blen + 1 + clen + 1);
1228  memcpy(path, base, blen);
1229  path[blen] = '/';
1230  memcpy(path + blen + 1, component, clen + 1);
1231
1232  return path;
1233}
1234
1235char *
1236svn_dirent_dirname(const char *dirent, apr_pool_t *pool)
1237{
1238  apr_size_t len = strlen(dirent);
1239
1240  assert(svn_dirent_is_canonical(dirent, pool));
1241
1242  if (len == dirent_root_length(dirent, len))
1243    return apr_pstrmemdup(pool, dirent, len);
1244  else
1245    return apr_pstrmemdup(pool, dirent, dirent_previous_segment(dirent, len));
1246}
1247
1248const char *
1249svn_dirent_basename(const char *dirent, apr_pool_t *pool)
1250{
1251  apr_size_t len = strlen(dirent);
1252  apr_size_t start;
1253
1254  assert(!pool || svn_dirent_is_canonical(dirent, pool));
1255
1256  if (svn_dirent_is_root(dirent, len))
1257    return "";
1258  else
1259    {
1260      start = len;
1261      while (start > 0 && dirent[start - 1] != '/'
1262#ifdef SVN_USE_DOS_PATHS
1263             && dirent[start - 1] != ':'
1264#endif
1265            )
1266        --start;
1267    }
1268
1269  if (pool)
1270    return apr_pstrmemdup(pool, dirent + start, len - start);
1271  else
1272    return dirent + start;
1273}
1274
1275void
1276svn_dirent_split(const char **dirpath,
1277                 const char **base_name,
1278                 const char *dirent,
1279                 apr_pool_t *pool)
1280{
1281  assert(dirpath != base_name);
1282
1283  if (dirpath)
1284    *dirpath = svn_dirent_dirname(dirent, pool);
1285
1286  if (base_name)
1287    *base_name = svn_dirent_basename(dirent, pool);
1288}
1289
1290char *
1291svn_relpath_dirname(const char *relpath,
1292                    apr_pool_t *pool)
1293{
1294  apr_size_t len = strlen(relpath);
1295
1296  assert(relpath_is_canonical(relpath));
1297
1298  return apr_pstrmemdup(pool, relpath,
1299                        relpath_previous_segment(relpath, len));
1300}
1301
1302const char *
1303svn_relpath_basename(const char *relpath,
1304                     apr_pool_t *pool)
1305{
1306  apr_size_t len = strlen(relpath);
1307  apr_size_t start;
1308
1309  assert(relpath_is_canonical(relpath));
1310
1311  start = len;
1312  while (start > 0 && relpath[start - 1] != '/')
1313    --start;
1314
1315  if (pool)
1316    return apr_pstrmemdup(pool, relpath + start, len - start);
1317  else
1318    return relpath + start;
1319}
1320
1321void
1322svn_relpath_split(const char **dirpath,
1323                  const char **base_name,
1324                  const char *relpath,
1325                  apr_pool_t *pool)
1326{
1327  assert(dirpath != base_name);
1328
1329  if (dirpath)
1330    *dirpath = svn_relpath_dirname(relpath, pool);
1331
1332  if (base_name)
1333    *base_name = svn_relpath_basename(relpath, pool);
1334}
1335
1336const char *
1337svn_relpath_prefix(const char *relpath,
1338                   int max_components,
1339                   apr_pool_t *result_pool)
1340{
1341  const char *end;
1342  assert(relpath_is_canonical(relpath));
1343
1344  if (max_components <= 0)
1345    return "";
1346
1347  for (end = relpath; *end; end++)
1348    {
1349      if (*end == '/')
1350        {
1351          if (!--max_components)
1352            break;
1353        }
1354    }
1355
1356  return apr_pstrmemdup(result_pool, relpath, end-relpath);
1357}
1358
1359char *
1360svn_uri_dirname(const char *uri, apr_pool_t *pool)
1361{
1362  apr_size_t len = strlen(uri);
1363
1364  assert(svn_uri_is_canonical(uri, pool));
1365
1366  if (svn_uri_is_root(uri, len))
1367    return apr_pstrmemdup(pool, uri, len);
1368  else
1369    return apr_pstrmemdup(pool, uri, uri_previous_segment(uri, len));
1370}
1371
1372const char *
1373svn_uri_basename(const char *uri, apr_pool_t *pool)
1374{
1375  apr_size_t len = strlen(uri);
1376  apr_size_t start;
1377
1378  assert(svn_uri_is_canonical(uri, NULL));
1379
1380  if (svn_uri_is_root(uri, len))
1381    return "";
1382
1383  start = len;
1384  while (start > 0 && uri[start - 1] != '/')
1385    --start;
1386
1387  return svn_path_uri_decode(uri + start, pool);
1388}
1389
1390void
1391svn_uri_split(const char **dirpath,
1392              const char **base_name,
1393              const char *uri,
1394              apr_pool_t *pool)
1395{
1396  assert(dirpath != base_name);
1397
1398  if (dirpath)
1399    *dirpath = svn_uri_dirname(uri, pool);
1400
1401  if (base_name)
1402    *base_name = svn_uri_basename(uri, pool);
1403}
1404
1405char *
1406svn_dirent_get_longest_ancestor(const char *dirent1,
1407                                const char *dirent2,
1408                                apr_pool_t *pool)
1409{
1410  return apr_pstrndup(pool, dirent1,
1411                      get_longest_ancestor_length(type_dirent, dirent1,
1412                                                  dirent2, pool));
1413}
1414
1415char *
1416svn_relpath_get_longest_ancestor(const char *relpath1,
1417                                 const char *relpath2,
1418                                 apr_pool_t *pool)
1419{
1420  assert(relpath_is_canonical(relpath1));
1421  assert(relpath_is_canonical(relpath2));
1422
1423  return apr_pstrndup(pool, relpath1,
1424                      get_longest_ancestor_length(type_relpath, relpath1,
1425                                                  relpath2, pool));
1426}
1427
1428char *
1429svn_uri_get_longest_ancestor(const char *uri1,
1430                             const char *uri2,
1431                             apr_pool_t *pool)
1432{
1433  apr_size_t uri_ancestor_len;
1434  apr_size_t i = 0;
1435
1436  assert(svn_uri_is_canonical(uri1, NULL));
1437  assert(svn_uri_is_canonical(uri2, NULL));
1438
1439  /* Find ':' */
1440  while (1)
1441    {
1442      /* No shared protocol => no common prefix */
1443      if (uri1[i] != uri2[i])
1444        return apr_pmemdup(pool, SVN_EMPTY_PATH,
1445                           sizeof(SVN_EMPTY_PATH));
1446
1447      if (uri1[i] == ':')
1448        break;
1449
1450      /* They're both URLs, so EOS can't come before ':' */
1451      assert((uri1[i] != '\0') && (uri2[i] != '\0'));
1452
1453      i++;
1454    }
1455
1456  i += 3;  /* Advance past '://' */
1457
1458  uri_ancestor_len = get_longest_ancestor_length(type_uri, uri1 + i,
1459                                                 uri2 + i, pool);
1460
1461  if (uri_ancestor_len == 0 ||
1462      (uri_ancestor_len == 1 && (uri1 + i)[0] == '/'))
1463    return apr_pmemdup(pool, SVN_EMPTY_PATH, sizeof(SVN_EMPTY_PATH));
1464  else
1465    return apr_pstrndup(pool, uri1, uri_ancestor_len + i);
1466}
1467
1468const char *
1469svn_dirent_is_child(const char *parent_dirent,
1470                    const char *child_dirent,
1471                    apr_pool_t *pool)
1472{
1473  return is_child(type_dirent, parent_dirent, child_dirent, pool);
1474}
1475
1476const char *
1477svn_dirent_skip_ancestor(const char *parent_dirent,
1478                         const char *child_dirent)
1479{
1480  apr_size_t len = strlen(parent_dirent);
1481  apr_size_t root_len;
1482
1483  if (0 != strncmp(parent_dirent, child_dirent, len))
1484    return NULL; /* parent_dirent is no ancestor of child_dirent */
1485
1486  if (child_dirent[len] == 0)
1487    return ""; /* parent_dirent == child_dirent */
1488
1489  /* Child == parent + more-characters */
1490
1491  root_len = dirent_root_length(child_dirent, strlen(child_dirent));
1492  if (root_len > len)
1493    /* Different root, e.g. ("" "/...") or ("//z" "//z/share") */
1494    return NULL;
1495
1496  /* Now, child == [root-of-parent] + [rest-of-parent] + more-characters.
1497   * It must be one of the following forms.
1498   *
1499   * rlen parent    child       bad?  rlen=len? c[len]=/?
1500   *  0   ""        "foo"               *
1501   *  0   "b"       "bad"         !
1502   *  0   "b"       "b/foo"                       *
1503   *  1   "/"       "/foo"              *
1504   *  1   "/b"      "/bad"        !
1505   *  1   "/b"      "/b/foo"                      *
1506   *  2   "a:"      "a:foo"             *
1507   *  2   "a:b"     "a:bad"       !
1508   *  2   "a:b"     "a:b/foo"                     *
1509   *  3   "a:/"     "a:/foo"            *
1510   *  3   "a:/b"    "a:/bad"      !
1511   *  3   "a:/b"    "a:/b/foo"                    *
1512   *  5   "//s/s"   "//s/s/foo"         *         *
1513   *  5   "//s/s/b" "//s/s/bad"   !
1514   *  5   "//s/s/b" "//s/s/b/foo"                 *
1515   */
1516
1517  if (child_dirent[len] == '/')
1518    /* "parent|child" is one of:
1519     * "[a:]b|/foo" "[a:]/b|/foo" "//s/s|/foo" "//s/s/b|/foo" */
1520    return child_dirent + len + 1;
1521
1522  if (root_len == len)
1523    /* "parent|child" is "|foo" "/|foo" "a:|foo" "a:/|foo" "//s/s|/foo" */
1524    return child_dirent + len;
1525
1526  return NULL;
1527}
1528
1529const char *
1530svn_relpath_skip_ancestor(const char *parent_relpath,
1531                          const char *child_relpath)
1532{
1533  apr_size_t len = strlen(parent_relpath);
1534
1535  assert(relpath_is_canonical(parent_relpath));
1536  assert(relpath_is_canonical(child_relpath));
1537
1538  if (len == 0)
1539    return child_relpath;
1540
1541  if (0 != strncmp(parent_relpath, child_relpath, len))
1542    return NULL; /* parent_relpath is no ancestor of child_relpath */
1543
1544  if (child_relpath[len] == 0)
1545    return ""; /* parent_relpath == child_relpath */
1546
1547  if (child_relpath[len] == '/')
1548    return child_relpath + len + 1;
1549
1550  return NULL;
1551}
1552
1553
1554/* */
1555static const char *
1556uri_skip_ancestor(const char *parent_uri,
1557                  const char *child_uri)
1558{
1559  apr_size_t len = strlen(parent_uri);
1560
1561  assert(svn_uri_is_canonical(parent_uri, NULL));
1562  assert(svn_uri_is_canonical(child_uri, NULL));
1563
1564  if (0 != strncmp(parent_uri, child_uri, len))
1565    return NULL; /* parent_uri is no ancestor of child_uri */
1566
1567  if (child_uri[len] == 0)
1568    return ""; /* parent_uri == child_uri */
1569
1570  if (child_uri[len] == '/')
1571    return child_uri + len + 1;
1572
1573  return NULL;
1574}
1575
1576const char *
1577svn_uri_skip_ancestor(const char *parent_uri,
1578                      const char *child_uri,
1579                      apr_pool_t *result_pool)
1580{
1581  const char *result = uri_skip_ancestor(parent_uri, child_uri);
1582
1583  return result ? svn_path_uri_decode(result, result_pool) : NULL;
1584}
1585
1586svn_boolean_t
1587svn_dirent_is_ancestor(const char *parent_dirent, const char *child_dirent)
1588{
1589  return svn_dirent_skip_ancestor(parent_dirent, child_dirent) != NULL;
1590}
1591
1592svn_boolean_t
1593svn_uri__is_ancestor(const char *parent_uri, const char *child_uri)
1594{
1595  return uri_skip_ancestor(parent_uri, child_uri) != NULL;
1596}
1597
1598
1599svn_boolean_t
1600svn_dirent_is_absolute(const char *dirent)
1601{
1602  if (! dirent)
1603    return FALSE;
1604
1605  /* dirent is absolute if it starts with '/' on non-Windows platforms
1606     or with '//' on Windows platforms */
1607  if (dirent[0] == '/'
1608#ifdef SVN_USE_DOS_PATHS
1609      && dirent[1] == '/' /* Single '/' depends on current drive */
1610#endif
1611      )
1612    return TRUE;
1613
1614  /* On Windows, dirent is also absolute when it starts with 'H:/'
1615     where 'H' is any letter. */
1616#ifdef SVN_USE_DOS_PATHS
1617  if (((dirent[0] >= 'A' && dirent[0] <= 'Z')) &&
1618      (dirent[1] == ':') && (dirent[2] == '/'))
1619     return TRUE;
1620#endif /* SVN_USE_DOS_PATHS */
1621
1622  return FALSE;
1623}
1624
1625svn_error_t *
1626svn_dirent_get_absolute(const char **pabsolute,
1627                        const char *relative,
1628                        apr_pool_t *pool)
1629{
1630  char *buffer;
1631  apr_status_t apr_err;
1632  const char *path_apr;
1633
1634  SVN_ERR_ASSERT(! svn_path_is_url(relative));
1635
1636  /* Merge the current working directory with the relative dirent. */
1637  SVN_ERR(svn_path_cstring_from_utf8(&path_apr, relative, pool));
1638
1639  apr_err = apr_filepath_merge(&buffer, NULL,
1640                               path_apr,
1641                               APR_FILEPATH_NOTRELATIVE,
1642                               pool);
1643  if (apr_err)
1644    {
1645      /* In some cases when the passed path or its ancestor(s) do not exist
1646         or no longer exist apr returns an error.
1647
1648         In many of these cases we would like to return a path anyway, when the
1649         passed path was already a safe absolute path. So check for that now to
1650         avoid an error.
1651
1652         svn_dirent_is_absolute() doesn't perform the necessary checks to see
1653         if the path doesn't need post processing to be in the canonical absolute
1654         format.
1655         */
1656
1657      if (svn_dirent_is_absolute(relative)
1658          && svn_dirent_is_canonical(relative, pool)
1659          && !svn_path_is_backpath_present(relative))
1660        {
1661          *pabsolute = apr_pstrdup(pool, relative);
1662          return SVN_NO_ERROR;
1663        }
1664
1665      return svn_error_createf(SVN_ERR_BAD_FILENAME,
1666                               svn_error_create(apr_err, NULL, NULL),
1667                               _("Couldn't determine absolute path of '%s'"),
1668                               svn_dirent_local_style(relative, pool));
1669    }
1670
1671  SVN_ERR(svn_path_cstring_to_utf8(pabsolute, buffer, pool));
1672  *pabsolute = svn_dirent_canonicalize(*pabsolute, pool);
1673  return SVN_NO_ERROR;
1674}
1675
1676const char *
1677svn_uri_canonicalize(const char *uri, apr_pool_t *pool)
1678{
1679  const char *result;
1680  svn_error_t *const err = canonicalize(&result, type_uri, uri, pool);
1681  if (err)
1682    {
1683      svn_error_clear(err);
1684      SVN_ERR_ASSERT_NO_RETURN(!"URI canonicalization failed");
1685    }
1686  return result;
1687}
1688
1689svn_error_t *
1690svn_uri_canonicalize_safe(const char **canonical_uri,
1691                          const char **non_canonical_result,
1692                          const char *uri,
1693                          apr_pool_t *result_pool,
1694                          apr_pool_t *scratch_pool)
1695{
1696  const char *result = NULL;
1697  SVN_ERR(canonicalize(&result, type_uri, uri, result_pool));
1698  if (!svn_uri_is_canonical(result, scratch_pool))
1699    {
1700      if (non_canonical_result)
1701        *non_canonical_result = result;
1702
1703      return svn_error_createf(
1704          SVN_ERR_CANONICALIZATION_FAILED, NULL,
1705          _("Could not canonicalize URI '%s'"
1706            " (the result '%s' is not canonical)"),
1707          uri, result);
1708    }
1709  *canonical_uri = result;
1710  return SVN_NO_ERROR;
1711}
1712
1713const char *
1714svn_relpath_canonicalize(const char *relpath, apr_pool_t *pool)
1715{
1716  const char *result;
1717  svn_error_t *const err = canonicalize(&result, type_relpath, relpath, pool);
1718  if (err)
1719    {
1720      svn_error_clear(err);
1721      SVN_ERR_ASSERT_NO_RETURN(!"relpath canonicalization failed");
1722    }
1723  return result;
1724}
1725
1726svn_error_t *
1727svn_relpath_canonicalize_safe(const char **canonical_relpath,
1728                              const char **non_canonical_result,
1729                              const char *relpath,
1730                              apr_pool_t *result_pool,
1731                              apr_pool_t *scratch_pool)
1732{
1733  const char *result = NULL;
1734  SVN_ERR(canonicalize(&result, type_relpath, relpath, result_pool));
1735  if (!svn_relpath_is_canonical(result))
1736    {
1737      if (non_canonical_result)
1738        *non_canonical_result = result;
1739
1740      return svn_error_createf(
1741          SVN_ERR_CANONICALIZATION_FAILED, NULL,
1742          _("Could not canonicalize relpath '%s'"
1743            " (the result '%s' is not canonical)"),
1744          relpath, result);
1745    }
1746
1747  SVN_UNUSED(scratch_pool);
1748  *canonical_relpath = result;
1749  return SVN_NO_ERROR;
1750}
1751
1752static svn_error_t *
1753canonicalize_dirent(const char **result, const char *dirent, apr_pool_t *pool)
1754{
1755  const char *dst;
1756  SVN_ERR(canonicalize(&dst, type_dirent, dirent, pool));
1757
1758#ifdef SVN_USE_DOS_PATHS
1759  /* Handle a specific case on Windows where path == "X:/". Here we have to
1760     append the final '/', as svn_path_canonicalize will chop this of. */
1761  if (((dirent[0] >= 'A' && dirent[0] <= 'Z') ||
1762        (dirent[0] >= 'a' && dirent[0] <= 'z')) &&
1763        dirent[1] == ':' && dirent[2] == '/' &&
1764        dst[3] == '\0')
1765    {
1766      char *dst_slash = apr_pcalloc(pool, 4);
1767      dst_slash[0] = canonicalize_to_upper(dirent[0]);
1768      dst_slash[1] = ':';
1769      dst_slash[2] = '/';
1770      dst_slash[3] = '\0';
1771
1772      *result = dst_slash;
1773      return SVN_NO_ERROR;
1774    }
1775#endif /* SVN_USE_DOS_PATHS */
1776
1777  *result = dst;
1778  return SVN_NO_ERROR;
1779}
1780
1781const char *
1782svn_dirent_canonicalize(const char *dirent, apr_pool_t *pool)
1783{
1784  const char *result;
1785  svn_error_t *const err = canonicalize_dirent(&result, dirent, pool);
1786  if (err)
1787    {
1788      svn_error_clear(err);
1789      SVN_ERR_ASSERT_NO_RETURN(!"dirent canonicalization failed");
1790    }
1791  return result;
1792}
1793
1794svn_error_t *
1795svn_dirent_canonicalize_safe(const char **canonical_dirent,
1796                             const char **non_canonical_result,
1797                             const char *dirent,
1798                             apr_pool_t *result_pool,
1799                             apr_pool_t *scratch_pool)
1800{
1801  const char *result = NULL;
1802  SVN_ERR(canonicalize_dirent(&result, dirent, result_pool));
1803  if (!svn_dirent_is_canonical(result, scratch_pool))
1804    {
1805      if (non_canonical_result)
1806        *non_canonical_result = result;
1807
1808      return svn_error_createf(
1809          SVN_ERR_CANONICALIZATION_FAILED, NULL,
1810          _("Could not canonicalize dirent '%s'"
1811            " (the result '%s' is not canonical)"),
1812          dirent, result);
1813    }
1814  *canonical_dirent = result;
1815  return SVN_NO_ERROR;
1816}
1817
1818svn_boolean_t
1819svn_dirent_is_canonical(const char *dirent, apr_pool_t *scratch_pool)
1820{
1821  const char *ptr = dirent;
1822  if (*ptr == '/')
1823    {
1824      ptr++;
1825#ifdef SVN_USE_DOS_PATHS
1826      /* Check for UNC paths */
1827      if (*ptr == '/')
1828        {
1829          /* TODO: Scan hostname and sharename and fall back to part code */
1830
1831          /* ### Fall back to old implementation */
1832          return (strcmp(dirent, svn_dirent_canonicalize(dirent, scratch_pool))
1833                  == 0);
1834        }
1835#endif /* SVN_USE_DOS_PATHS */
1836    }
1837#ifdef SVN_USE_DOS_PATHS
1838  else if (((*ptr >= 'a' && *ptr <= 'z') || (*ptr >= 'A' && *ptr <= 'Z')) &&
1839           (ptr[1] == ':'))
1840    {
1841      /* The only canonical drive names are "A:"..."Z:", no lower case */
1842      if (*ptr < 'A' || *ptr > 'Z')
1843        return FALSE;
1844
1845      ptr += 2;
1846
1847      if (*ptr == '/')
1848        ptr++;
1849    }
1850#endif /* SVN_USE_DOS_PATHS */
1851
1852  return relpath_is_canonical(ptr);
1853}
1854
1855static svn_boolean_t
1856relpath_is_canonical(const char *relpath)
1857{
1858  const char *dot_pos, *ptr = relpath;
1859  apr_size_t i, len;
1860  unsigned pattern = 0;
1861
1862  /* RELPATH is canonical if it has:
1863   *  - no '.' segments
1864   *  - no start and closing '/'
1865   *  - no '//'
1866   */
1867
1868  /* invalid beginnings */
1869  if (*ptr == '/')
1870    return FALSE;
1871
1872  if (ptr[0] == '.' && (ptr[1] == '/' || ptr[1] == '\0'))
1873    return FALSE;
1874
1875  /* valid special cases */
1876  len = strlen(ptr);
1877  if (len < 2)
1878    return TRUE;
1879
1880  /* invalid endings */
1881  if (ptr[len-1] == '/' || (ptr[len-1] == '.' && ptr[len-2] == '/'))
1882    return FALSE;
1883
1884  /* '.' are rare. So, search for them globally. There will often be no
1885   * more than one hit.  Also note that we already checked for invalid
1886   * starts and endings, i.e. we only need to check for "/./"
1887   */
1888  for (dot_pos = memchr(ptr, '.', len);
1889       dot_pos;
1890       dot_pos = strchr(dot_pos+1, '.'))
1891    if (dot_pos > ptr && dot_pos[-1] == '/' && dot_pos[1] == '/')
1892      return FALSE;
1893
1894  /* Now validate the rest of the path. */
1895  for (i = 0; i < len - 1; ++i)
1896    {
1897      pattern = ((pattern & 0xff) << 8) + (unsigned char)ptr[i];
1898      if (pattern == 0x101 * (unsigned char)('/'))
1899        return FALSE;
1900    }
1901
1902  return TRUE;
1903}
1904
1905svn_boolean_t
1906svn_relpath_is_canonical(const char *relpath)
1907{
1908  return relpath_is_canonical(relpath);
1909}
1910
1911svn_boolean_t
1912svn_uri_is_canonical(const char *uri, apr_pool_t *scratch_pool)
1913{
1914  const char *ptr = uri, *seg = uri;
1915  const char *schema_data = NULL;
1916
1917  /* URI is canonical if it has:
1918   *  - lowercase URL scheme
1919   *  - lowercase URL hostname
1920   *  - no '.' segments
1921   *  - no closing '/'
1922   *  - no '//'
1923   *  - uppercase hex-encoded pair digits ("%AB", not "%ab")
1924   */
1925
1926  if (*uri == '\0')
1927    return FALSE;
1928
1929  if (! svn_path_is_url(uri))
1930    return FALSE;
1931
1932  /* Skip the scheme. */
1933  while (*ptr && (*ptr != '/') && (*ptr != ':'))
1934    ptr++;
1935
1936  /* No scheme?  No good. */
1937  if (! (*ptr == ':' && *(ptr+1) == '/' && *(ptr+2) == '/'))
1938    return FALSE;
1939
1940  /* Found a scheme, check that it's all lowercase. */
1941  ptr = uri;
1942  while (*ptr != ':')
1943    {
1944      if (*ptr >= 'A' && *ptr <= 'Z')
1945        return FALSE;
1946      ptr++;
1947    }
1948  /* Skip :// */
1949  ptr += 3;
1950
1951  /* Scheme only?  That works. */
1952  if (! *ptr)
1953    return TRUE;
1954
1955  /* This might be the hostname */
1956  seg = ptr;
1957  while (*ptr && (*ptr != '/') && (*ptr != '@'))
1958    ptr++;
1959
1960  if (*ptr == '@')
1961    seg = ptr + 1;
1962
1963  /* Found a hostname, check that it's all lowercase. */
1964  ptr = seg;
1965
1966  if (*ptr == '[')
1967    {
1968      ptr++;
1969      while (*ptr == ':'
1970             || (*ptr >= '0' && *ptr <= '9')
1971             || (*ptr >= 'a' && *ptr <= 'f'))
1972        {
1973          ptr++;
1974        }
1975
1976      if (*ptr != ']')
1977        return FALSE;
1978      ptr++;
1979    }
1980  else
1981    while (*ptr && *ptr != '/' && *ptr != ':')
1982      {
1983        if (*ptr >= 'A' && *ptr <= 'Z')
1984          return FALSE;
1985        ptr++;
1986      }
1987
1988  /* Found a portnumber */
1989  if (*ptr == ':')
1990    {
1991      apr_int64_t port = 0;
1992
1993      ptr++;
1994      schema_data = ptr;
1995
1996      while (*ptr >= '0' && *ptr <= '9')
1997        {
1998          port = 10 * port + (*ptr - '0');
1999          ptr++;
2000        }
2001
2002      if (ptr == schema_data && (*ptr == '/' || *ptr == '\0'))
2003        return FALSE; /* Fail on "http://host:" */
2004
2005      if (port == 80 && strncmp(uri, "http:", 5) == 0)
2006        return FALSE;
2007      else if (port == 443 && strncmp(uri, "https:", 6) == 0)
2008        return FALSE;
2009      else if (port == 3690 && strncmp(uri, "svn:", 4) == 0)
2010        return FALSE;
2011
2012      while (*ptr && *ptr != '/')
2013        ++ptr; /* Allow "http://host:stuff" */
2014    }
2015
2016  schema_data = ptr;
2017
2018#ifdef SVN_USE_DOS_PATHS
2019  if (schema_data && *ptr == '/')
2020    {
2021      /* If this is a file url, ptr now points to the third '/' in
2022         file:///C:/path. Check that if we have such a URL the drive
2023         letter is in uppercase. */
2024      if (strncmp(uri, "file:", 5) == 0 &&
2025          ! (*(ptr+1) >= 'A' && *(ptr+1) <= 'Z') &&
2026          *(ptr+2) == ':')
2027        return FALSE;
2028    }
2029#endif /* SVN_USE_DOS_PATHS */
2030
2031  /* Now validate the rest of the URI. */
2032  seg = ptr;
2033  while (*ptr && (*ptr != '/'))
2034    ptr++;
2035  while(1)
2036    {
2037      apr_size_t seglen = ptr - seg;
2038
2039      if (seglen == 1 && *seg == '.')
2040        return FALSE;  /*  /./   */
2041
2042      if (*ptr == '/' && *(ptr+1) == '/')
2043        return FALSE;  /*  //    */
2044
2045      if (! *ptr && *(ptr - 1) == '/' && ptr - 1 != uri)
2046        return FALSE;  /* foo/  */
2047
2048      if (! *ptr)
2049        break;
2050
2051      if (*ptr == '/')
2052        ptr++;
2053
2054      seg = ptr;
2055      while (*ptr && (*ptr != '/'))
2056        ptr++;
2057    }
2058
2059  ptr = schema_data;
2060
2061  while (*ptr)
2062    {
2063      if (*ptr == '%')
2064        {
2065          char digitz[3];
2066          int val;
2067
2068          /* Can't usesvn_ctype_isxdigit() because lower case letters are
2069             not in our canonical format */
2070          if (((*(ptr+1) < '0' || *(ptr+1) > '9'))
2071              && (*(ptr+1) < 'A' || *(ptr+1) > 'F'))
2072            return FALSE;
2073          else if (((*(ptr+2) < '0' || *(ptr+2) > '9'))
2074                   && (*(ptr+2) < 'A' || *(ptr+2) > 'F'))
2075            return FALSE;
2076
2077          digitz[0] = *(++ptr);
2078          digitz[1] = *(++ptr);
2079          digitz[2] = '\0';
2080          val = (int)strtol(digitz, NULL, 16);
2081
2082          if (svn_uri__char_validity[val])
2083            return FALSE; /* Should not have been escaped */
2084        }
2085      else if (*ptr != '/' && !svn_uri__char_validity[(unsigned char)*ptr])
2086        return FALSE; /* Character should have been escaped */
2087      ptr++;
2088    }
2089
2090  return TRUE;
2091}
2092
2093svn_error_t *
2094svn_dirent_condense_targets(const char **pcommon,
2095                            apr_array_header_t **pcondensed_targets,
2096                            const apr_array_header_t *targets,
2097                            svn_boolean_t remove_redundancies,
2098                            apr_pool_t *result_pool,
2099                            apr_pool_t *scratch_pool)
2100{
2101  int i, num_condensed = targets->nelts;
2102  svn_boolean_t *removed;
2103  apr_array_header_t *abs_targets;
2104
2105  /* Early exit when there's no data to work on. */
2106  if (targets->nelts <= 0)
2107    {
2108      *pcommon = NULL;
2109      if (pcondensed_targets)
2110        *pcondensed_targets = NULL;
2111      return SVN_NO_ERROR;
2112    }
2113
2114  /* Get the absolute path of the first target. */
2115  SVN_ERR(svn_dirent_get_absolute(pcommon,
2116                                  APR_ARRAY_IDX(targets, 0, const char *),
2117                                  scratch_pool));
2118
2119  /* Early exit when there's only one dirent to work on. */
2120  if (targets->nelts == 1)
2121    {
2122      *pcommon = apr_pstrdup(result_pool, *pcommon);
2123      if (pcondensed_targets)
2124        *pcondensed_targets = apr_array_make(result_pool, 0,
2125                                             sizeof(const char *));
2126      return SVN_NO_ERROR;
2127    }
2128
2129  /* Copy the targets array, but with absolute dirents instead of
2130     relative.  Also, find the pcommon argument by finding what is
2131     common in all of the absolute dirents. NOTE: This is not as
2132     efficient as it could be.  The calculation of the basedir could
2133     be done in the loop below, which would save some calls to
2134     svn_dirent_get_longest_ancestor.  I decided to do it this way
2135     because I thought it would be simpler, since this way, we don't
2136     even do the loop if we don't need to condense the targets. */
2137
2138  removed = apr_pcalloc(scratch_pool, (targets->nelts *
2139                                          sizeof(svn_boolean_t)));
2140  abs_targets = apr_array_make(scratch_pool, targets->nelts,
2141                               sizeof(const char *));
2142
2143  APR_ARRAY_PUSH(abs_targets, const char *) = *pcommon;
2144
2145  for (i = 1; i < targets->nelts; ++i)
2146    {
2147      const char *rel = APR_ARRAY_IDX(targets, i, const char *);
2148      const char *absolute;
2149      SVN_ERR(svn_dirent_get_absolute(&absolute, rel, scratch_pool));
2150      APR_ARRAY_PUSH(abs_targets, const char *) = absolute;
2151      *pcommon = svn_dirent_get_longest_ancestor(*pcommon, absolute,
2152                                                 scratch_pool);
2153    }
2154
2155  *pcommon = apr_pstrdup(result_pool, *pcommon);
2156
2157  if (pcondensed_targets != NULL)
2158    {
2159      size_t basedir_len;
2160
2161      if (remove_redundancies)
2162        {
2163          /* Find the common part of each pair of targets.  If
2164             common part is equal to one of the dirents, the other
2165             is a child of it, and can be removed.  If a target is
2166             equal to *pcommon, it can also be removed. */
2167
2168          /* First pass: when one non-removed target is a child of
2169             another non-removed target, remove the child. */
2170          for (i = 0; i < abs_targets->nelts; ++i)
2171            {
2172              int j;
2173
2174              if (removed[i])
2175                continue;
2176
2177              for (j = i + 1; j < abs_targets->nelts; ++j)
2178                {
2179                  const char *abs_targets_i;
2180                  const char *abs_targets_j;
2181                  const char *ancestor;
2182
2183                  if (removed[j])
2184                    continue;
2185
2186                  abs_targets_i = APR_ARRAY_IDX(abs_targets, i, const char *);
2187                  abs_targets_j = APR_ARRAY_IDX(abs_targets, j, const char *);
2188
2189                  ancestor = svn_dirent_get_longest_ancestor
2190                    (abs_targets_i, abs_targets_j, scratch_pool);
2191
2192                  if (*ancestor == '\0')
2193                    continue;
2194
2195                  if (strcmp(ancestor, abs_targets_i) == 0)
2196                    {
2197                      removed[j] = TRUE;
2198                      num_condensed--;
2199                    }
2200                  else if (strcmp(ancestor, abs_targets_j) == 0)
2201                    {
2202                      removed[i] = TRUE;
2203                      num_condensed--;
2204                    }
2205                }
2206            }
2207
2208          /* Second pass: when a target is the same as *pcommon,
2209             remove the target. */
2210          for (i = 0; i < abs_targets->nelts; ++i)
2211            {
2212              const char *abs_targets_i = APR_ARRAY_IDX(abs_targets, i,
2213                                                        const char *);
2214
2215              if ((strcmp(abs_targets_i, *pcommon) == 0) && (! removed[i]))
2216                {
2217                  removed[i] = TRUE;
2218                  num_condensed--;
2219                }
2220            }
2221        }
2222
2223      /* Now create the return array, and copy the non-removed items */
2224      basedir_len = strlen(*pcommon);
2225      *pcondensed_targets = apr_array_make(result_pool, num_condensed,
2226                                           sizeof(const char *));
2227
2228      for (i = 0; i < abs_targets->nelts; ++i)
2229        {
2230          const char *rel_item = APR_ARRAY_IDX(abs_targets, i, const char *);
2231
2232          /* Skip this if it's been removed. */
2233          if (removed[i])
2234            continue;
2235
2236          /* If a common prefix was found, condensed_targets are given
2237             relative to that prefix.  */
2238          if (basedir_len > 0)
2239            {
2240              /* Only advance our pointer past a dirent separator if
2241                 REL_ITEM isn't the same as *PCOMMON.
2242
2243                 If *PCOMMON is a root dirent, basedir_len will already
2244                 include the closing '/', so never advance the pointer
2245                 here.
2246                 */
2247              rel_item += basedir_len;
2248              if (rel_item[0] &&
2249                  ! svn_dirent_is_root(*pcommon, basedir_len))
2250                rel_item++;
2251            }
2252
2253          APR_ARRAY_PUSH(*pcondensed_targets, const char *)
2254            = apr_pstrdup(result_pool, rel_item);
2255        }
2256    }
2257
2258  return SVN_NO_ERROR;
2259}
2260
2261svn_error_t *
2262svn_uri_condense_targets(const char **pcommon,
2263                         apr_array_header_t **pcondensed_targets,
2264                         const apr_array_header_t *targets,
2265                         svn_boolean_t remove_redundancies,
2266                         apr_pool_t *result_pool,
2267                         apr_pool_t *scratch_pool)
2268{
2269  int i, num_condensed = targets->nelts;
2270  apr_array_header_t *uri_targets;
2271  svn_boolean_t *removed;
2272
2273  /* Early exit when there's no data to work on. */
2274  if (targets->nelts <= 0)
2275    {
2276      *pcommon = NULL;
2277      if (pcondensed_targets)
2278        *pcondensed_targets = NULL;
2279      return SVN_NO_ERROR;
2280    }
2281
2282  *pcommon = svn_uri_canonicalize(APR_ARRAY_IDX(targets, 0, const char *),
2283                                  scratch_pool);
2284
2285  /* Early exit when there's only one uri to work on. */
2286  if (targets->nelts == 1)
2287    {
2288      *pcommon = apr_pstrdup(result_pool, *pcommon);
2289      if (pcondensed_targets)
2290        *pcondensed_targets = apr_array_make(result_pool, 0,
2291                                             sizeof(const char *));
2292      return SVN_NO_ERROR;
2293    }
2294
2295  /* Find the pcommon argument by finding what is common in all of the
2296     uris. NOTE: This is not as efficient as it could be.  The calculation
2297     of the basedir could be done in the loop below, which would
2298     save some calls to svn_uri_get_longest_ancestor.  I decided to do it
2299     this way because I thought it would be simpler, since this way, we don't
2300     even do the loop if we don't need to condense the targets. */
2301
2302  removed = apr_pcalloc(scratch_pool, (targets->nelts *
2303                                          sizeof(svn_boolean_t)));
2304  uri_targets = apr_array_make(scratch_pool, targets->nelts,
2305                               sizeof(const char *));
2306
2307  APR_ARRAY_PUSH(uri_targets, const char *) = *pcommon;
2308
2309  for (i = 1; i < targets->nelts; ++i)
2310    {
2311      const char *uri = svn_uri_canonicalize(
2312                           APR_ARRAY_IDX(targets, i, const char *),
2313                           scratch_pool);
2314      APR_ARRAY_PUSH(uri_targets, const char *) = uri;
2315
2316      /* If the commonmost ancestor so far is empty, there's no point
2317         in continuing to search for a common ancestor at all.  But
2318         we'll keep looping for the sake of canonicalizing the
2319         targets, I suppose.  */
2320      if (**pcommon != '\0')
2321        *pcommon = svn_uri_get_longest_ancestor(*pcommon, uri,
2322                                                scratch_pool);
2323    }
2324
2325  *pcommon = apr_pstrdup(result_pool, *pcommon);
2326
2327  if (pcondensed_targets != NULL)
2328    {
2329      size_t basedir_len;
2330
2331      if (remove_redundancies)
2332        {
2333          /* Find the common part of each pair of targets.  If
2334             common part is equal to one of the dirents, the other
2335             is a child of it, and can be removed.  If a target is
2336             equal to *pcommon, it can also be removed. */
2337
2338          /* First pass: when one non-removed target is a child of
2339             another non-removed target, remove the child. */
2340          for (i = 0; i < uri_targets->nelts; ++i)
2341            {
2342              int j;
2343
2344              if (removed[i])
2345                continue;
2346
2347              for (j = i + 1; j < uri_targets->nelts; ++j)
2348                {
2349                  const char *uri_i;
2350                  const char *uri_j;
2351                  const char *ancestor;
2352
2353                  if (removed[j])
2354                    continue;
2355
2356                  uri_i = APR_ARRAY_IDX(uri_targets, i, const char *);
2357                  uri_j = APR_ARRAY_IDX(uri_targets, j, const char *);
2358
2359                  ancestor = svn_uri_get_longest_ancestor(uri_i,
2360                                                          uri_j,
2361                                                          scratch_pool);
2362
2363                  if (*ancestor == '\0')
2364                    continue;
2365
2366                  if (strcmp(ancestor, uri_i) == 0)
2367                    {
2368                      removed[j] = TRUE;
2369                      num_condensed--;
2370                    }
2371                  else if (strcmp(ancestor, uri_j) == 0)
2372                    {
2373                      removed[i] = TRUE;
2374                      num_condensed--;
2375                    }
2376                }
2377            }
2378
2379          /* Second pass: when a target is the same as *pcommon,
2380             remove the target. */
2381          for (i = 0; i < uri_targets->nelts; ++i)
2382            {
2383              const char *uri_targets_i = APR_ARRAY_IDX(uri_targets, i,
2384                                                        const char *);
2385
2386              if ((strcmp(uri_targets_i, *pcommon) == 0) && (! removed[i]))
2387                {
2388                  removed[i] = TRUE;
2389                  num_condensed--;
2390                }
2391            }
2392        }
2393
2394      /* Now create the return array, and copy the non-removed items */
2395      basedir_len = strlen(*pcommon);
2396      *pcondensed_targets = apr_array_make(result_pool, num_condensed,
2397                                           sizeof(const char *));
2398
2399      for (i = 0; i < uri_targets->nelts; ++i)
2400        {
2401          const char *rel_item = APR_ARRAY_IDX(uri_targets, i, const char *);
2402
2403          /* Skip this if it's been removed. */
2404          if (removed[i])
2405            continue;
2406
2407          /* If a common prefix was found, condensed_targets are given
2408             relative to that prefix.  */
2409          if (basedir_len > 0)
2410            {
2411              /* Only advance our pointer past a dirent separator if
2412                 REL_ITEM isn't the same as *PCOMMON.
2413
2414                 If *PCOMMON is a root dirent, basedir_len will already
2415                 include the closing '/', so never advance the pointer
2416                 here.
2417                 */
2418              rel_item += basedir_len;
2419              if ((rel_item[0] == '/') ||
2420                  (rel_item[0] && !svn_uri_is_root(*pcommon, basedir_len)))
2421                {
2422                  rel_item++;
2423                }
2424            }
2425
2426          APR_ARRAY_PUSH(*pcondensed_targets, const char *)
2427            = svn_path_uri_decode(rel_item, result_pool);
2428        }
2429    }
2430
2431  return SVN_NO_ERROR;
2432}
2433
2434svn_error_t *
2435svn_dirent_is_under_root(svn_boolean_t *under_root,
2436                         const char **result_path,
2437                         const char *base_path,
2438                         const char *path,
2439                         apr_pool_t *result_pool)
2440{
2441  apr_status_t status;
2442  char *full_path;
2443
2444  *under_root = FALSE;
2445  if (result_path)
2446    *result_path = NULL;
2447
2448  status = apr_filepath_merge(&full_path,
2449                              base_path,
2450                              path,
2451                              APR_FILEPATH_NOTABOVEROOT
2452                              | APR_FILEPATH_SECUREROOTTEST,
2453                              result_pool);
2454
2455  if (status == APR_SUCCESS)
2456    {
2457      if (result_path)
2458        *result_path = svn_dirent_canonicalize(full_path, result_pool);
2459      *under_root = TRUE;
2460      return SVN_NO_ERROR;
2461    }
2462  else if (status == APR_EABOVEROOT)
2463    {
2464      *under_root = FALSE;
2465      return SVN_NO_ERROR;
2466    }
2467
2468  return svn_error_wrap_apr(status, NULL);
2469}
2470
2471svn_error_t *
2472svn_uri_get_dirent_from_file_url(const char **dirent,
2473                                 const char *url,
2474                                 apr_pool_t *pool)
2475{
2476  const char *hostname, *path;
2477
2478  SVN_ERR_ASSERT(svn_uri_is_canonical(url, pool));
2479
2480  /* Verify that the URL is well-formed (loosely) */
2481
2482  /* First, check for the "file://" prefix. */
2483  if (strncmp(url, "file://", 7) != 0)
2484    return svn_error_createf(SVN_ERR_RA_ILLEGAL_URL, NULL,
2485                             _("Local URL '%s' does not contain 'file://' "
2486                               "prefix"), url);
2487
2488  /* Find the HOSTNAME portion and the PATH portion of the URL.  The host
2489     name is between the "file://" prefix and the next occurrence of '/'.  We
2490     are considering everything from that '/' until the end of the URL to be
2491     the absolute path portion of the URL.
2492     If we got just "file://", treat it the same as "file:///". */
2493  hostname = url + 7;
2494  path = strchr(hostname, '/');
2495  if (path)
2496    hostname = apr_pstrmemdup(pool, hostname, path - hostname);
2497  else
2498    path = "/";
2499
2500  /* URI-decode HOSTNAME, and set it to NULL if it is "" or "localhost". */
2501  if (*hostname == '\0')
2502    hostname = NULL;
2503  else
2504    {
2505      hostname = svn_path_uri_decode(hostname, pool);
2506      if (strcmp(hostname, "localhost") == 0)
2507        hostname = NULL;
2508    }
2509
2510  /* Duplicate the URL, starting at the top of the path.
2511     At the same time, we URI-decode the path. */
2512#ifdef SVN_USE_DOS_PATHS
2513  /* On Windows, we'll typically have to skip the leading / if the
2514     path starts with a drive letter.  Like most Web browsers, We
2515     support two variants of this scheme:
2516
2517         file:///X:/path    and
2518         file:///X|/path
2519
2520    Note that, at least on WinNT and above,  file:////./X:/path  will
2521    also work, so we must make sure the transformation doesn't break
2522    that, and  file:///path  (that looks within the current drive
2523    only) should also keep working.
2524    If we got a non-empty hostname other than localhost, we convert this
2525    into an UNC path.  In this case, we obviously don't strip the slash
2526    even if the path looks like it starts with a drive letter.
2527  */
2528  {
2529    static const char valid_drive_letters[] =
2530      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
2531    /* Casting away const! */
2532    char *dup_path = (char *)svn_path_uri_decode(path, pool);
2533
2534    /* This check assumes ':' and '|' are already decoded! */
2535    if (!hostname && dup_path[1] && strchr(valid_drive_letters, dup_path[1])
2536        && (dup_path[2] == ':' || dup_path[2] == '|'))
2537      {
2538        /* Skip the leading slash. */
2539        ++dup_path;
2540
2541        if (dup_path[1] == '|')
2542          dup_path[1] = ':';
2543
2544        if (dup_path[2] == '/' || dup_path[2] == '\\' || dup_path[2] == '\0')
2545          {
2546            /* Dirents have upper case drive letters in their canonical form */
2547            dup_path[0] = canonicalize_to_upper(dup_path[0]);
2548
2549            if (dup_path[2] == '\0')
2550              {
2551                /* A valid dirent for the driveroot must be like "C:/" instead of
2552                   just "C:" or svn_dirent_join() will use the current directory
2553                   on the drive instead */
2554                char *new_path = apr_pcalloc(pool, 4);
2555                new_path[0] = dup_path[0];
2556                new_path[1] = ':';
2557                new_path[2] = '/';
2558                new_path[3] = '\0';
2559                dup_path = new_path;
2560              }
2561            else
2562              dup_path[2] = '/'; /* Ensure not relative for '\' after drive! */
2563          }
2564      }
2565    if (hostname)
2566      {
2567        if (dup_path[0] == '/' && dup_path[1] == '\0')
2568          return svn_error_createf(SVN_ERR_RA_ILLEGAL_URL, NULL,
2569                                   _("Local URL '%s' contains only a hostname, "
2570                                     "no path"), url);
2571
2572        /* We still know that the path starts with a slash. */
2573        *dirent = apr_pstrcat(pool, "//", hostname, dup_path, SVN_VA_NULL);
2574      }
2575    else
2576      *dirent = dup_path;
2577  }
2578#else /* !SVN_USE_DOS_PATHS */
2579  /* Currently, the only hostnames we are allowing on non-Win32 platforms
2580     are the empty string and 'localhost'. */
2581  if (hostname)
2582    return svn_error_createf(SVN_ERR_RA_ILLEGAL_URL, NULL,
2583                             _("Local URL '%s' contains unsupported hostname"),
2584                             url);
2585
2586  *dirent = svn_path_uri_decode(path, pool);
2587#endif /* SVN_USE_DOS_PATHS */
2588  return SVN_NO_ERROR;
2589}
2590
2591svn_error_t *
2592svn_uri_get_file_url_from_dirent(const char **url,
2593                                 const char *dirent,
2594                                 apr_pool_t *pool)
2595{
2596  assert(svn_dirent_is_canonical(dirent, pool));
2597
2598  SVN_ERR(svn_dirent_get_absolute(&dirent, dirent, pool));
2599
2600  dirent = svn_path_uri_encode(dirent, pool);
2601
2602#ifndef SVN_USE_DOS_PATHS
2603  if (dirent[0] == '/' && dirent[1] == '\0')
2604    dirent = NULL; /* "file://" is the canonical form of "file:///" */
2605
2606  *url = apr_pstrcat(pool, "file://", dirent, SVN_VA_NULL);
2607#else
2608  if (dirent[0] == '/')
2609    {
2610      /* Handle UNC paths //server/share -> file://server/share */
2611      assert(dirent[1] == '/'); /* Expect UNC, not non-absolute */
2612
2613      *url = apr_pstrcat(pool, "file:", dirent, SVN_VA_NULL);
2614    }
2615  else
2616    {
2617      char *uri = apr_pstrcat(pool, "file:///", dirent, SVN_VA_NULL);
2618      apr_size_t len = 8 /* strlen("file:///") */ + strlen(dirent);
2619
2620      /* "C:/" is a canonical dirent on Windows,
2621         but "file:///C:/" is not a canonical uri */
2622      if (uri[len-1] == '/')
2623        uri[len-1] = '\0';
2624
2625      *url = uri;
2626    }
2627#endif
2628
2629  return SVN_NO_ERROR;
2630}
2631
2632
2633
2634/* -------------- The fspath API (see private/svn_fspath.h) -------------- */
2635
2636svn_boolean_t
2637svn_fspath__is_canonical(const char *fspath)
2638{
2639  return fspath[0] == '/' && relpath_is_canonical(fspath + 1);
2640}
2641
2642
2643const char *
2644svn_fspath__canonicalize(const char *fspath,
2645                         apr_pool_t *pool)
2646{
2647  if ((fspath[0] == '/') && (fspath[1] == '\0'))
2648    return "/";
2649
2650  return apr_pstrcat(pool, "/", svn_relpath_canonicalize(fspath, pool),
2651                     SVN_VA_NULL);
2652}
2653
2654
2655svn_boolean_t
2656svn_fspath__is_root(const char *fspath, apr_size_t len)
2657{
2658  /* directory is root if it's equal to '/' */
2659  return (len == 1 && fspath[0] == '/');
2660}
2661
2662
/* Strip the leading '/' from both canonical fspaths and return
   whatever svn_relpath_skip_ancestor() returns for the resulting
   relpaths. */
const char *
svn_fspath__skip_ancestor(const char *parent_fspath,
                          const char *child_fspath)
{
  const char *parent_relpath;
  const char *child_relpath;

  assert(svn_fspath__is_canonical(parent_fspath));
  assert(svn_fspath__is_canonical(child_fspath));

  parent_relpath = parent_fspath + 1;
  child_relpath = child_fspath + 1;

  return svn_relpath_skip_ancestor(parent_relpath, child_relpath);
}
2672
2673
2674const char *
2675svn_fspath__dirname(const char *fspath,
2676                    apr_pool_t *pool)
2677{
2678  assert(svn_fspath__is_canonical(fspath));
2679
2680  if (fspath[0] == '/' && fspath[1] == '\0')
2681    return apr_pstrdup(pool, fspath);
2682  else
2683    return apr_pstrcat(pool, "/", svn_relpath_dirname(fspath + 1, pool),
2684                       SVN_VA_NULL);
2685}
2686
2687
2688const char *
2689svn_fspath__basename(const char *fspath,
2690                     apr_pool_t *pool)
2691{
2692  const char *result;
2693  assert(svn_fspath__is_canonical(fspath));
2694
2695  result = svn_relpath_basename(fspath + 1, pool);
2696
2697  assert(strchr(result, '/') == NULL);
2698  return result;
2699}
2700
2701void
2702svn_fspath__split(const char **dirpath,
2703                  const char **base_name,
2704                  const char *fspath,
2705                  apr_pool_t *result_pool)
2706{
2707  assert(dirpath != base_name);
2708
2709  if (dirpath)
2710    *dirpath = svn_fspath__dirname(fspath, result_pool);
2711
2712  if (base_name)
2713    *base_name = svn_fspath__basename(fspath, result_pool);
2714}
2715
2716char *
2717svn_fspath__join(const char *fspath,
2718                 const char *relpath,
2719                 apr_pool_t *result_pool)
2720{
2721  char *result;
2722  assert(svn_fspath__is_canonical(fspath));
2723  assert(svn_relpath_is_canonical(relpath));
2724
2725  if (relpath[0] == '\0')
2726    result = apr_pstrdup(result_pool, fspath);
2727  else if (fspath[1] == '\0')
2728    result = apr_pstrcat(result_pool, "/", relpath, SVN_VA_NULL);
2729  else
2730    result = apr_pstrcat(result_pool, fspath, "/", relpath, SVN_VA_NULL);
2731
2732  assert(svn_fspath__is_canonical(result));
2733  return result;
2734}
2735
2736char *
2737svn_fspath__get_longest_ancestor(const char *fspath1,
2738                                 const char *fspath2,
2739                                 apr_pool_t *result_pool)
2740{
2741  char *result;
2742  assert(svn_fspath__is_canonical(fspath1));
2743  assert(svn_fspath__is_canonical(fspath2));
2744
2745  result = apr_pstrcat(result_pool, "/",
2746                       svn_relpath_get_longest_ancestor(fspath1 + 1,
2747                                                        fspath2 + 1,
2748                                                        result_pool),
2749                       SVN_VA_NULL);
2750
2751  assert(svn_fspath__is_canonical(result));
2752  return result;
2753}
2754
2755
2756
2757
2758/* -------------- The urlpath API (see private/svn_fspath.h) ------------- */
2759
2760const char *
2761svn_urlpath__canonicalize(const char *uri,
2762                          apr_pool_t *pool)
2763{
2764  if (svn_path_is_url(uri))
2765    {
2766      uri = svn_uri_canonicalize(uri, pool);
2767    }
2768  else
2769    {
2770      uri = svn_fspath__canonicalize(uri, pool);
2771      /* Do a little dance to normalize hex encoding. */
2772      uri = svn_path_uri_decode(uri, pool);
2773      uri = svn_path_uri_encode(uri, pool);
2774    }
2775  return uri;
2776}
2777
2778
2779/* -------------- The cert API (see private/svn_cert.h) ------------- */
2780
2781svn_boolean_t
2782svn_cert__match_dns_identity(svn_string_t *pattern, svn_string_t *hostname)
2783{
2784  apr_size_t pattern_pos = 0, hostname_pos = 0;
2785
2786  /* support leading wildcards that composed of the only character in the
2787   * left-most label. */
2788  if (pattern->len >= 2 &&
2789      pattern->data[pattern_pos] == '*' &&
2790      pattern->data[pattern_pos + 1] == '.')
2791    {
2792      while (hostname_pos < hostname->len &&
2793             hostname->data[hostname_pos] != '.')
2794        {
2795          hostname_pos++;
2796        }
2797      /* Assume that the wildcard must match something.  Rule 2 says
2798       * that *.example.com should not match example.com.  If the wildcard
2799       * ends up not matching anything then it matches .example.com which
2800       * seems to be essentially the same as just example.com */
2801      if (hostname_pos == 0)
2802        return FALSE;
2803
2804      pattern_pos++;
2805    }
2806
2807  while (pattern_pos < pattern->len && hostname_pos < hostname->len)
2808    {
2809      char pattern_c = pattern->data[pattern_pos];
2810      char hostname_c = hostname->data[hostname_pos];
2811
2812      /* fold case as described in RFC 4343.
2813       * Note: We actually convert to lowercase, since our URI
2814       * canonicalization code converts to lowercase and generally
2815       * most certs are issued with lowercase DNS names, meaning
2816       * this avoids the fold operation in most cases.  The RFC
2817       * suggests the opposite transformation, but doesn't require
2818       * any specific implementation in any case.  It is critical
2819       * that this folding be locale independent so you can't use
2820       * tolower(). */
2821      pattern_c = canonicalize_to_lower(pattern_c);
2822      hostname_c = canonicalize_to_lower(hostname_c);
2823
2824      if (pattern_c != hostname_c)
2825        {
2826          /* doesn't match */
2827          return FALSE;
2828        }
2829      else
2830        {
2831          /* characters match so skip both */
2832          pattern_pos++;
2833          hostname_pos++;
2834        }
2835    }
2836
2837  /* ignore a trailing period on the hostname since this has no effect on the
2838   * security of the matching.  See the following for the long explanation as
2839   * to why:
2840   * https://bugzilla.mozilla.org/show_bug.cgi?id=134402#c28
2841   */
2842  if (pattern_pos == pattern->len &&
2843      hostname_pos == hostname->len - 1 &&
2844      hostname->data[hostname_pos] == '.')
2845    hostname_pos++;
2846
2847  if (pattern_pos != pattern->len || hostname_pos != hostname->len)
2848    {
2849      /* end didn't match */
2850      return FALSE;
2851    }
2852
2853  return TRUE;
2854}
2855