1/**
2 * uri.c: set of generic URI related routines
3 *
4 * Reference: RFCs 3986, 2732 and 2373
5 *
6 * See Copyright for the status of this software.
7 *
8 * daniel@veillard.com
9 */
10
#define IN_LIBXML
#include "libxml.h"

#include <limits.h>
#include <string.h>

#include <libxml/xmlmemory.h>
#include <libxml/uri.h>
#include <libxml/globals.h>
#include <libxml/xmlerror.h>
20
/**
 * MAX_URI_LENGTH:
 *
 * The definition of the URI regexp in the above RFC has no size limit.
 * In practice URIs are usually relatively short, except for the
 * data URI scheme as defined in RFC 2397. Even for data URIs the usual
 * maximum size before hitting random practical limits is around 64 KB
 * and 4KB is usually a maximum admitted limit for proper operations.
 * The value below is more a security limit than anything else and
 * really should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012, this is only enforced on output.
 *
 * The expansion is parenthesized so the macro behaves as a single value
 * inside larger expressions (e.g. x / MAX_URI_LENGTH).
 */
#define MAX_URI_LENGTH (1024 * 1024)
34
35static void
36xmlURIErrMemory(const char *extra)
37{
38    if (extra)
39        __xmlRaiseError(NULL, NULL, NULL,
40                        NULL, NULL, XML_FROM_URI,
41                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
42                        extra, NULL, NULL, 0, 0,
43                        "Memory allocation failed : %s\n", extra);
44    else
45        __xmlRaiseError(NULL, NULL, NULL,
46                        NULL, NULL, XML_FROM_URI,
47                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
48                        NULL, NULL, NULL, 0, 0,
49                        "Memory allocation failed\n");
50}
51
52static void xmlCleanURI(xmlURIPtr uri);
53
/*
 * Character classes from the older RFC 2396 grammar, used by the legacy
 * handling code. Unless stated otherwise the argument is a character
 * value; every macro may evaluate its argument more than once.
 */

/*
 * lowalpha = "a" | "b" | "c" | ... | "y" | "z"
 */
#define IS_LOWALPHA(x) (('a' <= (x)) && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | ... | "Y" | "Z"
 */
#define IS_UPALPHA(x) (('A' <= (x)) && ((x) <= 'Z'))

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 *
 * Any definition of IS_DIGIT made earlier is discarded first.
 */
#undef IS_DIGIT
#define IS_DIGIT(x) (('0' <= (x)) && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x)                                                      \
    (((x) == '-') || ((x) == '_') || ((x) == '.') ||                    \
     ((x) == '!') || ((x) == '~') || ((x) == '*') ||                    \
     ((x) == '\'') || ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * Unlike the other macros of this group, the argument is a pointer to
 * the character to test.
 */
#define IS_UNWISE(p)                                                    \
    ((*(p) == '{') || (*(p) == '}') || (*(p) == '|') ||                 \
     (*(p) == '\\') || (*(p) == '^') || (*(p) == '[') ||                \
     (*(p) == ']') || (*(p) == '`'))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x)                                                  \
    (((x) == ';') || ((x) == '/') || ((x) == '?') || ((x) == ':') ||    \
     ((x) == '@') || ((x) == '&') || ((x) == '=') || ((x) == '+') ||    \
     ((x) == '$') || ((x) == ',') || ((x) == '[') || ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
121
/*
 * Skip to the next character, handling escaped sequences: when the
 * current character is "%" the pointer moves forward by 3, otherwise
 * by 1. The characters following the "%" are not checked here.
 *
 * The argument must be a modifiable pointer lvalue. It is parenthesized
 * in the expansion so that expressions such as NEXT(*pp) update the
 * intended object.
 */

#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/*
 * Duplicate the first n bytes of s through xmlStrndup(), yielding a char *.
 * The whole expansion is parenthesized so it can be used safely as an
 * operand of a larger expression.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
139
140/************************************************************************
141 *									*
142 *                         RFC 3986 parser				*
143 *									*
144 ************************************************************************/
145
/*
 * Character class tests for the RFC 3986 grammar. Every macro takes a
 * pointer to the character to examine and may evaluate its argument
 * more than once, so the argument must be free of side effects.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||               \
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)                                                   \
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||             \
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)                                                \
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||         \
       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||         \
       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||         \
       ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)                                                \
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)                                               \
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||           \
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The two following characters are only examined after the "%" matched.
 * The argument is parenthesized before the offsets are added so that any
 * pointer expression can be passed.
 */
#define ISA_PCT_ENCODED(p)                                              \
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)                                                    \
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||    \
      ((*(p) == ':')) || ((*(p) == '@')))
195
196/**
197 * xmlParse3986Scheme:
198 * @uri:  pointer to an URI structure
199 * @str:  pointer to the string to analyze
200 *
201 * Parse an URI scheme
202 *
203 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
204 *
205 * Returns 0 or the error code
206 */
207static int
208xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
209    const char *cur;
210
211    if (str == NULL)
212	return(-1);
213
214    cur = *str;
215    if (!ISA_ALPHA(cur))
216	return(2);
217    cur++;
218    while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
219           (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
220    if (uri != NULL) {
221	if (uri->scheme != NULL) xmlFree(uri->scheme);
222	uri->scheme = STRNDUP(*str, cur - *str);
223    }
224    *str = cur;
225    return(0);
226}
227
228/**
229 * xmlParse3986Fragment:
230 * @uri:  pointer to an URI structure
231 * @str:  pointer to the string to analyze
232 *
233 * Parse the query part of an URI
234 *
235 * fragment      = *( pchar / "/" / "?" )
236 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
237 *       in the fragment identifier but this is used very broadly for
238 *       xpointer scheme selection, so we are allowing it here to not break
239 *       for example all the DocBook processing chains.
240 *
241 * Returns 0 or the error code
242 */
243static int
244xmlParse3986Fragment(xmlURIPtr uri, const char **str)
245{
246    const char *cur;
247
248    if (str == NULL)
249        return (-1);
250
251    cur = *str;
252
253    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
254           (*cur == '[') || (*cur == ']') ||
255           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
256        NEXT(cur);
257    if (uri != NULL) {
258        if (uri->fragment != NULL)
259            xmlFree(uri->fragment);
260	if (uri->cleanup & 2)
261	    uri->fragment = STRNDUP(*str, cur - *str);
262	else
263	    uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
264    }
265    *str = cur;
266    return (0);
267}
268
269/**
270 * xmlParse3986Query:
271 * @uri:  pointer to an URI structure
272 * @str:  pointer to the string to analyze
273 *
274 * Parse the query part of an URI
275 *
276 * query = *uric
277 *
278 * Returns 0 or the error code
279 */
280static int
281xmlParse3986Query(xmlURIPtr uri, const char **str)
282{
283    const char *cur;
284
285    if (str == NULL)
286        return (-1);
287
288    cur = *str;
289
290    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
291           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
292        NEXT(cur);
293    if (uri != NULL) {
294        if (uri->query != NULL)
295            xmlFree(uri->query);
296	if (uri->cleanup & 2)
297	    uri->query = STRNDUP(*str, cur - *str);
298	else
299	    uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
300
301	/* Save the raw bytes of the query as well.
302	 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
303	 */
304	if (uri->query_raw != NULL)
305	    xmlFree (uri->query_raw);
306	uri->query_raw = STRNDUP (*str, cur - *str);
307    }
308    *str = cur;
309    return (0);
310}
311
312/**
313 * xmlParse3986Port:
314 * @uri:  pointer to an URI structure
315 * @str:  the string to analyze
316 *
317 * Parse a port  part and fills in the appropriate fields
318 * of the @uri structure
319 *
320 * port          = *DIGIT
321 *
322 * Returns 0 or the error code
323 */
324static int
325xmlParse3986Port(xmlURIPtr uri, const char **str)
326{
327    const char *cur = *str;
328
329    if (ISA_DIGIT(cur)) {
330	if (uri != NULL)
331	    uri->port = 0;
332	while (ISA_DIGIT(cur)) {
333	    if (uri != NULL)
334		uri->port = uri->port * 10 + (*cur - '0');
335	    cur++;
336	}
337	*str = cur;
338	return(0);
339    }
340    return(1);
341}
342
343/**
344 * xmlParse3986Userinfo:
345 * @uri:  pointer to an URI structure
346 * @str:  the string to analyze
347 *
348 * Parse an user informations part and fills in the appropriate fields
349 * of the @uri structure
350 *
351 * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
352 *
353 * Returns 0 or the error code
354 */
355static int
356xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
357{
358    const char *cur;
359
360    cur = *str;
361    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
362           ISA_SUB_DELIM(cur) || (*cur == ':'))
363	NEXT(cur);
364    if (*cur == '@') {
365	if (uri != NULL) {
366	    if (uri->user != NULL) xmlFree(uri->user);
367	    if (uri->cleanup & 2)
368		uri->user = STRNDUP(*str, cur - *str);
369	    else
370		uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
371	}
372	*str = cur;
373	return(0);
374    }
375    return(1);
376}
377
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. At most three digits are consumed; a leading "0"
 * followed by another digit is rejected, and so is any three digit
 * value above 255. *@str is only modified on success.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int len;

    if ((cur[0] < '0') || (cur[0] > '9'))
        return(1);

    if ((cur[1] < '0') || (cur[1] > '9')) {
        /* single digit: 0-9 */
        len = 1;
    } else if (cur[0] == '0') {
        /* no leading zero allowed in front of further digits */
        return(1);
    } else if ((cur[2] < '0') || (cur[2] > '9')) {
        /* two digits: 10-99 */
        len = 2;
    } else if (cur[0] == '1') {
        /* 100-199 */
        len = 3;
    } else if ((cur[0] == '2') && (cur[1] <= '4')) {
        /* 200-249 */
        len = 3;
    } else if ((cur[0] == '2') && (cur[1] == '5') && (cur[2] <= '5')) {
        /* 250-255: the bound applies to the third digit */
        len = 3;
    } else {
        return(1);
    }

    *str = cur + len;
    return(0);
}
415/**
416 * xmlParse3986Host:
417 * @uri:  pointer to an URI structure
418 * @str:  the string to analyze
419 *
420 * Parse an host part and fills in the appropriate fields
421 * of the @uri structure
422 *
423 * host          = IP-literal / IPv4address / reg-name
424 * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
425 * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
426 * reg-name      = *( unreserved / pct-encoded / sub-delims )
427 *
428 * Returns 0 or the error code
429 */
430static int
431xmlParse3986Host(xmlURIPtr uri, const char **str)
432{
433    const char *cur = *str;
434    const char *host;
435
436    host = cur;
437    /*
438     * IPv6 and future adressing scheme are enclosed between brackets
439     */
440    if (*cur == '[') {
441        cur++;
442	while ((*cur != ']') && (*cur != 0))
443	    cur++;
444	if (*cur != ']')
445	    return(1);
446	cur++;
447	goto found;
448    }
449    /*
450     * try to parse an IPv4
451     */
452    if (ISA_DIGIT(cur)) {
453        if (xmlParse3986DecOctet(&cur) != 0)
454	    goto not_ipv4;
455	if (*cur != '.')
456	    goto not_ipv4;
457	cur++;
458        if (xmlParse3986DecOctet(&cur) != 0)
459	    goto not_ipv4;
460	if (*cur != '.')
461	    goto not_ipv4;
462        if (xmlParse3986DecOctet(&cur) != 0)
463	    goto not_ipv4;
464	if (*cur != '.')
465	    goto not_ipv4;
466        if (xmlParse3986DecOctet(&cur) != 0)
467	    goto not_ipv4;
468	goto found;
469not_ipv4:
470        cur = *str;
471    }
472    /*
473     * then this should be a hostname which can be empty
474     */
475    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
476        NEXT(cur);
477found:
478    if (uri != NULL) {
479	if (uri->authority != NULL) xmlFree(uri->authority);
480	uri->authority = NULL;
481	if (uri->server != NULL) xmlFree(uri->server);
482	if (cur != host) {
483	    if (uri->cleanup & 2)
484		uri->server = STRNDUP(host, cur - host);
485	    else
486		uri->server = xmlURIUnescapeString(host, cur - host, NULL);
487	} else
488	    uri->server = NULL;
489    }
490    *str = cur;
491    return(0);
492}
493
494/**
495 * xmlParse3986Authority:
496 * @uri:  pointer to an URI structure
497 * @str:  the string to analyze
498 *
499 * Parse an authority part and fills in the appropriate fields
500 * of the @uri structure
501 *
502 * authority     = [ userinfo "@" ] host [ ":" port ]
503 *
504 * Returns 0 or the error code
505 */
506static int
507xmlParse3986Authority(xmlURIPtr uri, const char **str)
508{
509    const char *cur;
510    int ret;
511
512    cur = *str;
513    /*
514     * try to parse an userinfo and check for the trailing @
515     */
516    ret = xmlParse3986Userinfo(uri, &cur);
517    if ((ret != 0) || (*cur != '@'))
518        cur = *str;
519    else
520        cur++;
521    ret = xmlParse3986Host(uri, &cur);
522    if (ret != 0) return(ret);
523    if (*cur == ':') {
524        cur++;
525        ret = xmlParse3986Port(uri, &cur);
526	if (ret != 0) return(ret);
527    }
528    *str = cur;
529    return(0);
530}
531
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character, 0 if there is none
 * @empty: allow an empty segment
 *
 * Skip a path segment; nothing is stored, only *@str is advanced.
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * The scan stops at the first character which is not a pchar or which
 * is equal to @forbid. A segment starting with the forbidden character
 * is treated as an empty segment, hence rejected when @empty is 0.
 *
 * Returns 0 on success, 1 if the segment is empty while @empty is 0
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *cur;

    cur = *str;
    /*
     * Nothing can be consumed: with forbid == 0 the second test can only
     * match the end of the string, which is not a pchar anyway.
     */
    if (!ISA_PCHAR(cur) || (*cur == forbid)) {
        if (empty)
            return(0);
        return(1);
    }
    while (ISA_PCHAR(cur) && (*cur != forbid))
        NEXT(cur);
    *str = cur;
    return (0);
}
564
565/**
566 * xmlParse3986PathAbEmpty:
567 * @uri:  pointer to an URI structure
568 * @str:  the string to analyze
569 *
570 * Parse an path absolute or empty and fills in the appropriate fields
571 * of the @uri structure
572 *
573 * path-abempty  = *( "/" segment )
574 *
575 * Returns 0 or the error code
576 */
577static int
578xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
579{
580    const char *cur;
581    int ret;
582
583    cur = *str;
584
585    while (*cur == '/') {
586        cur++;
587	ret = xmlParse3986Segment(&cur, 0, 1);
588	if (ret != 0) return(ret);
589    }
590    if (uri != NULL) {
591	if (uri->path != NULL) xmlFree(uri->path);
592        if (*str != cur) {
593            if (uri->cleanup & 2)
594                uri->path = STRNDUP(*str, cur - *str);
595            else
596                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
597        } else {
598            uri->path = NULL;
599        }
600    }
601    *str = cur;
602    return (0);
603}
604
605/**
606 * xmlParse3986PathAbsolute:
607 * @uri:  pointer to an URI structure
608 * @str:  the string to analyze
609 *
610 * Parse an path absolute and fills in the appropriate fields
611 * of the @uri structure
612 *
613 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
614 *
615 * Returns 0 or the error code
616 */
617static int
618xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
619{
620    const char *cur;
621    int ret;
622
623    cur = *str;
624
625    if (*cur != '/')
626        return(1);
627    cur++;
628    ret = xmlParse3986Segment(&cur, 0, 0);
629    if (ret == 0) {
630	while (*cur == '/') {
631	    cur++;
632	    ret = xmlParse3986Segment(&cur, 0, 1);
633	    if (ret != 0) return(ret);
634	}
635    }
636    if (uri != NULL) {
637	if (uri->path != NULL) xmlFree(uri->path);
638        if (cur != *str) {
639            if (uri->cleanup & 2)
640                uri->path = STRNDUP(*str, cur - *str);
641            else
642                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
643        } else {
644            uri->path = NULL;
645        }
646    }
647    *str = cur;
648    return (0);
649}
650
651/**
652 * xmlParse3986PathRootless:
653 * @uri:  pointer to an URI structure
654 * @str:  the string to analyze
655 *
656 * Parse an path without root and fills in the appropriate fields
657 * of the @uri structure
658 *
659 * path-rootless = segment-nz *( "/" segment )
660 *
661 * Returns 0 or the error code
662 */
663static int
664xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
665{
666    const char *cur;
667    int ret;
668
669    cur = *str;
670
671    ret = xmlParse3986Segment(&cur, 0, 0);
672    if (ret != 0) return(ret);
673    while (*cur == '/') {
674        cur++;
675	ret = xmlParse3986Segment(&cur, 0, 1);
676	if (ret != 0) return(ret);
677    }
678    if (uri != NULL) {
679	if (uri->path != NULL) xmlFree(uri->path);
680        if (cur != *str) {
681            if (uri->cleanup & 2)
682                uri->path = STRNDUP(*str, cur - *str);
683            else
684                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
685        } else {
686            uri->path = NULL;
687        }
688    }
689    *str = cur;
690    return (0);
691}
692
693/**
694 * xmlParse3986PathNoScheme:
695 * @uri:  pointer to an URI structure
696 * @str:  the string to analyze
697 *
698 * Parse an path which is not a scheme and fills in the appropriate fields
699 * of the @uri structure
700 *
701 * path-noscheme = segment-nz-nc *( "/" segment )
702 *
703 * Returns 0 or the error code
704 */
705static int
706xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
707{
708    const char *cur;
709    int ret;
710
711    cur = *str;
712
713    ret = xmlParse3986Segment(&cur, ':', 0);
714    if (ret != 0) return(ret);
715    while (*cur == '/') {
716        cur++;
717	ret = xmlParse3986Segment(&cur, 0, 1);
718	if (ret != 0) return(ret);
719    }
720    if (uri != NULL) {
721	if (uri->path != NULL) xmlFree(uri->path);
722        if (cur != *str) {
723            if (uri->cleanup & 2)
724                uri->path = STRNDUP(*str, cur - *str);
725            else
726                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
727        } else {
728            uri->path = NULL;
729        }
730    }
731    *str = cur;
732    return (0);
733}
734
735/**
736 * xmlParse3986HierPart:
737 * @uri:  pointer to an URI structure
738 * @str:  the string to analyze
739 *
740 * Parse an hierarchical part and fills in the appropriate fields
741 * of the @uri structure
742 *
743 * hier-part     = "//" authority path-abempty
744 *                / path-absolute
745 *                / path-rootless
746 *                / path-empty
747 *
748 * Returns 0 or the error code
749 */
750static int
751xmlParse3986HierPart(xmlURIPtr uri, const char **str)
752{
753    const char *cur;
754    int ret;
755
756    cur = *str;
757
758    if ((*cur == '/') && (*(cur + 1) == '/')) {
759        cur += 2;
760	ret = xmlParse3986Authority(uri, &cur);
761	if (ret != 0) return(ret);
762	ret = xmlParse3986PathAbEmpty(uri, &cur);
763	if (ret != 0) return(ret);
764	*str = cur;
765	return(0);
766    } else if (*cur == '/') {
767        ret = xmlParse3986PathAbsolute(uri, &cur);
768	if (ret != 0) return(ret);
769    } else if (ISA_PCHAR(cur)) {
770        ret = xmlParse3986PathRootless(uri, &cur);
771	if (ret != 0) return(ret);
772    } else {
773	/* path-empty is effectively empty */
774	if (uri != NULL) {
775	    if (uri->path != NULL) xmlFree(uri->path);
776	    uri->path = NULL;
777	}
778    }
779    *str = cur;
780    return (0);
781}
782
783/**
784 * xmlParse3986RelativeRef:
785 * @uri:  pointer to an URI structure
786 * @str:  the string to analyze
787 *
788 * Parse an URI string and fills in the appropriate fields
789 * of the @uri structure
790 *
791 * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
792 * relative-part = "//" authority path-abempty
793 *               / path-absolute
794 *               / path-noscheme
795 *               / path-empty
796 *
797 * Returns 0 or the error code
798 */
799static int
800xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
801    int ret;
802
803    if ((*str == '/') && (*(str + 1) == '/')) {
804        str += 2;
805	ret = xmlParse3986Authority(uri, &str);
806	if (ret != 0) return(ret);
807	ret = xmlParse3986PathAbEmpty(uri, &str);
808	if (ret != 0) return(ret);
809    } else if (*str == '/') {
810	ret = xmlParse3986PathAbsolute(uri, &str);
811	if (ret != 0) return(ret);
812    } else if (ISA_PCHAR(str)) {
813        ret = xmlParse3986PathNoScheme(uri, &str);
814	if (ret != 0) return(ret);
815    } else {
816	/* path-empty is effectively empty */
817	if (uri != NULL) {
818	    if (uri->path != NULL) xmlFree(uri->path);
819	    uri->path = NULL;
820	}
821    }
822
823    if (*str == '?') {
824	str++;
825	ret = xmlParse3986Query(uri, &str);
826	if (ret != 0) return(ret);
827    }
828    if (*str == '#') {
829	str++;
830	ret = xmlParse3986Fragment(uri, &str);
831	if (ret != 0) return(ret);
832    }
833    if (*str != 0) {
834	xmlCleanURI(uri);
835	return(1);
836    }
837    return(0);
838}
839
840
841/**
842 * xmlParse3986URI:
843 * @uri:  pointer to an URI structure
844 * @str:  the string to analyze
845 *
846 * Parse an URI string and fills in the appropriate fields
847 * of the @uri structure
848 *
849 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
850 *
851 * Returns 0 or the error code
852 */
853static int
854xmlParse3986URI(xmlURIPtr uri, const char *str) {
855    int ret;
856
857    ret = xmlParse3986Scheme(uri, &str);
858    if (ret != 0) return(ret);
859    if (*str != ':') {
860	return(1);
861    }
862    str++;
863    ret = xmlParse3986HierPart(uri, &str);
864    if (ret != 0) return(ret);
865    if (*str == '?') {
866	str++;
867	ret = xmlParse3986Query(uri, &str);
868	if (ret != 0) return(ret);
869    }
870    if (*str == '#') {
871	str++;
872	ret = xmlParse3986Fragment(uri, &str);
873	if (ret != 0) return(ret);
874    }
875    if (*str != 0) {
876	xmlCleanURI(uri);
877	return(1);
878    }
879    return(0);
880}
881
882/**
883 * xmlParse3986URIReference:
884 * @uri:  pointer to an URI structure
885 * @str:  the string to analyze
886 *
887 * Parse an URI reference string and fills in the appropriate fields
888 * of the @uri structure
889 *
890 * URI-reference = URI / relative-ref
891 *
892 * Returns 0 or the error code
893 */
894static int
895xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
896    int ret;
897
898    if (str == NULL)
899	return(-1);
900    xmlCleanURI(uri);
901
902    /*
903     * Try first to parse absolute refs, then fallback to relative if
904     * it fails.
905     */
906    ret = xmlParse3986URI(uri, str);
907    if (ret != 0) {
908	xmlCleanURI(uri);
909        ret = xmlParse3986RelativeRef(uri, str);
910	if (ret != 0) {
911	    xmlCleanURI(uri);
912	    return(ret);
913	}
914    }
915    return(0);
916}
917
918/**
919 * xmlParseURI:
920 * @str:  the URI string to analyze
921 *
922 * Parse an URI based on RFC 3986
923 *
924 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
925 *
926 * Returns a newly built xmlURIPtr or NULL in case of error
927 */
928xmlURIPtr
929xmlParseURI(const char *str) {
930    xmlURIPtr uri;
931    int ret;
932
933    if (str == NULL)
934	return(NULL);
935    uri = xmlCreateURI();
936    if (uri != NULL) {
937	ret = xmlParse3986URIReference(uri, str);
938        if (ret) {
939	    xmlFreeURI(uri);
940	    return(NULL);
941	}
942    }
943    return(uri);
944}
945
946/**
947 * xmlParseURIReference:
948 * @uri:  pointer to an URI structure
949 * @str:  the string to analyze
950 *
951 * Parse an URI reference string based on RFC 3986 and fills in the
952 * appropriate fields of the @uri structure
953 *
954 * URI-reference = URI / relative-ref
955 *
956 * Returns 0 or the error code
957 */
958int
959xmlParseURIReference(xmlURIPtr uri, const char *str) {
960    return(xmlParse3986URIReference(uri, str));
961}
962
963/**
964 * xmlParseURIRaw:
965 * @str:  the URI string to analyze
966 * @raw:  if 1 unescaping of URI pieces are disabled
967 *
968 * Parse an URI but allows to keep intact the original fragments.
969 *
970 * URI-reference = URI / relative-ref
971 *
972 * Returns a newly built xmlURIPtr or NULL in case of error
973 */
974xmlURIPtr
975xmlParseURIRaw(const char *str, int raw) {
976    xmlURIPtr uri;
977    int ret;
978
979    if (str == NULL)
980	return(NULL);
981    uri = xmlCreateURI();
982    if (uri != NULL) {
983        if (raw) {
984	    uri->cleanup |= 2;
985	}
986	ret = xmlParseURIReference(uri, str);
987        if (ret) {
988	    xmlFreeURI(uri);
989	    return(NULL);
990	}
991    }
992    return(uri);
993}
994
995/************************************************************************
996 *									*
997 *			Generic URI structure functions			*
998 *									*
999 ************************************************************************/
1000
1001/**
1002 * xmlCreateURI:
1003 *
1004 * Simply creates an empty xmlURI
1005 *
1006 * Returns the new structure or NULL in case of error
1007 */
1008xmlURIPtr
1009xmlCreateURI(void) {
1010    xmlURIPtr ret;
1011
1012    ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1013    if (ret == NULL) {
1014        xmlURIErrMemory("creating URI structure\n");
1015	return(NULL);
1016    }
1017    memset(ret, 0, sizeof(xmlURI));
1018    return(ret);
1019}
1020
1021/**
1022 * xmlSaveUriRealloc:
1023 *
1024 * Function to handle properly a reallocation when saving an URI
1025 * Also imposes some limit on the length of an URI string output
1026 */
1027static xmlChar *
1028xmlSaveUriRealloc(xmlChar *ret, int *max) {
1029    xmlChar *temp;
1030    int tmp;
1031
1032    if (*max > MAX_URI_LENGTH) {
1033        xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1034        return(NULL);
1035    }
1036    tmp = *max * 2;
1037    temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1038    if (temp == NULL) {
1039        xmlURIErrMemory("saving URI\n");
1040        return(NULL);
1041    }
1042    *max = tmp;
1043    return(temp);
1044}
1045
1046/**
1047 * xmlSaveUri:
1048 * @uri:  pointer to an xmlURI
1049 *
1050 * Save the URI as an escaped string
1051 *
1052 * Returns a new string (to be deallocated by caller)
1053 */
1054xmlChar *
1055xmlSaveUri(xmlURIPtr uri) {
1056    xmlChar *ret = NULL;
1057    xmlChar *temp;
1058    const char *p;
1059    int len;
1060    int max;
1061
1062    if (uri == NULL) return(NULL);
1063
1064
1065    max = 80;
1066    ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
1067    if (ret == NULL) {
1068        xmlURIErrMemory("saving URI\n");
1069	return(NULL);
1070    }
1071    len = 0;
1072
1073    if (uri->scheme != NULL) {
1074	p = uri->scheme;
1075	while (*p != 0) {
1076	    if (len >= max) {
1077                temp = xmlSaveUriRealloc(ret, &max);
1078                if (temp == NULL) goto mem_error;
1079		ret = temp;
1080	    }
1081	    ret[len++] = *p++;
1082	}
1083	if (len >= max) {
1084            temp = xmlSaveUriRealloc(ret, &max);
1085            if (temp == NULL) goto mem_error;
1086            ret = temp;
1087	}
1088	ret[len++] = ':';
1089    }
1090    if (uri->opaque != NULL) {
1091	p = uri->opaque;
1092	while (*p != 0) {
1093	    if (len + 3 >= max) {
1094                temp = xmlSaveUriRealloc(ret, &max);
1095                if (temp == NULL) goto mem_error;
1096                ret = temp;
1097	    }
1098	    if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1099		ret[len++] = *p++;
1100	    else {
1101		int val = *(unsigned char *)p++;
1102		int hi = val / 0x10, lo = val % 0x10;
1103		ret[len++] = '%';
1104		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1105		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1106	    }
1107	}
1108    } else {
1109	if (uri->server != NULL) {
1110	    if (len + 3 >= max) {
1111                temp = xmlSaveUriRealloc(ret, &max);
1112                if (temp == NULL) goto mem_error;
1113                ret = temp;
1114	    }
1115	    ret[len++] = '/';
1116	    ret[len++] = '/';
1117	    if (uri->user != NULL) {
1118		p = uri->user;
1119		while (*p != 0) {
1120		    if (len + 3 >= max) {
1121                        temp = xmlSaveUriRealloc(ret, &max);
1122                        if (temp == NULL) goto mem_error;
1123                        ret = temp;
1124		    }
1125		    if ((IS_UNRESERVED(*(p))) ||
1126			((*(p) == ';')) || ((*(p) == ':')) ||
1127			((*(p) == '&')) || ((*(p) == '=')) ||
1128			((*(p) == '+')) || ((*(p) == '$')) ||
1129			((*(p) == ',')))
1130			ret[len++] = *p++;
1131		    else {
1132			int val = *(unsigned char *)p++;
1133			int hi = val / 0x10, lo = val % 0x10;
1134			ret[len++] = '%';
1135			ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1136			ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1137		    }
1138		}
1139		if (len + 3 >= max) {
1140                    temp = xmlSaveUriRealloc(ret, &max);
1141                    if (temp == NULL) goto mem_error;
1142                    ret = temp;
1143		}
1144		ret[len++] = '@';
1145	    }
1146	    p = uri->server;
1147	    while (*p != 0) {
1148		if (len >= max) {
1149                    temp = xmlSaveUriRealloc(ret, &max);
1150                    if (temp == NULL) goto mem_error;
1151                    ret = temp;
1152		}
1153		ret[len++] = *p++;
1154	    }
1155	    if (uri->port > 0) {
1156		if (len + 10 >= max) {
1157                    temp = xmlSaveUriRealloc(ret, &max);
1158                    if (temp == NULL) goto mem_error;
1159                    ret = temp;
1160		}
1161		len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1162	    }
1163	} else if (uri->authority != NULL) {
1164	    if (len + 3 >= max) {
1165                temp = xmlSaveUriRealloc(ret, &max);
1166                if (temp == NULL) goto mem_error;
1167                ret = temp;
1168	    }
1169	    ret[len++] = '/';
1170	    ret[len++] = '/';
1171	    p = uri->authority;
1172	    while (*p != 0) {
1173		if (len + 3 >= max) {
1174                    temp = xmlSaveUriRealloc(ret, &max);
1175                    if (temp == NULL) goto mem_error;
1176                    ret = temp;
1177		}
1178		if ((IS_UNRESERVED(*(p))) ||
1179                    ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1180                    ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1181                    ((*(p) == '=')) || ((*(p) == '+')))
1182		    ret[len++] = *p++;
1183		else {
1184		    int val = *(unsigned char *)p++;
1185		    int hi = val / 0x10, lo = val % 0x10;
1186		    ret[len++] = '%';
1187		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1188		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1189		}
1190	    }
1191	} else if (uri->scheme != NULL) {
1192	    if (len + 3 >= max) {
1193                temp = xmlSaveUriRealloc(ret, &max);
1194                if (temp == NULL) goto mem_error;
1195                ret = temp;
1196	    }
1197	    ret[len++] = '/';
1198	    ret[len++] = '/';
1199	}
1200	if (uri->path != NULL) {
1201	    p = uri->path;
1202	    /*
1203	     * the colon in file:///d: should not be escaped or
1204	     * Windows accesses fail later.
1205	     */
1206	    if ((uri->scheme != NULL) &&
1207		(p[0] == '/') &&
1208		(((p[1] >= 'a') && (p[1] <= 'z')) ||
1209		 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1210		(p[2] == ':') &&
1211	        (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1212		if (len + 3 >= max) {
1213                    temp = xmlSaveUriRealloc(ret, &max);
1214                    if (temp == NULL) goto mem_error;
1215                    ret = temp;
1216		}
1217		ret[len++] = *p++;
1218		ret[len++] = *p++;
1219		ret[len++] = *p++;
1220	    }
1221	    while (*p != 0) {
1222		if (len + 3 >= max) {
1223                    temp = xmlSaveUriRealloc(ret, &max);
1224                    if (temp == NULL) goto mem_error;
1225                    ret = temp;
1226		}
1227		if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1228                    ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1229	            ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1230	            ((*(p) == ',')))
1231		    ret[len++] = *p++;
1232		else {
1233		    int val = *(unsigned char *)p++;
1234		    int hi = val / 0x10, lo = val % 0x10;
1235		    ret[len++] = '%';
1236		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1237		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1238		}
1239	    }
1240	}
1241	if (uri->query_raw != NULL) {
1242	    if (len + 1 >= max) {
1243                temp = xmlSaveUriRealloc(ret, &max);
1244                if (temp == NULL) goto mem_error;
1245                ret = temp;
1246	    }
1247	    ret[len++] = '?';
1248	    p = uri->query_raw;
1249	    while (*p != 0) {
1250		if (len + 1 >= max) {
1251                    temp = xmlSaveUriRealloc(ret, &max);
1252                    if (temp == NULL) goto mem_error;
1253                    ret = temp;
1254		}
1255		ret[len++] = *p++;
1256	    }
1257	} else if (uri->query != NULL) {
1258	    if (len + 3 >= max) {
1259                temp = xmlSaveUriRealloc(ret, &max);
1260                if (temp == NULL) goto mem_error;
1261                ret = temp;
1262	    }
1263	    ret[len++] = '?';
1264	    p = uri->query;
1265	    while (*p != 0) {
1266		if (len + 3 >= max) {
1267                    temp = xmlSaveUriRealloc(ret, &max);
1268                    if (temp == NULL) goto mem_error;
1269                    ret = temp;
1270		}
1271		if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1272		    ret[len++] = *p++;
1273		else {
1274		    int val = *(unsigned char *)p++;
1275		    int hi = val / 0x10, lo = val % 0x10;
1276		    ret[len++] = '%';
1277		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1278		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1279		}
1280	    }
1281	}
1282    }
1283    if (uri->fragment != NULL) {
1284	if (len + 3 >= max) {
1285            temp = xmlSaveUriRealloc(ret, &max);
1286            if (temp == NULL) goto mem_error;
1287            ret = temp;
1288	}
1289	ret[len++] = '#';
1290	p = uri->fragment;
1291	while (*p != 0) {
1292	    if (len + 3 >= max) {
1293                temp = xmlSaveUriRealloc(ret, &max);
1294                if (temp == NULL) goto mem_error;
1295                ret = temp;
1296	    }
1297	    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1298		ret[len++] = *p++;
1299	    else {
1300		int val = *(unsigned char *)p++;
1301		int hi = val / 0x10, lo = val % 0x10;
1302		ret[len++] = '%';
1303		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1304		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1305	    }
1306	}
1307    }
1308    if (len >= max) {
1309        temp = xmlSaveUriRealloc(ret, &max);
1310        if (temp == NULL) goto mem_error;
1311        ret = temp;
1312    }
1313    ret[len] = 0;
1314    return(ret);
1315
1316mem_error:
1317    xmlFree(ret);
1318    return(NULL);
1319}
1320
1321/**
1322 * xmlPrintURI:
1323 * @stream:  a FILE* for the output
1324 * @uri:  pointer to an xmlURI
1325 *
1326 * Prints the URI in the stream @stream.
1327 */
1328void
1329xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1330    xmlChar *out;
1331
1332    out = xmlSaveUri(uri);
1333    if (out != NULL) {
1334	fprintf(stream, "%s", (char *) out);
1335	xmlFree(out);
1336    }
1337}
1338
1339/**
1340 * xmlCleanURI:
1341 * @uri:  pointer to an xmlURI
1342 *
1343 * Make sure the xmlURI struct is free of content
1344 */
1345static void
1346xmlCleanURI(xmlURIPtr uri) {
1347    if (uri == NULL) return;
1348
1349    if (uri->scheme != NULL) xmlFree(uri->scheme);
1350    uri->scheme = NULL;
1351    if (uri->server != NULL) xmlFree(uri->server);
1352    uri->server = NULL;
1353    if (uri->user != NULL) xmlFree(uri->user);
1354    uri->user = NULL;
1355    if (uri->path != NULL) xmlFree(uri->path);
1356    uri->path = NULL;
1357    if (uri->fragment != NULL) xmlFree(uri->fragment);
1358    uri->fragment = NULL;
1359    if (uri->opaque != NULL) xmlFree(uri->opaque);
1360    uri->opaque = NULL;
1361    if (uri->authority != NULL) xmlFree(uri->authority);
1362    uri->authority = NULL;
1363    if (uri->query != NULL) xmlFree(uri->query);
1364    uri->query = NULL;
1365    if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1366    uri->query_raw = NULL;
1367}
1368
1369/**
1370 * xmlFreeURI:
1371 * @uri:  pointer to an xmlURI
1372 *
1373 * Free up the xmlURI struct
1374 */
1375void
1376xmlFreeURI(xmlURIPtr uri) {
1377    if (uri == NULL) return;
1378
1379    if (uri->scheme != NULL) xmlFree(uri->scheme);
1380    if (uri->server != NULL) xmlFree(uri->server);
1381    if (uri->user != NULL) xmlFree(uri->user);
1382    if (uri->path != NULL) xmlFree(uri->path);
1383    if (uri->fragment != NULL) xmlFree(uri->fragment);
1384    if (uri->opaque != NULL) xmlFree(uri->opaque);
1385    if (uri->authority != NULL) xmlFree(uri->authority);
1386    if (uri->query != NULL) xmlFree(uri->query);
1387    if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1388    xmlFree(uri);
1389}
1390
1391/************************************************************************
1392 *									*
1393 *			Helper functions				*
1394 *									*
1395 ************************************************************************/
1396
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * Normalization occurs directly on the string, no new allocation is done
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // : collapse a run of slashes into a single one */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* Source and destination overlap, so memmove (not strcpy/memcpy)
         * is required; the + 1 carries the terminating 0 along.
         */
        segp += 3;
        memmove(cur, segp, strlen(segp) + 1);

        /* Walk backwards over the slash(es) that precede "cur".  */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;
        /* Reaching the start of the buffer on a '/' means there is no
         * previous segment: keep going from here.  When path[0] is not a
         * slash the buffer start is itself (part of) a previous segment,
         * e.g. the "a" of "a/b/../..", and must be re-examined below.
         */
        if ((segp == path) && (path[0] == '/'))
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (only for paths that start
     * with a slash).
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
1585
/*
 * Returns 1 when @c is one of the characters 0-9, a-f or A-F,
 * 0 otherwise.
 */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);
    if ((c >= 'a') && (c <= 'f'))
        return(1);
    if ((c >= 'A') && (c <= 'F'))
        return(1);
    return(0);
}
1593
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Copies @str while replacing every "%XX" sequence (XX being two hex
 * digits) by the single byte it encodes; no check is made that the
 * string is a URI and no character encoding is applied. A '%' that is not
 * followed by two hex digits within the remaining length is copied as is.
 * The result is never longer than the input. When @target is NULL a
 * buffer of @len + 1 bytes is allocated, otherwise the output, including
 * its terminating 0, is written to @target.
 *
 * Returns the unescaped string (@target when it was provided), or NULL
 * in case of error
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    const char *src;
    char *result, *dst;
    int remaining;

    if (str == NULL)
        return(NULL);

    remaining = len;
    if (remaining <= 0)
        remaining = strlen(str);
    if (remaining < 0)
        return(NULL);

    result = target;
    if (result == NULL) {
        result = (char *) xmlMallocAtomic(remaining + 1);
        if (result == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    }

    src = str;
    dst = result;
    while (remaining > 0) {
        int value, i;

        /* Anything that is not a complete %XX escape is copied verbatim. */
        if ((remaining <= 2) || (src[0] != '%') ||
            (!is_hex(src[1])) || (!is_hex(src[2]))) {
            *dst++ = *src++;
            remaining--;
            continue;
        }

        /* Both characters were validated by is_hex() above. */
        value = 0;
        for (i = 1; i <= 2; i++) {
            char digit = src[i];
            int nibble;

            if ((digit >= '0') && (digit <= '9'))
                nibble = digit - '0';
            else if ((digit >= 'a') && (digit <= 'f'))
                nibble = (digit - 'a') + 10;
            else
                nibble = (digit - 'A') + 10;
            value = value * 16 + nibble;
        }
        *dst++ = value;
        src += 3;
        remaining -= 3;
    }
    *dst = 0;
    return(result);
}
1655
1656/**
1657 * xmlURIEscapeStr:
1658 * @str:  string to escape
1659 * @list: exception list string of chars not to escape
1660 *
1661 * This routine escapes a string to hex, ignoring reserved characters (a-z)
1662 * and the characters in the exception list.
1663 *
1664 * Returns a new escaped string or NULL in case of error.
1665 */
1666xmlChar *
1667xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1668    xmlChar *ret, ch;
1669    xmlChar *temp;
1670    const xmlChar *in;
1671    int len, out;
1672
1673    if (str == NULL)
1674	return(NULL);
1675    if (str[0] == 0)
1676	return(xmlStrdup(str));
1677    len = xmlStrlen(str);
1678    if (!(len > 0)) return(NULL);
1679
1680    len += 20;
1681    ret = (xmlChar *) xmlMallocAtomic(len);
1682    if (ret == NULL) {
1683        xmlURIErrMemory("escaping URI value\n");
1684	return(NULL);
1685    }
1686    in = (const xmlChar *) str;
1687    out = 0;
1688    while(*in != 0) {
1689	if (len - out <= 3) {
1690            temp = xmlSaveUriRealloc(ret, &len);
1691	    if (temp == NULL) {
1692                xmlURIErrMemory("escaping URI value\n");
1693		xmlFree(ret);
1694		return(NULL);
1695	    }
1696	    ret = temp;
1697	}
1698
1699	ch = *in;
1700
1701	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1702	    unsigned char val;
1703	    ret[out++] = '%';
1704	    val = ch >> 4;
1705	    if (val <= 9)
1706		ret[out++] = '0' + val;
1707	    else
1708		ret[out++] = 'A' + val - 0xA;
1709	    val = ch & 0xF;
1710	    if (val <= 9)
1711		ret[out++] = '0' + val;
1712	    else
1713		ret[out++] = 'A' + val - 0xA;
1714	    in++;
1715	} else {
1716	    ret[out++] = *in++;
1717	}
1718
1719    }
1720    ret[out] = 0;
1721    return(ret);
1722}
1723
1724/**
1725 * xmlURIEscape:
1726 * @str:  the string of the URI to escape
1727 *
1728 * Escaping routine, does not do validity checks !
1729 * It will try to escape the chars needing this, but this is heuristic
1730 * based it's impossible to be sure.
1731 *
1732 * Returns an copy of the string, but escaped
1733 *
1734 * 25 May 2001
1735 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1736 * according to RFC2396.
1737 *   - Carl Douglas
1738 */
1739xmlChar *
1740xmlURIEscape(const xmlChar * str)
1741{
1742    xmlChar *ret, *segment = NULL;
1743    xmlURIPtr uri;
1744    int ret2;
1745
1746#define NULLCHK(p) if(!p) { \
1747         xmlURIErrMemory("escaping URI value\n"); \
1748         xmlFreeURI(uri); \
1749         return NULL; } \
1750
1751    if (str == NULL)
1752        return (NULL);
1753
1754    uri = xmlCreateURI();
1755    if (uri != NULL) {
1756	/*
1757	 * Allow escaping errors in the unescaped form
1758	 */
1759        uri->cleanup = 1;
1760        ret2 = xmlParseURIReference(uri, (const char *)str);
1761        if (ret2) {
1762            xmlFreeURI(uri);
1763            return (NULL);
1764        }
1765    }
1766
1767    if (!uri)
1768        return NULL;
1769
1770    ret = NULL;
1771
1772    if (uri->scheme) {
1773        segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1774        NULLCHK(segment)
1775        ret = xmlStrcat(ret, segment);
1776        ret = xmlStrcat(ret, BAD_CAST ":");
1777        xmlFree(segment);
1778    }
1779
1780    if (uri->authority) {
1781        segment =
1782            xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1783        NULLCHK(segment)
1784        ret = xmlStrcat(ret, BAD_CAST "//");
1785        ret = xmlStrcat(ret, segment);
1786        xmlFree(segment);
1787    }
1788
1789    if (uri->user) {
1790        segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1791        NULLCHK(segment)
1792		ret = xmlStrcat(ret,BAD_CAST "//");
1793        ret = xmlStrcat(ret, segment);
1794        ret = xmlStrcat(ret, BAD_CAST "@");
1795        xmlFree(segment);
1796    }
1797
1798    if (uri->server) {
1799        segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1800        NULLCHK(segment)
1801		if (uri->user == NULL)
1802		ret = xmlStrcat(ret, BAD_CAST "//");
1803        ret = xmlStrcat(ret, segment);
1804        xmlFree(segment);
1805    }
1806
1807    if (uri->port) {
1808        xmlChar port[10];
1809
1810        snprintf((char *) port, 10, "%d", uri->port);
1811        ret = xmlStrcat(ret, BAD_CAST ":");
1812        ret = xmlStrcat(ret, port);
1813    }
1814
1815    if (uri->path) {
1816        segment =
1817            xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1818        NULLCHK(segment)
1819        ret = xmlStrcat(ret, segment);
1820        xmlFree(segment);
1821    }
1822
1823    if (uri->query_raw) {
1824        ret = xmlStrcat(ret, BAD_CAST "?");
1825        ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1826    }
1827    else if (uri->query) {
1828        segment =
1829            xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1830        NULLCHK(segment)
1831        ret = xmlStrcat(ret, BAD_CAST "?");
1832        ret = xmlStrcat(ret, segment);
1833        xmlFree(segment);
1834    }
1835
1836    if (uri->opaque) {
1837        segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1838        NULLCHK(segment)
1839        ret = xmlStrcat(ret, segment);
1840        xmlFree(segment);
1841    }
1842
1843    if (uri->fragment) {
1844        segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1845        NULLCHK(segment)
1846        ret = xmlStrcat(ret, BAD_CAST "#");
1847        ret = xmlStrcat(ret, segment);
1848        xmlFree(segment);
1849    }
1850
1851    xmlFreeURI(uri);
1852#undef NULLCHK
1853
1854    return (ret);
1855}
1856
1857/************************************************************************
1858 *									*
1859 *			Public functions				*
1860 *									*
1861 ************************************************************************/
1862
1863/**
1864 * xmlBuildURI:
1865 * @URI:  the URI instance found in the document
1866 * @base:  the base value
1867 *
1868 * Computes he final URI of the reference done by checking that
1869 * the given URI is valid, and building the final URI using the
1870 * base URI. This is processed according to section 5.2 of the
1871 * RFC 2396
1872 *
1873 * 5.2. Resolving Relative References to Absolute Form
1874 *
1875 * Returns a new URI string (to be freed by the caller) or NULL in case
1876 *         of error.
1877 */
1878xmlChar *
1879xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1880    xmlChar *val = NULL;
1881    int ret, len, indx, cur, out;
1882    xmlURIPtr ref = NULL;
1883    xmlURIPtr bas = NULL;
1884    xmlURIPtr res = NULL;
1885
1886    /*
1887     * 1) The URI reference is parsed into the potential four components and
1888     *    fragment identifier, as described in Section 4.3.
1889     *
1890     *    NOTE that a completely empty URI is treated by modern browsers
1891     *    as a reference to "." rather than as a synonym for the current
1892     *    URI.  Should we do that here?
1893     */
1894    if (URI == NULL)
1895	ret = -1;
1896    else {
1897	if (*URI) {
1898	    ref = xmlCreateURI();
1899	    if (ref == NULL)
1900		goto done;
1901	    ret = xmlParseURIReference(ref, (const char *) URI);
1902	}
1903	else
1904	    ret = 0;
1905    }
1906    if (ret != 0)
1907	goto done;
1908    if ((ref != NULL) && (ref->scheme != NULL)) {
1909	/*
1910	 * The URI is absolute don't modify.
1911	 */
1912	val = xmlStrdup(URI);
1913	goto done;
1914    }
1915    if (base == NULL)
1916	ret = -1;
1917    else {
1918	bas = xmlCreateURI();
1919	if (bas == NULL)
1920	    goto done;
1921	ret = xmlParseURIReference(bas, (const char *) base);
1922    }
1923    if (ret != 0) {
1924	if (ref)
1925	    val = xmlSaveUri(ref);
1926	goto done;
1927    }
1928    if (ref == NULL) {
1929	/*
1930	 * the base fragment must be ignored
1931	 */
1932	if (bas->fragment != NULL) {
1933	    xmlFree(bas->fragment);
1934	    bas->fragment = NULL;
1935	}
1936	val = xmlSaveUri(bas);
1937	goto done;
1938    }
1939
1940    /*
1941     * 2) If the path component is empty and the scheme, authority, and
1942     *    query components are undefined, then it is a reference to the
1943     *    current document and we are done.  Otherwise, the reference URI's
1944     *    query and fragment components are defined as found (or not found)
1945     *    within the URI reference and not inherited from the base URI.
1946     *
1947     *    NOTE that in modern browsers, the parsing differs from the above
1948     *    in the following aspect:  the query component is allowed to be
1949     *    defined while still treating this as a reference to the current
1950     *    document.
1951     */
1952    res = xmlCreateURI();
1953    if (res == NULL)
1954	goto done;
1955    if ((ref->scheme == NULL) && (ref->path == NULL) &&
1956	((ref->authority == NULL) && (ref->server == NULL))) {
1957	if (bas->scheme != NULL)
1958	    res->scheme = xmlMemStrdup(bas->scheme);
1959	if (bas->authority != NULL)
1960	    res->authority = xmlMemStrdup(bas->authority);
1961	else if (bas->server != NULL) {
1962	    res->server = xmlMemStrdup(bas->server);
1963	    if (bas->user != NULL)
1964		res->user = xmlMemStrdup(bas->user);
1965	    res->port = bas->port;
1966	}
1967	if (bas->path != NULL)
1968	    res->path = xmlMemStrdup(bas->path);
1969	if (ref->query_raw != NULL)
1970	    res->query_raw = xmlMemStrdup (ref->query_raw);
1971	else if (ref->query != NULL)
1972	    res->query = xmlMemStrdup(ref->query);
1973	else if (bas->query_raw != NULL)
1974	    res->query_raw = xmlMemStrdup(bas->query_raw);
1975	else if (bas->query != NULL)
1976	    res->query = xmlMemStrdup(bas->query);
1977	if (ref->fragment != NULL)
1978	    res->fragment = xmlMemStrdup(ref->fragment);
1979	goto step_7;
1980    }
1981
1982    /*
1983     * 3) If the scheme component is defined, indicating that the reference
1984     *    starts with a scheme name, then the reference is interpreted as an
1985     *    absolute URI and we are done.  Otherwise, the reference URI's
1986     *    scheme is inherited from the base URI's scheme component.
1987     */
1988    if (ref->scheme != NULL) {
1989	val = xmlSaveUri(ref);
1990	goto done;
1991    }
1992    if (bas->scheme != NULL)
1993	res->scheme = xmlMemStrdup(bas->scheme);
1994
1995    if (ref->query_raw != NULL)
1996	res->query_raw = xmlMemStrdup(ref->query_raw);
1997    else if (ref->query != NULL)
1998	res->query = xmlMemStrdup(ref->query);
1999    if (ref->fragment != NULL)
2000	res->fragment = xmlMemStrdup(ref->fragment);
2001
2002    /*
2003     * 4) If the authority component is defined, then the reference is a
2004     *    network-path and we skip to step 7.  Otherwise, the reference
2005     *    URI's authority is inherited from the base URI's authority
2006     *    component, which will also be undefined if the URI scheme does not
2007     *    use an authority component.
2008     */
2009    if ((ref->authority != NULL) || (ref->server != NULL)) {
2010	if (ref->authority != NULL)
2011	    res->authority = xmlMemStrdup(ref->authority);
2012	else {
2013	    res->server = xmlMemStrdup(ref->server);
2014	    if (ref->user != NULL)
2015		res->user = xmlMemStrdup(ref->user);
2016            res->port = ref->port;
2017	}
2018	if (ref->path != NULL)
2019	    res->path = xmlMemStrdup(ref->path);
2020	goto step_7;
2021    }
2022    if (bas->authority != NULL)
2023	res->authority = xmlMemStrdup(bas->authority);
2024    else if (bas->server != NULL) {
2025	res->server = xmlMemStrdup(bas->server);
2026	if (bas->user != NULL)
2027	    res->user = xmlMemStrdup(bas->user);
2028	res->port = bas->port;
2029    }
2030
2031    /*
2032     * 5) If the path component begins with a slash character ("/"), then
2033     *    the reference is an absolute-path and we skip to step 7.
2034     */
2035    if ((ref->path != NULL) && (ref->path[0] == '/')) {
2036	res->path = xmlMemStrdup(ref->path);
2037	goto step_7;
2038    }
2039
2040
2041    /*
2042     * 6) If this step is reached, then we are resolving a relative-path
2043     *    reference.  The relative path needs to be merged with the base
2044     *    URI's path.  Although there are many ways to do this, we will
2045     *    describe a simple method using a separate string buffer.
2046     *
2047     * Allocate a buffer large enough for the result string.
2048     */
2049    len = 2; /* extra / and 0 */
2050    if (ref->path != NULL)
2051	len += strlen(ref->path);
2052    if (bas->path != NULL)
2053	len += strlen(bas->path);
2054    res->path = (char *) xmlMallocAtomic(len);
2055    if (res->path == NULL) {
2056        xmlURIErrMemory("resolving URI against base\n");
2057	goto done;
2058    }
2059    res->path[0] = 0;
2060
2061    /*
2062     * a) All but the last segment of the base URI's path component is
2063     *    copied to the buffer.  In other words, any characters after the
2064     *    last (right-most) slash character, if any, are excluded.
2065     */
2066    cur = 0;
2067    out = 0;
2068    if (bas->path != NULL) {
2069	while (bas->path[cur] != 0) {
2070	    while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2071		cur++;
2072	    if (bas->path[cur] == 0)
2073		break;
2074
2075	    cur++;
2076	    while (out < cur) {
2077		res->path[out] = bas->path[out];
2078		out++;
2079	    }
2080	}
2081    }
2082    res->path[out] = 0;
2083
2084    /*
2085     * b) The reference's path component is appended to the buffer
2086     *    string.
2087     */
2088    if (ref->path != NULL && ref->path[0] != 0) {
2089	indx = 0;
2090	/*
2091	 * Ensure the path includes a '/'
2092	 */
2093	if ((out == 0) && (bas->server != NULL))
2094	    res->path[out++] = '/';
2095	while (ref->path[indx] != 0) {
2096	    res->path[out++] = ref->path[indx++];
2097	}
2098    }
2099    res->path[out] = 0;
2100
2101    /*
2102     * Steps c) to h) are really path normalization steps
2103     */
2104    xmlNormalizeURIPath(res->path);
2105
2106step_7:
2107
2108    /*
2109     * 7) The resulting URI components, including any inherited from the
2110     *    base URI, are recombined to give the absolute form of the URI
2111     *    reference.
2112     */
2113    val = xmlSaveUri(res);
2114
2115done:
2116    if (ref != NULL)
2117	xmlFreeURI(ref);
2118    if (bas != NULL)
2119	xmlFreeURI(bas);
2120    if (res != NULL)
2121	xmlFreeURI(res);
2122    return(val);
2123}
2124
2125/**
2126 * xmlBuildRelativeURI:
2127 * @URI:  the URI reference under consideration
2128 * @base:  the base value
2129 *
2130 * Expresses the URI of the reference in terms relative to the
2131 * base.  Some examples of this operation include:
2132 *     base = "http://site1.com/docs/book1.html"
2133 *        URI input                        URI returned
2134 *     docs/pic1.gif                    pic1.gif
2135 *     docs/img/pic1.gif                img/pic1.gif
2136 *     img/pic1.gif                     ../img/pic1.gif
2137 *     http://site1.com/docs/pic1.gif   pic1.gif
2138 *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
2139 *
2140 *     base = "docs/book1.html"
2141 *        URI input                        URI returned
2142 *     docs/pic1.gif                    pic1.gif
2143 *     docs/img/pic1.gif                img/pic1.gif
2144 *     img/pic1.gif                     ../img/pic1.gif
2145 *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
2146 *
2147 *
 * Note: if the URI reference is really weird or complicated, it may be
2149 *       worthwhile to first convert it into a "nice" one by calling
2150 *       xmlBuildURI (using 'base') before calling this routine,
2151 *       since this routine (for reasonable efficiency) assumes URI has
2152 *       already been through some validation.
2153 *
 * Returns a new URI string (to be freed by the caller) or NULL in case
 * of error.
2156 */
2157xmlChar *
2158xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2159{
2160    xmlChar *val = NULL;
2161    int ret;
2162    int ix;
2163    int pos = 0;
2164    int nbslash = 0;
2165    int len;
2166    xmlURIPtr ref = NULL;
2167    xmlURIPtr bas = NULL;
2168    xmlChar *bptr, *uptr, *vptr;
2169    int remove_path = 0;
2170
2171    if ((URI == NULL) || (*URI == 0))
2172	return NULL;
2173
2174    /*
2175     * First parse URI into a standard form
2176     */
2177    ref = xmlCreateURI ();
2178    if (ref == NULL)
2179	return NULL;
2180    /* If URI not already in "relative" form */
2181    if (URI[0] != '.') {
2182	ret = xmlParseURIReference (ref, (const char *) URI);
2183	if (ret != 0)
2184	    goto done;		/* Error in URI, return NULL */
2185    } else
2186	ref->path = (char *)xmlStrdup(URI);
2187
2188    /*
2189     * Next parse base into the same standard form
2190     */
2191    if ((base == NULL) || (*base == 0)) {
2192	val = xmlStrdup (URI);
2193	goto done;
2194    }
2195    bas = xmlCreateURI ();
2196    if (bas == NULL)
2197	goto done;
2198    if (base[0] != '.') {
2199	ret = xmlParseURIReference (bas, (const char *) base);
2200	if (ret != 0)
2201	    goto done;		/* Error in base, return NULL */
2202    } else
2203	bas->path = (char *)xmlStrdup(base);
2204
2205    /*
2206     * If the scheme / server on the URI differs from the base,
2207     * just return the URI
2208     */
2209    if ((ref->scheme != NULL) &&
2210	((bas->scheme == NULL) ||
2211	 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2212	 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2213	val = xmlStrdup (URI);
2214	goto done;
2215    }
2216    if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2217	val = xmlStrdup(BAD_CAST "");
2218	goto done;
2219    }
2220    if (bas->path == NULL) {
2221	val = xmlStrdup((xmlChar *)ref->path);
2222	goto done;
2223    }
2224    if (ref->path == NULL) {
2225        ref->path = (char *) "/";
2226	remove_path = 1;
2227    }
2228
2229    /*
2230     * At this point (at last!) we can compare the two paths
2231     *
2232     * First we take care of the special case where either of the
2233     * two path components may be missing (bug 316224)
2234     */
2235    if (bas->path == NULL) {
2236	if (ref->path != NULL) {
2237	    uptr = (xmlChar *) ref->path;
2238	    if (*uptr == '/')
2239		uptr++;
2240	    /* exception characters from xmlSaveUri */
2241	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2242	}
2243	goto done;
2244    }
2245    bptr = (xmlChar *)bas->path;
2246    if (ref->path == NULL) {
2247	for (ix = 0; bptr[ix] != 0; ix++) {
2248	    if (bptr[ix] == '/')
2249		nbslash++;
2250	}
2251	uptr = NULL;
2252	len = 1;	/* this is for a string terminator only */
2253    } else {
2254    /*
2255     * Next we compare the two strings and find where they first differ
2256     */
2257	if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2258            pos += 2;
2259	if ((*bptr == '.') && (bptr[1] == '/'))
2260            bptr += 2;
2261	else if ((*bptr == '/') && (ref->path[pos] != '/'))
2262	    bptr++;
2263	while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2264	    pos++;
2265
2266	if (bptr[pos] == ref->path[pos]) {
2267	    val = xmlStrdup(BAD_CAST "");
2268	    goto done;		/* (I can't imagine why anyone would do this) */
2269	}
2270
2271	/*
2272	 * In URI, "back up" to the last '/' encountered.  This will be the
2273	 * beginning of the "unique" suffix of URI
2274	 */
2275	ix = pos;
2276	if ((ref->path[ix] == '/') && (ix > 0))
2277	    ix--;
2278	else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2279	    ix -= 2;
2280	for (; ix > 0; ix--) {
2281	    if (ref->path[ix] == '/')
2282		break;
2283	}
2284	if (ix == 0) {
2285	    uptr = (xmlChar *)ref->path;
2286	} else {
2287	    ix++;
2288	    uptr = (xmlChar *)&ref->path[ix];
2289	}
2290
2291	/*
2292	 * In base, count the number of '/' from the differing point
2293	 */
2294	if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2295	    for (; bptr[ix] != 0; ix++) {
2296		if (bptr[ix] == '/')
2297		    nbslash++;
2298	    }
2299	}
2300	len = xmlStrlen (uptr) + 1;
2301    }
2302
2303    if (nbslash == 0) {
2304	if (uptr != NULL)
2305	    /* exception characters from xmlSaveUri */
2306	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2307	goto done;
2308    }
2309
2310    /*
2311     * Allocate just enough space for the returned string -
2312     * length of the remainder of the URI, plus enough space
2313     * for the "../" groups, plus one for the terminator
2314     */
2315    val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2316    if (val == NULL) {
2317        xmlURIErrMemory("building relative URI\n");
2318	goto done;
2319    }
2320    vptr = val;
2321    /*
2322     * Put in as many "../" as needed
2323     */
2324    for (; nbslash>0; nbslash--) {
2325	*vptr++ = '.';
2326	*vptr++ = '.';
2327	*vptr++ = '/';
2328    }
2329    /*
2330     * Finish up with the end of the URI
2331     */
2332    if (uptr != NULL) {
2333        if ((vptr > val) && (len > 0) &&
2334	    (uptr[0] == '/') && (vptr[-1] == '/')) {
2335	    memcpy (vptr, uptr + 1, len - 1);
2336	    vptr[len - 2] = 0;
2337	} else {
2338	    memcpy (vptr, uptr, len);
2339	    vptr[len - 1] = 0;
2340	}
2341    } else {
2342	vptr[len - 1] = 0;
2343    }
2344
2345    /* escape the freshly-built path */
2346    vptr = val;
2347	/* exception characters from xmlSaveUri */
2348    val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2349    xmlFree(vptr);
2350
2351done:
2352    /*
2353     * Free the working variables
2354     */
2355    if (remove_path != 0)
2356        ref->path = NULL;
2357    if (ref != NULL)
2358	xmlFreeURI (ref);
2359    if (bas != NULL)
2360	xmlFreeURI (bas);
2361
2362    return val;
2363}
2364
2365/**
2366 * xmlCanonicPath:
2367 * @path:  the resource locator in a filesystem notation
2368 *
2369 * Constructs a canonic path from the specified path.
2370 *
2371 * Returns a new canonic path, or a duplicate of the path parameter if the
2372 * construction fails. The caller is responsible for freeing the memory occupied
2373 * by the returned string. If there is insufficient memory available, or the
2374 * argument is NULL, the function returns NULL.
2375 */
2376#define IS_WINDOWS_PATH(p)					\
2377	((p != NULL) &&						\
2378	 (((p[0] >= 'a') && (p[0] <= 'z')) ||			\
2379	  ((p[0] >= 'A') && (p[0] <= 'Z'))) &&			\
2380	 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2381xmlChar *
2382xmlCanonicPath(const xmlChar *path)
2383{
2384/*
2385 * For Windows implementations, additional work needs to be done to
2386 * replace backslashes in pathnames with "forward slashes"
2387 */
2388#if defined(_WIN32) && !defined(__CYGWIN__)
2389    int len = 0;
2390    int i = 0;
2391    xmlChar *p = NULL;
2392#endif
2393    xmlURIPtr uri;
2394    xmlChar *ret;
2395    const xmlChar *absuri;
2396
2397    if (path == NULL)
2398	return(NULL);
2399
2400#if defined(_WIN32)
2401    /*
2402     * We must not change the backslashes to slashes if the the path
2403     * starts with \\?\
2404     * Those paths can be up to 32k characters long.
2405     * Was added specifically for OpenOffice, those paths can't be converted
2406     * to URIs anyway.
2407     */
2408    if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2409        (path[3] == '\\') )
2410	return xmlStrdup((const xmlChar *) path);
2411#endif
2412
2413	/* sanitize filename starting with // so it can be used as URI */
2414    if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2415        path++;
2416
2417    if ((uri = xmlParseURI((const char *) path)) != NULL) {
2418	xmlFreeURI(uri);
2419	return xmlStrdup(path);
2420    }
2421
2422    /* Check if this is an "absolute uri" */
2423    absuri = xmlStrstr(path, BAD_CAST "://");
2424    if (absuri != NULL) {
2425        int l, j;
2426	unsigned char c;
2427	xmlChar *escURI;
2428
2429        /*
2430	 * this looks like an URI where some parts have not been
2431	 * escaped leading to a parsing problem.  Check that the first
2432	 * part matches a protocol.
2433	 */
2434	l = absuri - path;
2435	/* Bypass if first part (part before the '://') is > 20 chars */
2436	if ((l <= 0) || (l > 20))
2437	    goto path_processing;
2438	/* Bypass if any non-alpha characters are present in first part */
2439	for (j = 0;j < l;j++) {
2440	    c = path[j];
2441	    if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2442	        goto path_processing;
2443	}
2444
2445	/* Escape all except the characters specified in the supplied path */
2446        escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2447	if (escURI != NULL) {
2448	    /* Try parsing the escaped path */
2449	    uri = xmlParseURI((const char *) escURI);
2450	    /* If successful, return the escaped string */
2451	    if (uri != NULL) {
2452	        xmlFreeURI(uri);
2453		return escURI;
2454	    }
2455	}
2456    }
2457
2458path_processing:
2459/* For Windows implementations, replace backslashes with 'forward slashes' */
2460#if defined(_WIN32) && !defined(__CYGWIN__)
2461    /*
2462     * Create a URI structure
2463     */
2464    uri = xmlCreateURI();
2465    if (uri == NULL) {		/* Guard against 'out of memory' */
2466        return(NULL);
2467    }
2468
2469    len = xmlStrlen(path);
2470    if ((len > 2) && IS_WINDOWS_PATH(path)) {
2471        /* make the scheme 'file' */
2472	uri->scheme = xmlStrdup(BAD_CAST "file");
2473	/* allocate space for leading '/' + path + string terminator */
2474	uri->path = xmlMallocAtomic(len + 2);
2475	if (uri->path == NULL) {
2476	    xmlFreeURI(uri);	/* Guard agains 'out of memory' */
2477	    return(NULL);
2478	}
2479	/* Put in leading '/' plus path */
2480	uri->path[0] = '/';
2481	p = uri->path + 1;
2482	strncpy(p, path, len + 1);
2483    } else {
2484	uri->path = xmlStrdup(path);
2485	if (uri->path == NULL) {
2486	    xmlFreeURI(uri);
2487	    return(NULL);
2488	}
2489	p = uri->path;
2490    }
2491    /* Now change all occurences of '\' to '/' */
2492    while (*p != '\0') {
2493	if (*p == '\\')
2494	    *p = '/';
2495	p++;
2496    }
2497
2498    if (uri->scheme == NULL) {
2499	ret = xmlStrdup((const xmlChar *) uri->path);
2500    } else {
2501	ret = xmlSaveUri(uri);
2502    }
2503
2504    xmlFreeURI(uri);
2505#else
2506    ret = xmlStrdup((const xmlChar *) path);
2507#endif
2508    return(ret);
2509}
2510
2511/**
2512 * xmlPathToURI:
2513 * @path:  the resource locator in a filesystem notation
2514 *
2515 * Constructs an URI expressing the existing path
2516 *
2517 * Returns a new URI, or a duplicate of the path parameter if the
2518 * construction fails. The caller is responsible for freeing the memory
2519 * occupied by the returned string. If there is insufficient memory available,
2520 * or the argument is NULL, the function returns NULL.
2521 */
2522xmlChar *
2523xmlPathToURI(const xmlChar *path)
2524{
2525    xmlURIPtr uri;
2526    xmlURI temp;
2527    xmlChar *ret, *cal;
2528
2529    if (path == NULL)
2530        return(NULL);
2531
2532    if ((uri = xmlParseURI((const char *) path)) != NULL) {
2533	xmlFreeURI(uri);
2534	return xmlStrdup(path);
2535    }
2536    cal = xmlCanonicPath(path);
2537    if (cal == NULL)
2538        return(NULL);
2539#if defined(_WIN32) && !defined(__CYGWIN__)
2540    /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2541       If 'cal' is a valid URI allready then we are done here, as continuing would make
2542       it invalid. */
2543    if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2544	xmlFreeURI(uri);
2545	return cal;
2546    }
2547    /* 'cal' can contain a relative path with backslashes. If that is processed
2548       by xmlSaveURI, they will be escaped and the external entity loader machinery
2549       will fail. So convert them to slashes. Misuse 'ret' for walking. */
2550    ret = cal;
2551    while (*ret != '\0') {
2552	if (*ret == '\\')
2553	    *ret = '/';
2554	ret++;
2555    }
2556#endif
2557    memset(&temp, 0, sizeof(temp));
2558    temp.path = (char *) cal;
2559    ret = xmlSaveUri(&temp);
2560    xmlFree(cal);
2561    return(ret);
2562}
2563#define bottom_uri
2564#include "elfgcchack.h"
2565