1/**
2 * uri.c: set of generic URI related routines
3 *
4 * Reference: RFCs 3986, 2732 and 2373
5 *
6 * See Copyright for the status of this software.
7 *
8 * daniel@veillard.com
9 */
10
11#define IN_LIBXML
#include "libxml.h"

#include <limits.h>
#include <string.h>

#include <libxml/xmlmemory.h>
#include <libxml/uri.h>
#include <libxml/globals.h>
#include <libxml/xmlerror.h>
20
21static void xmlCleanURI(xmlURIPtr uri);
22
/*
 * Character classes from RFC 2396, used in the legacy handling code.
 * The IS_xxx(x) macros take a character value, except IS_UNWISE(p) and
 * NEXT(p) which take a pointer to the current character.
 */

/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/*
 * alpha    = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
    ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * Takes a pointer to the character to test.
 */
#define IS_UNWISE(p)                                                    \
      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
       ((*(p) == ']')) || ((*(p) == '`')))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
        ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
        ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * Skip to the next character, handling escaped sequences: the pointer
 * moves by 3 when it points at '%', by 1 otherwise. The macro does not
 * check that two characters follow the '%'; callers validate that first.
 * The argument is parenthesized so any lvalue expression (e.g. *pp)
 * is advanced correctly.
 */
#define NEXT(p) ((*(p) == '%') ? (p) += 3 : (p)++)

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/* Duplicate the first n bytes of s, returned as a char pointer. */
#define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n))
108
109/************************************************************************
110 *									*
111 *                         RFC 3986 parser				*
112 *									*
113 ************************************************************************/
114
/*
 * All ISA_xxx(p) macros take a pointer to the character to classify.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)							\
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)						\
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\
       ((*(p) == '\'')) || ((*(p) == '(')) || ((*(p) == ')')) ||		\
       ((*(p) == '*')) || ((*(p) == '+')) || ((*(p) == ',')) ||		\
       ((*(p) == ';')) || ((*(p) == '=')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)						\
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)						\
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The tests short-circuit, so the second and third characters are only
 * read when the preceding ones matched.
 */
#define ISA_PCT_ENCODED(p)						\
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)							\
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
      ((*(p) == ':')) || ((*(p) == '@')))
164
165/**
166 * xmlParse3986Scheme:
167 * @uri:  pointer to an URI structure
168 * @str:  pointer to the string to analyze
169 *
170 * Parse an URI scheme
171 *
172 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
173 *
174 * Returns 0 or the error code
175 */
176static int
177xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
178    const char *cur;
179
180    if (str == NULL)
181	return(-1);
182
183    cur = *str;
184    if (!ISA_ALPHA(cur))
185	return(2);
186    cur++;
187    while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
188           (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
189    if (uri != NULL) {
190	if (uri->scheme != NULL) xmlFree(uri->scheme);
191	uri->scheme = STRNDUP(*str, cur - *str);
192    }
193    *str = cur;
194    return(0);
195}
196
197/**
198 * xmlParse3986Fragment:
199 * @uri:  pointer to an URI structure
200 * @str:  pointer to the string to analyze
201 *
202 * Parse the query part of an URI
203 *
204 * fragment      = *( pchar / "/" / "?" )
205 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
206 *       in the fragment identifier but this is used very broadly for
207 *       xpointer scheme selection, so we are allowing it here to not break
208 *       for example all the DocBook processing chains.
209 *
210 * Returns 0 or the error code
211 */
212static int
213xmlParse3986Fragment(xmlURIPtr uri, const char **str)
214{
215    const char *cur;
216
217    if (str == NULL)
218        return (-1);
219
220    cur = *str;
221
222    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
223           (*cur == '[') || (*cur == ']') ||
224           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
225        NEXT(cur);
226    if (uri != NULL) {
227        if (uri->fragment != NULL)
228            xmlFree(uri->fragment);
229	if (uri->cleanup & 2)
230	    uri->fragment = STRNDUP(*str, cur - *str);
231	else
232	    uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
233    }
234    *str = cur;
235    return (0);
236}
237
238/**
239 * xmlParse3986Query:
240 * @uri:  pointer to an URI structure
241 * @str:  pointer to the string to analyze
242 *
243 * Parse the query part of an URI
244 *
245 * query = *uric
246 *
247 * Returns 0 or the error code
248 */
249static int
250xmlParse3986Query(xmlURIPtr uri, const char **str)
251{
252    const char *cur;
253
254    if (str == NULL)
255        return (-1);
256
257    cur = *str;
258
259    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
260           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
261        NEXT(cur);
262    if (uri != NULL) {
263        if (uri->query != NULL)
264            xmlFree(uri->query);
265	if (uri->cleanup & 2)
266	    uri->query = STRNDUP(*str, cur - *str);
267	else
268	    uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
269
270	/* Save the raw bytes of the query as well.
271	 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
272	 */
273	if (uri->query_raw != NULL)
274	    xmlFree (uri->query_raw);
275	uri->query_raw = STRNDUP (*str, cur - *str);
276    }
277    *str = cur;
278    return (0);
279}
280
281/**
282 * xmlParse3986Port:
283 * @uri:  pointer to an URI structure
284 * @str:  the string to analyze
285 *
286 * Parse a port  part and fills in the appropriate fields
287 * of the @uri structure
288 *
289 * port          = *DIGIT
290 *
291 * Returns 0 or the error code
292 */
293static int
294xmlParse3986Port(xmlURIPtr uri, const char **str)
295{
296    const char *cur = *str;
297
298    if (ISA_DIGIT(cur)) {
299	if (uri != NULL)
300	    uri->port = 0;
301	while (ISA_DIGIT(cur)) {
302	    if (uri != NULL)
303		uri->port = uri->port * 10 + (*cur - '0');
304	    cur++;
305	}
306	*str = cur;
307	return(0);
308    }
309    return(1);
310}
311
312/**
313 * xmlParse3986Userinfo:
314 * @uri:  pointer to an URI structure
315 * @str:  the string to analyze
316 *
317 * Parse an user informations part and fills in the appropriate fields
318 * of the @uri structure
319 *
320 * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
321 *
322 * Returns 0 or the error code
323 */
324static int
325xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
326{
327    const char *cur;
328
329    cur = *str;
330    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
331           ISA_SUB_DELIM(cur) || (*cur == ':'))
332	NEXT(cur);
333    if (*cur == '@') {
334	if (uri != NULL) {
335	    if (uri->user != NULL) xmlFree(uri->user);
336	    if (uri->cleanup & 2)
337		uri->user = STRNDUP(*str, cur - *str);
338	    else
339		uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
340	}
341	*str = cur;
342	return(0);
343    }
344    return(1);
345}
346
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. At most three leading digits are examined; a
 * multi-digit octet must not start with '0' and must not exceed 255.
 * *@str is only advanced on success.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int ndigits = 0;
    int value = 0;

    /* The scan stops at the first non-digit, so it never passes the NUL. */
    while ((ndigits < 3) &&
           (cur[ndigits] >= '0') && (cur[ndigits] <= '9')) {
        value = value * 10 + (cur[ndigits] - '0');
        ndigits++;
    }

    if (ndigits == 0)
        return(1);
    /* Leading zeros and values above 255 are not valid dec-octets. */
    if ((ndigits > 1) && ((cur[0] == '0') || (value > 255)))
        return(1);

    *str = cur + ndigits;
    return(0);
}
384/**
385 * xmlParse3986Host:
386 * @uri:  pointer to an URI structure
387 * @str:  the string to analyze
388 *
389 * Parse an host part and fills in the appropriate fields
390 * of the @uri structure
391 *
392 * host          = IP-literal / IPv4address / reg-name
393 * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
394 * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
395 * reg-name      = *( unreserved / pct-encoded / sub-delims )
396 *
397 * Returns 0 or the error code
398 */
399static int
400xmlParse3986Host(xmlURIPtr uri, const char **str)
401{
402    const char *cur = *str;
403    const char *host;
404
405    host = cur;
406    /*
407     * IPv6 and future adressing scheme are enclosed between brackets
408     */
409    if (*cur == '[') {
410        cur++;
411	while ((*cur != ']') && (*cur != 0))
412	    cur++;
413	if (*cur != ']')
414	    return(1);
415	cur++;
416	goto found;
417    }
418    /*
419     * try to parse an IPv4
420     */
421    if (ISA_DIGIT(cur)) {
422        if (xmlParse3986DecOctet(&cur) != 0)
423	    goto not_ipv4;
424	if (*cur != '.')
425	    goto not_ipv4;
426	cur++;
427        if (xmlParse3986DecOctet(&cur) != 0)
428	    goto not_ipv4;
429	if (*cur != '.')
430	    goto not_ipv4;
431        if (xmlParse3986DecOctet(&cur) != 0)
432	    goto not_ipv4;
433	if (*cur != '.')
434	    goto not_ipv4;
435        if (xmlParse3986DecOctet(&cur) != 0)
436	    goto not_ipv4;
437	goto found;
438not_ipv4:
439        cur = *str;
440    }
441    /*
442     * then this should be a hostname which can be empty
443     */
444    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
445        NEXT(cur);
446found:
447    if (uri != NULL) {
448	if (uri->authority != NULL) xmlFree(uri->authority);
449	uri->authority = NULL;
450	if (uri->server != NULL) xmlFree(uri->server);
451	if (cur != host) {
452	    if (uri->cleanup & 2)
453		uri->server = STRNDUP(host, cur - host);
454	    else
455		uri->server = xmlURIUnescapeString(host, cur - host, NULL);
456	} else
457	    uri->server = NULL;
458    }
459    *str = cur;
460    return(0);
461}
462
463/**
464 * xmlParse3986Authority:
465 * @uri:  pointer to an URI structure
466 * @str:  the string to analyze
467 *
468 * Parse an authority part and fills in the appropriate fields
469 * of the @uri structure
470 *
471 * authority     = [ userinfo "@" ] host [ ":" port ]
472 *
473 * Returns 0 or the error code
474 */
475static int
476xmlParse3986Authority(xmlURIPtr uri, const char **str)
477{
478    const char *cur;
479    int ret;
480
481    cur = *str;
482    /*
483     * try to parse an userinfo and check for the trailing @
484     */
485    ret = xmlParse3986Userinfo(uri, &cur);
486    if ((ret != 0) || (*cur != '@'))
487        cur = *str;
488    else
489        cur++;
490    ret = xmlParse3986Host(uri, &cur);
491    if (ret != 0) return(ret);
492    if (*cur == ':') {
493        cur++;
494        ret = xmlParse3986Port(uri, &cur);
495	if (ret != 0) return(ret);
496    }
497    *str = cur;
498    return(0);
499}
500
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character (0 for none)
 * @empty: allow an empty segment
 *
 * Skip a path segment; nothing is stored, only *@str is advanced.
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * The segment ends at the first character that is not a pchar or that
 * equals @forbid. A segment starting with @forbid is therefore empty
 * and handled like any other empty segment.
 *
 * Returns 0 on success, 1 if the segment is empty and @empty is 0
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *cur;

    cur = *str;
    if (!ISA_PCHAR(cur) || (*cur == forbid)) {
        if (empty)
            return(0);
        return(1);
    }
    while (ISA_PCHAR(cur) && (*cur != forbid))
        NEXT(cur);
    *str = cur;
    return (0);
}
533
534/**
535 * xmlParse3986PathAbEmpty:
536 * @uri:  pointer to an URI structure
537 * @str:  the string to analyze
538 *
539 * Parse an path absolute or empty and fills in the appropriate fields
540 * of the @uri structure
541 *
542 * path-abempty  = *( "/" segment )
543 *
544 * Returns 0 or the error code
545 */
546static int
547xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
548{
549    const char *cur;
550    int ret;
551
552    cur = *str;
553
554    while (*cur == '/') {
555        cur++;
556	ret = xmlParse3986Segment(&cur, 0, 1);
557	if (ret != 0) return(ret);
558    }
559    if (uri != NULL) {
560	if (uri->path != NULL) xmlFree(uri->path);
561	if (uri->cleanup & 2)
562	    uri->path = STRNDUP(*str, cur - *str);
563	else
564	    uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
565    }
566    *str = cur;
567    return (0);
568}
569
570/**
571 * xmlParse3986PathAbsolute:
572 * @uri:  pointer to an URI structure
573 * @str:  the string to analyze
574 *
575 * Parse an path absolute and fills in the appropriate fields
576 * of the @uri structure
577 *
578 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
579 *
580 * Returns 0 or the error code
581 */
582static int
583xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
584{
585    const char *cur;
586    int ret;
587
588    cur = *str;
589
590    if (*cur != '/')
591        return(1);
592    cur++;
593    ret = xmlParse3986Segment(&cur, 0, 0);
594    if (ret == 0) {
595	while (*cur == '/') {
596	    cur++;
597	    ret = xmlParse3986Segment(&cur, 0, 1);
598	    if (ret != 0) return(ret);
599	}
600    }
601    if (uri != NULL) {
602	if (uri->path != NULL) xmlFree(uri->path);
603	if (uri->cleanup & 2)
604	    uri->path = STRNDUP(*str, cur - *str);
605	else
606	    uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
607    }
608    *str = cur;
609    return (0);
610}
611
612/**
613 * xmlParse3986PathRootless:
614 * @uri:  pointer to an URI structure
615 * @str:  the string to analyze
616 *
617 * Parse an path without root and fills in the appropriate fields
618 * of the @uri structure
619 *
620 * path-rootless = segment-nz *( "/" segment )
621 *
622 * Returns 0 or the error code
623 */
624static int
625xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
626{
627    const char *cur;
628    int ret;
629
630    cur = *str;
631
632    ret = xmlParse3986Segment(&cur, 0, 0);
633    if (ret != 0) return(ret);
634    while (*cur == '/') {
635        cur++;
636	ret = xmlParse3986Segment(&cur, 0, 1);
637	if (ret != 0) return(ret);
638    }
639    if (uri != NULL) {
640	if (uri->path != NULL) xmlFree(uri->path);
641	if (uri->cleanup & 2)
642	    uri->path = STRNDUP(*str, cur - *str);
643	else
644	    uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
645    }
646    *str = cur;
647    return (0);
648}
649
650/**
651 * xmlParse3986PathNoScheme:
652 * @uri:  pointer to an URI structure
653 * @str:  the string to analyze
654 *
655 * Parse an path which is not a scheme and fills in the appropriate fields
656 * of the @uri structure
657 *
658 * path-noscheme = segment-nz-nc *( "/" segment )
659 *
660 * Returns 0 or the error code
661 */
662static int
663xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
664{
665    const char *cur;
666    int ret;
667
668    cur = *str;
669
670    ret = xmlParse3986Segment(&cur, ':', 0);
671    if (ret != 0) return(ret);
672    while (*cur == '/') {
673        cur++;
674	ret = xmlParse3986Segment(&cur, 0, 1);
675	if (ret != 0) return(ret);
676    }
677    if (uri != NULL) {
678	if (uri->path != NULL) xmlFree(uri->path);
679	if (uri->cleanup & 2)
680	    uri->path = STRNDUP(*str, cur - *str);
681	else
682	    uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
683    }
684    *str = cur;
685    return (0);
686}
687
688/**
689 * xmlParse3986HierPart:
690 * @uri:  pointer to an URI structure
691 * @str:  the string to analyze
692 *
693 * Parse an hierarchical part and fills in the appropriate fields
694 * of the @uri structure
695 *
696 * hier-part     = "//" authority path-abempty
697 *                / path-absolute
698 *                / path-rootless
699 *                / path-empty
700 *
701 * Returns 0 or the error code
702 */
703static int
704xmlParse3986HierPart(xmlURIPtr uri, const char **str)
705{
706    const char *cur;
707    int ret;
708
709    cur = *str;
710
711    if ((*cur == '/') && (*(cur + 1) == '/')) {
712        cur += 2;
713	ret = xmlParse3986Authority(uri, &cur);
714	if (ret != 0) return(ret);
715	ret = xmlParse3986PathAbEmpty(uri, &cur);
716	if (ret != 0) return(ret);
717	*str = cur;
718	return(0);
719    } else if (*cur == '/') {
720        ret = xmlParse3986PathAbsolute(uri, &cur);
721	if (ret != 0) return(ret);
722    } else if (ISA_PCHAR(cur)) {
723        ret = xmlParse3986PathRootless(uri, &cur);
724	if (ret != 0) return(ret);
725    } else {
726	/* path-empty is effectively empty */
727	if (uri != NULL) {
728	    if (uri->path != NULL) xmlFree(uri->path);
729	    uri->path = NULL;
730	}
731    }
732    *str = cur;
733    return (0);
734}
735
736/**
737 * xmlParse3986RelativeRef:
738 * @uri:  pointer to an URI structure
739 * @str:  the string to analyze
740 *
741 * Parse an URI string and fills in the appropriate fields
742 * of the @uri structure
743 *
744 * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
745 * relative-part = "//" authority path-abempty
746 *               / path-absolute
747 *               / path-noscheme
748 *               / path-empty
749 *
750 * Returns 0 or the error code
751 */
752static int
753xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
754    int ret;
755
756    if ((*str == '/') && (*(str + 1) == '/')) {
757        str += 2;
758	ret = xmlParse3986Authority(uri, &str);
759	if (ret != 0) return(ret);
760	ret = xmlParse3986PathAbEmpty(uri, &str);
761	if (ret != 0) return(ret);
762    } else if (*str == '/') {
763	ret = xmlParse3986PathAbsolute(uri, &str);
764	if (ret != 0) return(ret);
765    } else if (ISA_PCHAR(str)) {
766        ret = xmlParse3986PathNoScheme(uri, &str);
767	if (ret != 0) return(ret);
768    } else {
769	/* path-empty is effectively empty */
770	if (uri != NULL) {
771	    if (uri->path != NULL) xmlFree(uri->path);
772	    uri->path = NULL;
773	}
774    }
775
776    if (*str == '?') {
777	str++;
778	ret = xmlParse3986Query(uri, &str);
779	if (ret != 0) return(ret);
780    }
781    if (*str == '#') {
782	str++;
783	ret = xmlParse3986Fragment(uri, &str);
784	if (ret != 0) return(ret);
785    }
786    if (*str != 0) {
787	xmlCleanURI(uri);
788	return(1);
789    }
790    return(0);
791}
792
793
794/**
795 * xmlParse3986URI:
796 * @uri:  pointer to an URI structure
797 * @str:  the string to analyze
798 *
799 * Parse an URI string and fills in the appropriate fields
800 * of the @uri structure
801 *
802 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
803 *
804 * Returns 0 or the error code
805 */
806static int
807xmlParse3986URI(xmlURIPtr uri, const char *str) {
808    int ret;
809
810    ret = xmlParse3986Scheme(uri, &str);
811    if (ret != 0) return(ret);
812    if (*str != ':') {
813	return(1);
814    }
815    str++;
816    ret = xmlParse3986HierPart(uri, &str);
817    if (ret != 0) return(ret);
818    if (*str == '?') {
819	str++;
820	ret = xmlParse3986Query(uri, &str);
821	if (ret != 0) return(ret);
822    }
823    if (*str == '#') {
824	str++;
825	ret = xmlParse3986Fragment(uri, &str);
826	if (ret != 0) return(ret);
827    }
828    if (*str != 0) {
829	xmlCleanURI(uri);
830	return(1);
831    }
832    return(0);
833}
834
835/**
836 * xmlParse3986URIReference:
837 * @uri:  pointer to an URI structure
838 * @str:  the string to analyze
839 *
840 * Parse an URI reference string and fills in the appropriate fields
841 * of the @uri structure
842 *
843 * URI-reference = URI / relative-ref
844 *
845 * Returns 0 or the error code
846 */
847static int
848xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
849    int ret;
850
851    if (str == NULL)
852	return(-1);
853    xmlCleanURI(uri);
854
855    /*
856     * Try first to parse absolute refs, then fallback to relative if
857     * it fails.
858     */
859    ret = xmlParse3986URI(uri, str);
860    if (ret != 0) {
861	xmlCleanURI(uri);
862        ret = xmlParse3986RelativeRef(uri, str);
863	if (ret != 0) {
864	    xmlCleanURI(uri);
865	    return(ret);
866	}
867    }
868    return(0);
869}
870
871/**
872 * xmlParseURI:
873 * @str:  the URI string to analyze
874 *
875 * Parse an URI based on RFC 3986
876 *
877 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
878 *
879 * Returns a newly built xmlURIPtr or NULL in case of error
880 */
881xmlURIPtr
882xmlParseURI(const char *str) {
883    xmlURIPtr uri;
884    int ret;
885
886    if (str == NULL)
887	return(NULL);
888    uri = xmlCreateURI();
889    if (uri != NULL) {
890	ret = xmlParse3986URIReference(uri, str);
891        if (ret) {
892	    xmlFreeURI(uri);
893	    return(NULL);
894	}
895    }
896    return(uri);
897}
898
899/**
900 * xmlParseURIReference:
901 * @uri:  pointer to an URI structure
902 * @str:  the string to analyze
903 *
904 * Parse an URI reference string based on RFC 3986 and fills in the
905 * appropriate fields of the @uri structure
906 *
907 * URI-reference = URI / relative-ref
908 *
909 * Returns 0 or the error code
910 */
911int
912xmlParseURIReference(xmlURIPtr uri, const char *str) {
913    return(xmlParse3986URIReference(uri, str));
914}
915
916/**
917 * xmlParseURIRaw:
918 * @str:  the URI string to analyze
919 * @raw:  if 1 unescaping of URI pieces are disabled
920 *
921 * Parse an URI but allows to keep intact the original fragments.
922 *
923 * URI-reference = URI / relative-ref
924 *
925 * Returns a newly built xmlURIPtr or NULL in case of error
926 */
927xmlURIPtr
928xmlParseURIRaw(const char *str, int raw) {
929    xmlURIPtr uri;
930    int ret;
931
932    if (str == NULL)
933	return(NULL);
934    uri = xmlCreateURI();
935    if (uri != NULL) {
936        if (raw) {
937	    uri->cleanup |= 2;
938	}
939	ret = xmlParseURIReference(uri, str);
940        if (ret) {
941	    xmlFreeURI(uri);
942	    return(NULL);
943	}
944    }
945    return(uri);
946}
947
948/************************************************************************
949 *									*
950 *			Generic URI structure functions			*
951 *									*
952 ************************************************************************/
953
954/**
955 * xmlCreateURI:
956 *
957 * Simply creates an empty xmlURI
958 *
959 * Returns the new structure or NULL in case of error
960 */
961xmlURIPtr
962xmlCreateURI(void) {
963    xmlURIPtr ret;
964
965    ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
966    if (ret == NULL) {
967	xmlGenericError(xmlGenericErrorContext,
968		"xmlCreateURI: out of memory\n");
969	return(NULL);
970    }
971    memset(ret, 0, sizeof(xmlURI));
972    return(ret);
973}
974
975/**
976 * xmlSaveUri:
977 * @uri:  pointer to an xmlURI
978 *
979 * Save the URI as an escaped string
980 *
981 * Returns a new string (to be deallocated by caller)
982 */
983xmlChar *
984xmlSaveUri(xmlURIPtr uri) {
985    xmlChar *ret = NULL;
986    xmlChar *temp;
987    const char *p;
988    int len;
989    int max;
990
991    if (uri == NULL) return(NULL);
992
993
994    max = 80;
995    ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
996    if (ret == NULL) {
997	xmlGenericError(xmlGenericErrorContext,
998		"xmlSaveUri: out of memory\n");
999	return(NULL);
1000    }
1001    len = 0;
1002
1003    if (uri->scheme != NULL) {
1004	p = uri->scheme;
1005	while (*p != 0) {
1006	    if (len >= max) {
1007		max *= 2;
1008		temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1009		if (temp == NULL) {
1010		    xmlGenericError(xmlGenericErrorContext,
1011			    "xmlSaveUri: out of memory\n");
1012		    xmlFree(ret);
1013		    return(NULL);
1014		}
1015		ret = temp;
1016	    }
1017	    ret[len++] = *p++;
1018	}
1019	if (len >= max) {
1020	    max *= 2;
1021	    temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1022	    if (temp == NULL) {
1023		xmlGenericError(xmlGenericErrorContext,
1024			"xmlSaveUri: out of memory\n");
1025		xmlFree(ret);
1026		return(NULL);
1027	    }
1028	    ret = temp;
1029	}
1030	ret[len++] = ':';
1031    }
1032    if (uri->opaque != NULL) {
1033	p = uri->opaque;
1034	while (*p != 0) {
1035	    if (len + 3 >= max) {
1036		max *= 2;
1037		temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1038		if (temp == NULL) {
1039		    xmlGenericError(xmlGenericErrorContext,
1040			    "xmlSaveUri: out of memory\n");
1041		    xmlFree(ret);
1042		    return(NULL);
1043		}
1044		ret = temp;
1045	    }
1046	    if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1047		ret[len++] = *p++;
1048	    else {
1049		int val = *(unsigned char *)p++;
1050		int hi = val / 0x10, lo = val % 0x10;
1051		ret[len++] = '%';
1052		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1053		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1054	    }
1055	}
1056    } else {
1057	if (uri->server != NULL) {
1058	    if (len + 3 >= max) {
1059		max *= 2;
1060		temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1061		if (temp == NULL) {
1062		    xmlGenericError(xmlGenericErrorContext,
1063			    "xmlSaveUri: out of memory\n");
1064                  xmlFree(ret);
1065		    return(NULL);
1066		}
1067		ret = temp;
1068	    }
1069	    ret[len++] = '/';
1070	    ret[len++] = '/';
1071	    if (uri->user != NULL) {
1072		p = uri->user;
1073		while (*p != 0) {
1074		    if (len + 3 >= max) {
1075			max *= 2;
1076			temp = (xmlChar *) xmlRealloc(ret,
1077				(max + 1) * sizeof(xmlChar));
1078			if (temp == NULL) {
1079			    xmlGenericError(xmlGenericErrorContext,
1080				    "xmlSaveUri: out of memory\n");
1081			    xmlFree(ret);
1082			    return(NULL);
1083			}
1084			ret = temp;
1085		    }
1086		    if ((IS_UNRESERVED(*(p))) ||
1087			((*(p) == ';')) || ((*(p) == ':')) ||
1088			((*(p) == '&')) || ((*(p) == '=')) ||
1089			((*(p) == '+')) || ((*(p) == '$')) ||
1090			((*(p) == ',')))
1091			ret[len++] = *p++;
1092		    else {
1093			int val = *(unsigned char *)p++;
1094			int hi = val / 0x10, lo = val % 0x10;
1095			ret[len++] = '%';
1096			ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1097			ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1098		    }
1099		}
1100		if (len + 3 >= max) {
1101		    max *= 2;
1102		    temp = (xmlChar *) xmlRealloc(ret,
1103			    (max + 1) * sizeof(xmlChar));
1104		    if (temp == NULL) {
1105			xmlGenericError(xmlGenericErrorContext,
1106				"xmlSaveUri: out of memory\n");
1107			xmlFree(ret);
1108			return(NULL);
1109		    }
1110		    ret = temp;
1111		}
1112		ret[len++] = '@';
1113	    }
1114	    p = uri->server;
1115	    while (*p != 0) {
1116		if (len >= max) {
1117		    max *= 2;
1118		    temp = (xmlChar *) xmlRealloc(ret,
1119			    (max + 1) * sizeof(xmlChar));
1120		    if (temp == NULL) {
1121			xmlGenericError(xmlGenericErrorContext,
1122				"xmlSaveUri: out of memory\n");
1123			xmlFree(ret);
1124			return(NULL);
1125		    }
1126		    ret = temp;
1127		}
1128		ret[len++] = *p++;
1129	    }
1130	    if (uri->port > 0) {
1131		if (len + 10 >= max) {
1132		    max *= 2;
1133		    temp = (xmlChar *) xmlRealloc(ret,
1134			    (max + 1) * sizeof(xmlChar));
1135		    if (temp == NULL) {
1136			xmlGenericError(xmlGenericErrorContext,
1137				"xmlSaveUri: out of memory\n");
1138                     xmlFree(ret);
1139			return(NULL);
1140		    }
1141		    ret = temp;
1142		}
1143		len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1144	    }
1145	} else if (uri->authority != NULL) {
1146	    if (len + 3 >= max) {
1147		max *= 2;
1148		temp = (xmlChar *) xmlRealloc(ret,
1149			(max + 1) * sizeof(xmlChar));
1150		if (temp == NULL) {
1151			xmlGenericError(xmlGenericErrorContext,
1152				"xmlSaveUri: out of memory\n");
1153                     xmlFree(ret);
1154			return(NULL);
1155		    }
1156		    ret = temp;
1157	    }
1158	    ret[len++] = '/';
1159	    ret[len++] = '/';
1160	    p = uri->authority;
1161	    while (*p != 0) {
1162		if (len + 3 >= max) {
1163		    max *= 2;
1164		    temp = (xmlChar *) xmlRealloc(ret,
1165			    (max + 1) * sizeof(xmlChar));
1166		    if (temp == NULL) {
1167			xmlGenericError(xmlGenericErrorContext,
1168				"xmlSaveUri: out of memory\n");
1169                     xmlFree(ret);
1170			return(NULL);
1171		    }
1172		    ret = temp;
1173		}
1174		if ((IS_UNRESERVED(*(p))) ||
1175                    ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1176                    ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1177                    ((*(p) == '=')) || ((*(p) == '+')))
1178		    ret[len++] = *p++;
1179		else {
1180		    int val = *(unsigned char *)p++;
1181		    int hi = val / 0x10, lo = val % 0x10;
1182		    ret[len++] = '%';
1183		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1184		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1185		}
1186	    }
1187	} else if (uri->scheme != NULL) {
1188	    if (len + 3 >= max) {
1189		max *= 2;
1190		temp = (xmlChar *) xmlRealloc(ret,
1191			(max + 1) * sizeof(xmlChar));
1192		if (temp == NULL) {
1193			xmlGenericError(xmlGenericErrorContext,
1194				"xmlSaveUri: out of memory\n");
1195                     xmlFree(ret);
1196			return(NULL);
1197		    }
1198		    ret = temp;
1199	    }
1200	    ret[len++] = '/';
1201	    ret[len++] = '/';
1202	}
1203	if (uri->path != NULL) {
1204	    p = uri->path;
1205	    /*
1206	     * the colon in file:///d: should not be escaped or
1207	     * Windows accesses fail later.
1208	     */
1209	    if ((uri->scheme != NULL) &&
1210		(p[0] == '/') &&
1211		(((p[1] >= 'a') && (p[1] <= 'z')) ||
1212		 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1213		(p[2] == ':') &&
1214	        (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1215		if (len + 3 >= max) {
1216		    max *= 2;
1217		    ret = (xmlChar *) xmlRealloc(ret,
1218			    (max + 1) * sizeof(xmlChar));
1219		    if (ret == NULL) {
1220			xmlGenericError(xmlGenericErrorContext,
1221				"xmlSaveUri: out of memory\n");
1222			return(NULL);
1223		    }
1224		}
1225		ret[len++] = *p++;
1226		ret[len++] = *p++;
1227		ret[len++] = *p++;
1228	    }
1229	    while (*p != 0) {
1230		if (len + 3 >= max) {
1231		    max *= 2;
1232		    temp = (xmlChar *) xmlRealloc(ret,
1233			    (max + 1) * sizeof(xmlChar));
1234		    if (temp == NULL) {
1235			xmlGenericError(xmlGenericErrorContext,
1236				"xmlSaveUri: out of memory\n");
1237                     xmlFree(ret);
1238			return(NULL);
1239		    }
1240		    ret = temp;
1241		}
1242		if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1243                    ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1244	            ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1245	            ((*(p) == ',')))
1246		    ret[len++] = *p++;
1247		else {
1248		    int val = *(unsigned char *)p++;
1249		    int hi = val / 0x10, lo = val % 0x10;
1250		    ret[len++] = '%';
1251		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1252		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1253		}
1254	    }
1255	}
1256	if (uri->query_raw != NULL) {
1257	    if (len + 1 >= max) {
1258		max *= 2;
1259		temp = (xmlChar *) xmlRealloc(ret,
1260			(max + 1) * sizeof(xmlChar));
1261		if (temp == NULL) {
1262			xmlGenericError(xmlGenericErrorContext,
1263				"xmlSaveUri: out of memory\n");
1264                     xmlFree(ret);
1265			return(NULL);
1266		    }
1267		    ret = temp;
1268	    }
1269	    ret[len++] = '?';
1270	    p = uri->query_raw;
1271	    while (*p != 0) {
1272		if (len + 1 >= max) {
1273		    max *= 2;
1274		    temp = (xmlChar *) xmlRealloc(ret,
1275			    (max + 1) * sizeof(xmlChar));
1276		    if (temp == NULL) {
1277			xmlGenericError(xmlGenericErrorContext,
1278				"xmlSaveUri: out of memory\n");
1279                     xmlFree(ret);
1280			return(NULL);
1281		    }
1282		    ret = temp;
1283		}
1284		ret[len++] = *p++;
1285	    }
1286	} else if (uri->query != NULL) {
1287	    if (len + 3 >= max) {
1288		max *= 2;
1289		temp = (xmlChar *) xmlRealloc(ret,
1290			(max + 1) * sizeof(xmlChar));
1291		if (temp == NULL) {
1292			xmlGenericError(xmlGenericErrorContext,
1293				"xmlSaveUri: out of memory\n");
1294                     xmlFree(ret);
1295			return(NULL);
1296		    }
1297		    ret = temp;
1298	    }
1299	    ret[len++] = '?';
1300	    p = uri->query;
1301	    while (*p != 0) {
1302		if (len + 3 >= max) {
1303		    max *= 2;
1304		    temp = (xmlChar *) xmlRealloc(ret,
1305			    (max + 1) * sizeof(xmlChar));
1306		    if (temp == NULL) {
1307			xmlGenericError(xmlGenericErrorContext,
1308				"xmlSaveUri: out of memory\n");
1309                     xmlFree(ret);
1310			return(NULL);
1311		    }
1312		    ret = temp;
1313		}
1314		if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1315		    ret[len++] = *p++;
1316		else {
1317		    int val = *(unsigned char *)p++;
1318		    int hi = val / 0x10, lo = val % 0x10;
1319		    ret[len++] = '%';
1320		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1321		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1322		}
1323	    }
1324	}
1325    }
1326    if (uri->fragment != NULL) {
1327	if (len + 3 >= max) {
1328	    max *= 2;
1329	    temp = (xmlChar *) xmlRealloc(ret,
1330		    (max + 1) * sizeof(xmlChar));
1331	    if (temp == NULL) {
1332			xmlGenericError(xmlGenericErrorContext,
1333				"xmlSaveUri: out of memory\n");
1334                     xmlFree(ret);
1335			return(NULL);
1336		    }
1337		    ret = temp;
1338	}
1339	ret[len++] = '#';
1340	p = uri->fragment;
1341	while (*p != 0) {
1342	    if (len + 3 >= max) {
1343		max *= 2;
1344		temp = (xmlChar *) xmlRealloc(ret,
1345			(max + 1) * sizeof(xmlChar));
1346		if (temp == NULL) {
1347			xmlGenericError(xmlGenericErrorContext,
1348				"xmlSaveUri: out of memory\n");
1349                     xmlFree(ret);
1350			return(NULL);
1351		    }
1352		    ret = temp;
1353	    }
1354	    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1355		ret[len++] = *p++;
1356	    else {
1357		int val = *(unsigned char *)p++;
1358		int hi = val / 0x10, lo = val % 0x10;
1359		ret[len++] = '%';
1360		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1361		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1362	    }
1363	}
1364    }
1365    if (len >= max) {
1366	max *= 2;
1367	temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1368	if (temp == NULL) {
1369			xmlGenericError(xmlGenericErrorContext,
1370				"xmlSaveUri: out of memory\n");
1371                     xmlFree(ret);
1372			return(NULL);
1373		    }
1374		    ret = temp;
1375    }
1376    ret[len++] = 0;
1377    return(ret);
1378}
1379
1380/**
1381 * xmlPrintURI:
1382 * @stream:  a FILE* for the output
1383 * @uri:  pointer to an xmlURI
1384 *
1385 * Prints the URI in the stream @stream.
1386 */
1387void
1388xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1389    xmlChar *out;
1390
1391    out = xmlSaveUri(uri);
1392    if (out != NULL) {
1393	fprintf(stream, "%s", (char *) out);
1394	xmlFree(out);
1395    }
1396}
1397
1398/**
1399 * xmlCleanURI:
1400 * @uri:  pointer to an xmlURI
1401 *
1402 * Make sure the xmlURI struct is free of content
1403 */
1404static void
1405xmlCleanURI(xmlURIPtr uri) {
1406    if (uri == NULL) return;
1407
1408    if (uri->scheme != NULL) xmlFree(uri->scheme);
1409    uri->scheme = NULL;
1410    if (uri->server != NULL) xmlFree(uri->server);
1411    uri->server = NULL;
1412    if (uri->user != NULL) xmlFree(uri->user);
1413    uri->user = NULL;
1414    if (uri->path != NULL) xmlFree(uri->path);
1415    uri->path = NULL;
1416    if (uri->fragment != NULL) xmlFree(uri->fragment);
1417    uri->fragment = NULL;
1418    if (uri->opaque != NULL) xmlFree(uri->opaque);
1419    uri->opaque = NULL;
1420    if (uri->authority != NULL) xmlFree(uri->authority);
1421    uri->authority = NULL;
1422    if (uri->query != NULL) xmlFree(uri->query);
1423    uri->query = NULL;
1424    if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1425    uri->query_raw = NULL;
1426}
1427
1428/**
1429 * xmlFreeURI:
1430 * @uri:  pointer to an xmlURI
1431 *
1432 * Free up the xmlURI struct
1433 */
1434void
1435xmlFreeURI(xmlURIPtr uri) {
1436    if (uri == NULL) return;
1437
1438    if (uri->scheme != NULL) xmlFree(uri->scheme);
1439    if (uri->server != NULL) xmlFree(uri->server);
1440    if (uri->user != NULL) xmlFree(uri->user);
1441    if (uri->path != NULL) xmlFree(uri->path);
1442    if (uri->fragment != NULL) xmlFree(uri->fragment);
1443    if (uri->opaque != NULL) xmlFree(uri->opaque);
1444    if (uri->authority != NULL) xmlFree(uri->authority);
1445    if (uri->query != NULL) xmlFree(uri->query);
1446    if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1447    xmlFree(uri);
1448}
1449
1450/************************************************************************
1451 *									*
1452 *			Helper functions				*
1453 *									*
1454 ************************************************************************/
1455
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input. Runs of consecutive "/"
 * between segments are collapsed to a single "/" as a side effect of
 * steps (c) and (d); leading slashes are left untouched.
 *
 * Returns 0 on success or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     * "cur" always points to the first character of a segment at the top
     * of the loop, "out" is the write position of the compacted string.
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // : skip all but the last slash of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        /* copy the single remaining separator */
        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* The regions overlap, so copy by hand rather than with strcpy. */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* Walk back over the separators preceding "cur". */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;

        /*
         * There is no previous segment when we just removed the very first
         * segment of the buffer, or when only slashes precede "cur".
         * Landing on path[0] is not sufficient by itself: a one character
         * leading segment (as in "a/b/../..") also ends at path[0] and
         * must be re-examined.
         */
        if ((cur == path) || ((segp == path) && (segp[0] == '/')))
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (only done for paths starting
     * with a slash).
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
1643
/* Returns 1 when c is an ASCII hexadecimal digit (0-9, a-f, A-F), else 0 */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);
    if ((c >= 'a') && (c <= 'f'))
        return(1);
    if ((c >= 'A') && (c <= 'F'))
        return(1);
    return(0);
}
1651
1652/**
1653 * xmlURIUnescapeString:
1654 * @str:  the string to unescape
1655 * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
1656 * @target:  optional destination buffer
1657 *
1658 * Unescaping routine, but does not check that the string is an URI. The
1659 * output is a direct unsigned char translation of %XX values (no encoding)
1660 * Note that the length of the result can only be smaller or same size as
1661 * the input string.
1662 *
1663 * Returns a copy of the string, but unescaped, will return NULL only in case
1664 * of error
1665 */
1666char *
1667xmlURIUnescapeString(const char *str, int len, char *target) {
1668    char *ret, *out;
1669    const char *in;
1670
1671    if (str == NULL)
1672	return(NULL);
1673    if (len <= 0) len = strlen(str);
1674    if (len < 0) return(NULL);
1675
1676    if (target == NULL) {
1677	ret = (char *) xmlMallocAtomic(len + 1);
1678	if (ret == NULL) {
1679	    xmlGenericError(xmlGenericErrorContext,
1680		    "xmlURIUnescapeString: out of memory\n");
1681	    return(NULL);
1682	}
1683    } else
1684	ret = target;
1685    in = str;
1686    out = ret;
1687    while(len > 0) {
1688	if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
1689	    in++;
1690	    if ((*in >= '0') && (*in <= '9'))
1691	        *out = (*in - '0');
1692	    else if ((*in >= 'a') && (*in <= 'f'))
1693	        *out = (*in - 'a') + 10;
1694	    else if ((*in >= 'A') && (*in <= 'F'))
1695	        *out = (*in - 'A') + 10;
1696	    in++;
1697	    if ((*in >= '0') && (*in <= '9'))
1698	        *out = *out * 16 + (*in - '0');
1699	    else if ((*in >= 'a') && (*in <= 'f'))
1700	        *out = *out * 16 + (*in - 'a') + 10;
1701	    else if ((*in >= 'A') && (*in <= 'F'))
1702	        *out = *out * 16 + (*in - 'A') + 10;
1703	    in++;
1704	    len -= 3;
1705	    out++;
1706	} else {
1707	    *out++ = *in++;
1708	    len--;
1709	}
1710    }
1711    *out = 0;
1712    return(ret);
1713}
1714
1715/**
1716 * xmlURIEscapeStr:
1717 * @str:  string to escape
1718 * @list: exception list string of chars not to escape
1719 *
1720 * This routine escapes a string to hex, ignoring reserved characters (a-z)
1721 * and the characters in the exception list.
1722 *
1723 * Returns a new escaped string or NULL in case of error.
1724 */
1725xmlChar *
1726xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1727    xmlChar *ret, ch;
1728    xmlChar *temp;
1729    const xmlChar *in;
1730
1731    unsigned int len, out;
1732
1733    if (str == NULL)
1734	return(NULL);
1735    if (str[0] == 0)
1736	return(xmlStrdup(str));
1737    len = xmlStrlen(str);
1738    if (!(len > 0)) return(NULL);
1739
1740    len += 20;
1741    ret = (xmlChar *) xmlMallocAtomic(len);
1742    if (ret == NULL) {
1743	xmlGenericError(xmlGenericErrorContext,
1744		"xmlURIEscapeStr: out of memory\n");
1745	return(NULL);
1746    }
1747    in = (const xmlChar *) str;
1748    out = 0;
1749    while(*in != 0) {
1750	if (len - out <= 3) {
1751	    len += 20;
1752	    temp = (xmlChar *) xmlRealloc(ret, len);
1753	    if (temp == NULL) {
1754		xmlGenericError(xmlGenericErrorContext,
1755			"xmlURIEscapeStr: out of memory\n");
1756		xmlFree(ret);
1757		return(NULL);
1758	    }
1759	    ret = temp;
1760	}
1761
1762	ch = *in;
1763
1764	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1765	    unsigned char val;
1766	    ret[out++] = '%';
1767	    val = ch >> 4;
1768	    if (val <= 9)
1769		ret[out++] = '0' + val;
1770	    else
1771		ret[out++] = 'A' + val - 0xA;
1772	    val = ch & 0xF;
1773	    if (val <= 9)
1774		ret[out++] = '0' + val;
1775	    else
1776		ret[out++] = 'A' + val - 0xA;
1777	    in++;
1778	} else {
1779	    ret[out++] = *in++;
1780	}
1781
1782    }
1783    ret[out] = 0;
1784    return(ret);
1785}
1786
1787/**
1788 * xmlURIEscape:
1789 * @str:  the string of the URI to escape
1790 *
1791 * Escaping routine, does not do validity checks !
1792 * It will try to escape the chars needing this, but this is heuristic
1793 * based it's impossible to be sure.
1794 *
1795 * Returns an copy of the string, but escaped
1796 *
1797 * 25 May 2001
1798 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1799 * according to RFC2396.
1800 *   - Carl Douglas
1801 */
1802xmlChar *
1803xmlURIEscape(const xmlChar * str)
1804{
1805    xmlChar *ret, *segment = NULL;
1806    xmlURIPtr uri;
1807    int ret2;
1808
1809#define NULLCHK(p) if(!p) { \
1810                   xmlGenericError(xmlGenericErrorContext, \
1811                        "xmlURIEscape: out of memory\n"); \
1812                        xmlFreeURI(uri); \
1813                        return NULL; } \
1814
1815    if (str == NULL)
1816        return (NULL);
1817
1818    uri = xmlCreateURI();
1819    if (uri != NULL) {
1820	/*
1821	 * Allow escaping errors in the unescaped form
1822	 */
1823        uri->cleanup = 1;
1824        ret2 = xmlParseURIReference(uri, (const char *)str);
1825        if (ret2) {
1826            xmlFreeURI(uri);
1827            return (NULL);
1828        }
1829    }
1830
1831    if (!uri)
1832        return NULL;
1833
1834    ret = NULL;
1835
1836    if (uri->scheme) {
1837        segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1838        NULLCHK(segment)
1839        ret = xmlStrcat(ret, segment);
1840        ret = xmlStrcat(ret, BAD_CAST ":");
1841        xmlFree(segment);
1842    }
1843
1844    if (uri->authority) {
1845        segment =
1846            xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1847        NULLCHK(segment)
1848        ret = xmlStrcat(ret, BAD_CAST "//");
1849        ret = xmlStrcat(ret, segment);
1850        xmlFree(segment);
1851    }
1852
1853    if (uri->user) {
1854        segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1855        NULLCHK(segment)
1856		ret = xmlStrcat(ret,BAD_CAST "//");
1857        ret = xmlStrcat(ret, segment);
1858        ret = xmlStrcat(ret, BAD_CAST "@");
1859        xmlFree(segment);
1860    }
1861
1862    if (uri->server) {
1863        segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1864        NULLCHK(segment)
1865		if (uri->user == NULL)
1866		ret = xmlStrcat(ret, BAD_CAST "//");
1867        ret = xmlStrcat(ret, segment);
1868        xmlFree(segment);
1869    }
1870
1871    if (uri->port) {
1872        xmlChar port[10];
1873
1874        snprintf((char *) port, 10, "%d", uri->port);
1875        ret = xmlStrcat(ret, BAD_CAST ":");
1876        ret = xmlStrcat(ret, port);
1877    }
1878
1879    if (uri->path) {
1880        segment =
1881            xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1882        NULLCHK(segment)
1883        ret = xmlStrcat(ret, segment);
1884        xmlFree(segment);
1885    }
1886
1887    if (uri->query_raw) {
1888        ret = xmlStrcat(ret, BAD_CAST "?");
1889        ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1890    }
1891    else if (uri->query) {
1892        segment =
1893            xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1894        NULLCHK(segment)
1895        ret = xmlStrcat(ret, BAD_CAST "?");
1896        ret = xmlStrcat(ret, segment);
1897        xmlFree(segment);
1898    }
1899
1900    if (uri->opaque) {
1901        segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1902        NULLCHK(segment)
1903        ret = xmlStrcat(ret, segment);
1904        xmlFree(segment);
1905    }
1906
1907    if (uri->fragment) {
1908        segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1909        NULLCHK(segment)
1910        ret = xmlStrcat(ret, BAD_CAST "#");
1911        ret = xmlStrcat(ret, segment);
1912        xmlFree(segment);
1913    }
1914
1915    xmlFreeURI(uri);
1916#undef NULLCHK
1917
1918    return (ret);
1919}
1920
1921/************************************************************************
1922 *									*
1923 *			Public functions				*
1924 *									*
1925 ************************************************************************/
1926
1927/**
1928 * xmlBuildURI:
1929 * @URI:  the URI instance found in the document
1930 * @base:  the base value
1931 *
1932 * Computes he final URI of the reference done by checking that
1933 * the given URI is valid, and building the final URI using the
1934 * base URI. This is processed according to section 5.2 of the
1935 * RFC 2396
1936 *
1937 * 5.2. Resolving Relative References to Absolute Form
1938 *
1939 * Returns a new URI string (to be freed by the caller) or NULL in case
1940 *         of error.
1941 */
1942xmlChar *
1943xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1944    xmlChar *val = NULL;
1945    int ret, len, indx, cur, out;
1946    xmlURIPtr ref = NULL;
1947    xmlURIPtr bas = NULL;
1948    xmlURIPtr res = NULL;
1949
1950    /*
1951     * 1) The URI reference is parsed into the potential four components and
1952     *    fragment identifier, as described in Section 4.3.
1953     *
1954     *    NOTE that a completely empty URI is treated by modern browsers
1955     *    as a reference to "." rather than as a synonym for the current
1956     *    URI.  Should we do that here?
1957     */
1958    if (URI == NULL)
1959	ret = -1;
1960    else {
1961	if (*URI) {
1962	    ref = xmlCreateURI();
1963	    if (ref == NULL)
1964		goto done;
1965	    ret = xmlParseURIReference(ref, (const char *) URI);
1966	}
1967	else
1968	    ret = 0;
1969    }
1970    if (ret != 0)
1971	goto done;
1972    if ((ref != NULL) && (ref->scheme != NULL)) {
1973	/*
1974	 * The URI is absolute don't modify.
1975	 */
1976	val = xmlStrdup(URI);
1977	goto done;
1978    }
1979    if (base == NULL)
1980	ret = -1;
1981    else {
1982	bas = xmlCreateURI();
1983	if (bas == NULL)
1984	    goto done;
1985	ret = xmlParseURIReference(bas, (const char *) base);
1986    }
1987    if (ret != 0) {
1988	if (ref)
1989	    val = xmlSaveUri(ref);
1990	goto done;
1991    }
1992    if (ref == NULL) {
1993	/*
1994	 * the base fragment must be ignored
1995	 */
1996	if (bas->fragment != NULL) {
1997	    xmlFree(bas->fragment);
1998	    bas->fragment = NULL;
1999	}
2000	val = xmlSaveUri(bas);
2001	goto done;
2002    }
2003
2004    /*
2005     * 2) If the path component is empty and the scheme, authority, and
2006     *    query components are undefined, then it is a reference to the
2007     *    current document and we are done.  Otherwise, the reference URI's
2008     *    query and fragment components are defined as found (or not found)
2009     *    within the URI reference and not inherited from the base URI.
2010     *
2011     *    NOTE that in modern browsers, the parsing differs from the above
2012     *    in the following aspect:  the query component is allowed to be
2013     *    defined while still treating this as a reference to the current
2014     *    document.
2015     */
2016    res = xmlCreateURI();
2017    if (res == NULL)
2018	goto done;
2019    if ((ref->scheme == NULL) && (ref->path == NULL) &&
2020	((ref->authority == NULL) && (ref->server == NULL))) {
2021	if (bas->scheme != NULL)
2022	    res->scheme = xmlMemStrdup(bas->scheme);
2023	if (bas->authority != NULL)
2024	    res->authority = xmlMemStrdup(bas->authority);
2025	else if (bas->server != NULL) {
2026	    res->server = xmlMemStrdup(bas->server);
2027	    if (bas->user != NULL)
2028		res->user = xmlMemStrdup(bas->user);
2029	    res->port = bas->port;
2030	}
2031	if (bas->path != NULL)
2032	    res->path = xmlMemStrdup(bas->path);
2033	if (ref->query_raw != NULL)
2034	    res->query_raw = xmlMemStrdup (ref->query_raw);
2035	else if (ref->query != NULL)
2036	    res->query = xmlMemStrdup(ref->query);
2037	else if (bas->query_raw != NULL)
2038	    res->query_raw = xmlMemStrdup(bas->query_raw);
2039	else if (bas->query != NULL)
2040	    res->query = xmlMemStrdup(bas->query);
2041	if (ref->fragment != NULL)
2042	    res->fragment = xmlMemStrdup(ref->fragment);
2043	goto step_7;
2044    }
2045
2046    /*
2047     * 3) If the scheme component is defined, indicating that the reference
2048     *    starts with a scheme name, then the reference is interpreted as an
2049     *    absolute URI and we are done.  Otherwise, the reference URI's
2050     *    scheme is inherited from the base URI's scheme component.
2051     */
2052    if (ref->scheme != NULL) {
2053	val = xmlSaveUri(ref);
2054	goto done;
2055    }
2056    if (bas->scheme != NULL)
2057	res->scheme = xmlMemStrdup(bas->scheme);
2058
2059    if (ref->query_raw != NULL)
2060	res->query_raw = xmlMemStrdup(ref->query_raw);
2061    else if (ref->query != NULL)
2062	res->query = xmlMemStrdup(ref->query);
2063    if (ref->fragment != NULL)
2064	res->fragment = xmlMemStrdup(ref->fragment);
2065
2066    /*
2067     * 4) If the authority component is defined, then the reference is a
2068     *    network-path and we skip to step 7.  Otherwise, the reference
2069     *    URI's authority is inherited from the base URI's authority
2070     *    component, which will also be undefined if the URI scheme does not
2071     *    use an authority component.
2072     */
2073    if ((ref->authority != NULL) || (ref->server != NULL)) {
2074	if (ref->authority != NULL)
2075	    res->authority = xmlMemStrdup(ref->authority);
2076	else {
2077	    res->server = xmlMemStrdup(ref->server);
2078	    if (ref->user != NULL)
2079		res->user = xmlMemStrdup(ref->user);
2080            res->port = ref->port;
2081	}
2082	if (ref->path != NULL)
2083	    res->path = xmlMemStrdup(ref->path);
2084	goto step_7;
2085    }
2086    if (bas->authority != NULL)
2087	res->authority = xmlMemStrdup(bas->authority);
2088    else if (bas->server != NULL) {
2089	res->server = xmlMemStrdup(bas->server);
2090	if (bas->user != NULL)
2091	    res->user = xmlMemStrdup(bas->user);
2092	res->port = bas->port;
2093    }
2094
2095    /*
2096     * 5) If the path component begins with a slash character ("/"), then
2097     *    the reference is an absolute-path and we skip to step 7.
2098     */
2099    if ((ref->path != NULL) && (ref->path[0] == '/')) {
2100	res->path = xmlMemStrdup(ref->path);
2101	goto step_7;
2102    }
2103
2104
2105    /*
2106     * 6) If this step is reached, then we are resolving a relative-path
2107     *    reference.  The relative path needs to be merged with the base
2108     *    URI's path.  Although there are many ways to do this, we will
2109     *    describe a simple method using a separate string buffer.
2110     *
2111     * Allocate a buffer large enough for the result string.
2112     */
2113    len = 2; /* extra / and 0 */
2114    if (ref->path != NULL)
2115	len += strlen(ref->path);
2116    if (bas->path != NULL)
2117	len += strlen(bas->path);
2118    res->path = (char *) xmlMallocAtomic(len);
2119    if (res->path == NULL) {
2120	xmlGenericError(xmlGenericErrorContext,
2121		"xmlBuildURI: out of memory\n");
2122	goto done;
2123    }
2124    res->path[0] = 0;
2125
2126    /*
2127     * a) All but the last segment of the base URI's path component is
2128     *    copied to the buffer.  In other words, any characters after the
2129     *    last (right-most) slash character, if any, are excluded.
2130     */
2131    cur = 0;
2132    out = 0;
2133    if (bas->path != NULL) {
2134	while (bas->path[cur] != 0) {
2135	    while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2136		cur++;
2137	    if (bas->path[cur] == 0)
2138		break;
2139
2140	    cur++;
2141	    while (out < cur) {
2142		res->path[out] = bas->path[out];
2143		out++;
2144	    }
2145	}
2146    }
2147    res->path[out] = 0;
2148
2149    /*
2150     * b) The reference's path component is appended to the buffer
2151     *    string.
2152     */
2153    if (ref->path != NULL && ref->path[0] != 0) {
2154	indx = 0;
2155	/*
2156	 * Ensure the path includes a '/'
2157	 */
2158	if ((out == 0) && (bas->server != NULL))
2159	    res->path[out++] = '/';
2160	while (ref->path[indx] != 0) {
2161	    res->path[out++] = ref->path[indx++];
2162	}
2163    }
2164    res->path[out] = 0;
2165
2166    /*
2167     * Steps c) to h) are really path normalization steps
2168     */
2169    xmlNormalizeURIPath(res->path);
2170
2171step_7:
2172
2173    /*
2174     * 7) The resulting URI components, including any inherited from the
2175     *    base URI, are recombined to give the absolute form of the URI
2176     *    reference.
2177     */
2178    val = xmlSaveUri(res);
2179
2180done:
2181    if (ref != NULL)
2182	xmlFreeURI(ref);
2183    if (bas != NULL)
2184	xmlFreeURI(bas);
2185    if (res != NULL)
2186	xmlFreeURI(res);
2187    return(val);
2188}
2189
2190/**
2191 * xmlBuildRelativeURI:
2192 * @URI:  the URI reference under consideration
2193 * @base:  the base value
2194 *
2195 * Expresses the URI of the reference in terms relative to the
2196 * base.  Some examples of this operation include:
2197 *     base = "http://site1.com/docs/book1.html"
2198 *        URI input                        URI returned
2199 *     docs/pic1.gif                    pic1.gif
2200 *     docs/img/pic1.gif                img/pic1.gif
2201 *     img/pic1.gif                     ../img/pic1.gif
2202 *     http://site1.com/docs/pic1.gif   pic1.gif
2203 *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
2204 *
2205 *     base = "docs/book1.html"
2206 *        URI input                        URI returned
2207 *     docs/pic1.gif                    pic1.gif
2208 *     docs/img/pic1.gif                img/pic1.gif
2209 *     img/pic1.gif                     ../img/pic1.gif
2210 *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
2211 *
2212 *
2213 * Note: if the URI reference is really wierd or complicated, it may be
2214 *       worthwhile to first convert it into a "nice" one by calling
2215 *       xmlBuildURI (using 'base') before calling this routine,
2216 *       since this routine (for reasonable efficiency) assumes URI has
2217 *       already been through some validation.
2218 *
2219 * Returns a new URI string (to be freed by the caller) or NULL in case
2220 * error.
2221 */
2222xmlChar *
2223xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2224{
2225    xmlChar *val = NULL;
2226    int ret;
2227    int ix;
2228    int pos = 0;
2229    int nbslash = 0;
2230    int len;
2231    xmlURIPtr ref = NULL;
2232    xmlURIPtr bas = NULL;
2233    xmlChar *bptr, *uptr, *vptr;
2234    int remove_path = 0;
2235
2236    if ((URI == NULL) || (*URI == 0))
2237	return NULL;
2238
2239    /*
2240     * First parse URI into a standard form
2241     */
2242    ref = xmlCreateURI ();
2243    if (ref == NULL)
2244	return NULL;
2245    /* If URI not already in "relative" form */
2246    if (URI[0] != '.') {
2247	ret = xmlParseURIReference (ref, (const char *) URI);
2248	if (ret != 0)
2249	    goto done;		/* Error in URI, return NULL */
2250    } else
2251	ref->path = (char *)xmlStrdup(URI);
2252
2253    /*
2254     * Next parse base into the same standard form
2255     */
2256    if ((base == NULL) || (*base == 0)) {
2257	val = xmlStrdup (URI);
2258	goto done;
2259    }
2260    bas = xmlCreateURI ();
2261    if (bas == NULL)
2262	goto done;
2263    if (base[0] != '.') {
2264	ret = xmlParseURIReference (bas, (const char *) base);
2265	if (ret != 0)
2266	    goto done;		/* Error in base, return NULL */
2267    } else
2268	bas->path = (char *)xmlStrdup(base);
2269
2270    /*
2271     * If the scheme / server on the URI differs from the base,
2272     * just return the URI
2273     */
2274    if ((ref->scheme != NULL) &&
2275	((bas->scheme == NULL) ||
2276	 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2277	 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2278	val = xmlStrdup (URI);
2279	goto done;
2280    }
2281    if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2282	val = xmlStrdup(BAD_CAST "");
2283	goto done;
2284    }
2285    if (bas->path == NULL) {
2286	val = xmlStrdup((xmlChar *)ref->path);
2287	goto done;
2288    }
2289    if (ref->path == NULL) {
2290        ref->path = (char *) "/";
2291	remove_path = 1;
2292    }
2293
2294    /*
2295     * At this point (at last!) we can compare the two paths
2296     *
2297     * First we take care of the special case where either of the
2298     * two path components may be missing (bug 316224)
2299     */
2300    if (bas->path == NULL) {
2301	if (ref->path != NULL) {
2302	    uptr = (xmlChar *) ref->path;
2303	    if (*uptr == '/')
2304		uptr++;
2305	    /* exception characters from xmlSaveUri */
2306	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2307	}
2308	goto done;
2309    }
2310    bptr = (xmlChar *)bas->path;
2311    if (ref->path == NULL) {
2312	for (ix = 0; bptr[ix] != 0; ix++) {
2313	    if (bptr[ix] == '/')
2314		nbslash++;
2315	}
2316	uptr = NULL;
2317	len = 1;	/* this is for a string terminator only */
2318    } else {
2319    /*
2320     * Next we compare the two strings and find where they first differ
2321     */
2322	if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2323            pos += 2;
2324	if ((*bptr == '.') && (bptr[1] == '/'))
2325            bptr += 2;
2326	else if ((*bptr == '/') && (ref->path[pos] != '/'))
2327	    bptr++;
2328	while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2329	    pos++;
2330
2331	if (bptr[pos] == ref->path[pos]) {
2332	    val = xmlStrdup(BAD_CAST "");
2333	    goto done;		/* (I can't imagine why anyone would do this) */
2334	}
2335
2336	/*
2337	 * In URI, "back up" to the last '/' encountered.  This will be the
2338	 * beginning of the "unique" suffix of URI
2339	 */
2340	ix = pos;
2341	if ((ref->path[ix] == '/') && (ix > 0))
2342	    ix--;
2343	else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2344	    ix -= 2;
2345	for (; ix > 0; ix--) {
2346	    if (ref->path[ix] == '/')
2347		break;
2348	}
2349	if (ix == 0) {
2350	    uptr = (xmlChar *)ref->path;
2351	} else {
2352	    ix++;
2353	    uptr = (xmlChar *)&ref->path[ix];
2354	}
2355
2356	/*
2357	 * In base, count the number of '/' from the differing point
2358	 */
2359	if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2360	    for (; bptr[ix] != 0; ix++) {
2361		if (bptr[ix] == '/')
2362		    nbslash++;
2363	    }
2364	}
2365	len = xmlStrlen (uptr) + 1;
2366    }
2367
2368    if (nbslash == 0) {
2369	if (uptr != NULL)
2370	    /* exception characters from xmlSaveUri */
2371	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2372	goto done;
2373    }
2374
2375    /*
2376     * Allocate just enough space for the returned string -
2377     * length of the remainder of the URI, plus enough space
2378     * for the "../" groups, plus one for the terminator
2379     */
2380    val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2381    if (val == NULL) {
2382	xmlGenericError(xmlGenericErrorContext,
2383		"xmlBuildRelativeURI: out of memory\n");
2384	goto done;
2385    }
2386    vptr = val;
2387    /*
2388     * Put in as many "../" as needed
2389     */
2390    for (; nbslash>0; nbslash--) {
2391	*vptr++ = '.';
2392	*vptr++ = '.';
2393	*vptr++ = '/';
2394    }
2395    /*
2396     * Finish up with the end of the URI
2397     */
2398    if (uptr != NULL) {
2399        if ((vptr > val) && (len > 0) &&
2400	    (uptr[0] == '/') && (vptr[-1] == '/')) {
2401	    memcpy (vptr, uptr + 1, len - 1);
2402	    vptr[len - 2] = 0;
2403	} else {
2404	    memcpy (vptr, uptr, len);
2405	    vptr[len - 1] = 0;
2406	}
2407    } else {
2408	vptr[len - 1] = 0;
2409    }
2410
2411    /* escape the freshly-built path */
2412    vptr = val;
2413	/* exception characters from xmlSaveUri */
2414    val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2415    xmlFree(vptr);
2416
2417done:
2418    /*
2419     * Free the working variables
2420     */
2421    if (remove_path != 0)
2422        ref->path = NULL;
2423    if (ref != NULL)
2424	xmlFreeURI (ref);
2425    if (bas != NULL)
2426	xmlFreeURI (bas);
2427
2428    return val;
2429}
2430
2431/**
2432 * xmlCanonicPath:
2433 * @path:  the resource locator in a filesystem notation
2434 *
2435 * Constructs a canonic path from the specified path.
2436 *
2437 * Returns a new canonic path, or a duplicate of the path parameter if the
2438 * construction fails. The caller is responsible for freeing the memory occupied
2439 * by the returned string. If there is insufficient memory available, or the
2440 * argument is NULL, the function returns NULL.
2441 */
2442#define IS_WINDOWS_PATH(p) 					\
2443	((p != NULL) &&						\
2444	 (((p[0] >= 'a') && (p[0] <= 'z')) ||			\
2445	  ((p[0] >= 'A') && (p[0] <= 'Z'))) &&			\
2446	 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2447xmlChar *
2448xmlCanonicPath(const xmlChar *path)
2449{
2450/*
2451 * For Windows implementations, additional work needs to be done to
2452 * replace backslashes in pathnames with "forward slashes"
2453 */
2454#if defined(_WIN32) && !defined(__CYGWIN__)
2455    int len = 0;
2456    int i = 0;
2457    xmlChar *p = NULL;
2458#endif
2459    xmlURIPtr uri;
2460    xmlChar *ret;
2461    const xmlChar *absuri;
2462
2463    if (path == NULL)
2464	return(NULL);
2465
2466    /* sanitize filename starting with // so it can be used as URI */
2467    if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2468        path++;
2469
2470    if ((uri = xmlParseURI((const char *) path)) != NULL) {
2471	xmlFreeURI(uri);
2472	return xmlStrdup(path);
2473    }
2474
2475    /* Check if this is an "absolute uri" */
2476    absuri = xmlStrstr(path, BAD_CAST "://");
2477    if (absuri != NULL) {
2478        int l, j;
2479	unsigned char c;
2480	xmlChar *escURI;
2481
2482        /*
2483	 * this looks like an URI where some parts have not been
2484	 * escaped leading to a parsing problem.  Check that the first
2485	 * part matches a protocol.
2486	 */
2487	l = absuri - path;
2488	/* Bypass if first part (part before the '://') is > 20 chars */
2489	if ((l <= 0) || (l > 20))
2490	    goto path_processing;
2491	/* Bypass if any non-alpha characters are present in first part */
2492	for (j = 0;j < l;j++) {
2493	    c = path[j];
2494	    if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2495	        goto path_processing;
2496	}
2497
2498	/* Escape all except the characters specified in the supplied path */
2499        escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2500	if (escURI != NULL) {
2501	    /* Try parsing the escaped path */
2502	    uri = xmlParseURI((const char *) escURI);
2503	    /* If successful, return the escaped string */
2504	    if (uri != NULL) {
2505	        xmlFreeURI(uri);
2506		return escURI;
2507	    }
2508	}
2509    }
2510
2511path_processing:
2512/* For Windows implementations, replace backslashes with 'forward slashes' */
2513#if defined(_WIN32) && !defined(__CYGWIN__)
2514    /*
2515     * Create a URI structure
2516     */
2517    uri = xmlCreateURI();
2518    if (uri == NULL) {		/* Guard against 'out of memory' */
2519        return(NULL);
2520    }
2521
2522    len = xmlStrlen(path);
2523    if ((len > 2) && IS_WINDOWS_PATH(path)) {
2524        /* make the scheme 'file' */
2525	uri->scheme = xmlStrdup(BAD_CAST "file");
2526	/* allocate space for leading '/' + path + string terminator */
2527	uri->path = xmlMallocAtomic(len + 2);
2528	if (uri->path == NULL) {
2529	    xmlFreeURI(uri);	/* Guard agains 'out of memory' */
2530	    return(NULL);
2531	}
2532	/* Put in leading '/' plus path */
2533	uri->path[0] = '/';
2534	p = uri->path + 1;
2535	strncpy(p, path, len + 1);
2536    } else {
2537	uri->path = xmlStrdup(path);
2538	if (uri->path == NULL) {
2539	    xmlFreeURI(uri);
2540	    return(NULL);
2541	}
2542	p = uri->path;
2543    }
2544    /* Now change all occurences of '\' to '/' */
2545    while (*p != '\0') {
2546	if (*p == '\\')
2547	    *p = '/';
2548	p++;
2549    }
2550
2551    if (uri->scheme == NULL) {
2552	ret = xmlStrdup((const xmlChar *) uri->path);
2553    } else {
2554	ret = xmlSaveUri(uri);
2555    }
2556
2557    xmlFreeURI(uri);
2558#else
2559    ret = xmlStrdup((const xmlChar *) path);
2560#endif
2561    return(ret);
2562}
2563
2564/**
2565 * xmlPathToURI:
2566 * @path:  the resource locator in a filesystem notation
2567 *
2568 * Constructs an URI expressing the existing path
2569 *
2570 * Returns a new URI, or a duplicate of the path parameter if the
2571 * construction fails. The caller is responsible for freeing the memory
2572 * occupied by the returned string. If there is insufficient memory available,
2573 * or the argument is NULL, the function returns NULL.
2574 */
2575xmlChar *
2576xmlPathToURI(const xmlChar *path)
2577{
2578    xmlURIPtr uri;
2579    xmlURI temp;
2580    xmlChar *ret, *cal;
2581
2582    if (path == NULL)
2583        return(NULL);
2584
2585    if ((uri = xmlParseURI((const char *) path)) != NULL) {
2586	xmlFreeURI(uri);
2587	return xmlStrdup(path);
2588    }
2589    cal = xmlCanonicPath(path);
2590    if (cal == NULL)
2591        return(NULL);
2592#if defined(_WIN32) && !defined(__CYGWIN__)
2593    /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2594       If 'cal' is a valid URI allready then we are done here, as continuing would make
2595       it invalid. */
2596    if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2597	xmlFreeURI(uri);
2598	return cal;
2599    }
2600    /* 'cal' can contain a relative path with backslashes. If that is processed
2601       by xmlSaveURI, they will be escaped and the external entity loader machinery
2602       will fail. So convert them to slashes. Misuse 'ret' for walking. */
2603    ret = cal;
2604    while (*ret != '\0') {
2605	if (*ret == '\\')
2606	    *ret = '/';
2607	ret++;
2608    }
2609#endif
2610    memset(&temp, 0, sizeof(temp));
2611    temp.path = (char *) cal;
2612    ret = xmlSaveUri(&temp);
2613    xmlFree(cal);
2614    return(ret);
2615}
2616#define bottom_uri
2617#include "elfgcchack.h"
2618