1/** 2 * uri.c: set of generic URI related routines 3 * 4 * Reference: RFCs 2396, 2732 and 2373 5 * 6 * See Copyright for the status of this software. 7 * 8 * daniel@veillard.com 9 */ 10 11#define IN_LIBXML 12#include "libxml.h" 13 14#include <string.h> 15 16#include <libxml/xmlmemory.h> 17#include <libxml/uri.h> 18#include <libxml/globals.h> 19#include <libxml/xmlerror.h> 20 21/************************************************************************ 22 * * 23 * Macros to differentiate various character type * 24 * directly extracted from RFC 2396 * 25 * * 26 ************************************************************************/ 27 28/* 29 * alpha = lowalpha | upalpha 30 */ 31#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x)) 32 33 34/* 35 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | 36 * "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | 37 * "u" | "v" | "w" | "x" | "y" | "z" 38 */ 39 40#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z')) 41 42/* 43 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | 44 * "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | 45 * "U" | "V" | "W" | "X" | "Y" | "Z" 46 */ 47#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z')) 48 49#ifdef IS_DIGIT 50#undef IS_DIGIT 51#endif 52/* 53 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" 54 */ 55#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9')) 56 57/* 58 * alphanum = alpha | digit 59 */ 60 61#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x)) 62 63/* 64 * hex = digit | "A" | "B" | "C" | "D" | "E" | "F" | 65 * "a" | "b" | "c" | "d" | "e" | "f" 66 */ 67 68#define IS_HEX(x) ((IS_DIGIT(x)) || (((x) >= 'a') && ((x) <= 'f')) || \ 69 (((x) >= 'A') && ((x) <= 'F'))) 70 71/* 72 * mark = "-" | "_" | "." | "!" 
| "~" | "*" | "'" | "(" | ")" 73 */ 74 75#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') || \ 76 ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') || \ 77 ((x) == '(') || ((x) == ')')) 78 79 80/* 81 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," | 82 * "[" | "]" 83 */ 84 85#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \ 86 ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \ 87 ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \ 88 ((x) == ']')) 89 90/* 91 * unreserved = alphanum | mark 92 */ 93 94#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x)) 95 96/* 97 * escaped = "%" hex hex 98 */ 99 100#define IS_ESCAPED(p) ((*(p) == '%') && (IS_HEX((p)[1])) && \ 101 (IS_HEX((p)[2]))) 102 103/* 104 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" | 105 * "&" | "=" | "+" | "$" | "," 106 */ 107#define IS_URIC_NO_SLASH(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||\ 108 ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||\ 109 ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||\ 110 ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ','))) 111 112/* 113 * pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | "," 114 */ 115#define IS_PCHAR(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \ 116 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||\ 117 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||\ 118 ((*(p) == ','))) 119 120/* 121 * rel_segment = 1*( unreserved | escaped | 122 * ";" | "@" | "&" | "=" | "+" | "$" | "," ) 123 */ 124 125#define IS_SEGMENT(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \ 126 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) || \ 127 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || \ 128 ((*(p) == ','))) 129 130/* 131 * scheme = alpha *( alpha | digit | "+" | "-" | "." 
) 132 */ 133 134#define IS_SCHEME(x) ((IS_ALPHA(x)) || (IS_DIGIT(x)) || \ 135 ((x) == '+') || ((x) == '-') || ((x) == '.')) 136 137/* 138 * reg_name = 1*( unreserved | escaped | "$" | "," | 139 * ";" | ":" | "@" | "&" | "=" | "+" ) 140 */ 141 142#define IS_REG_NAME(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \ 143 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) || \ 144 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) || \ 145 ((*(p) == '=')) || ((*(p) == '+'))) 146 147/* 148 * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" | 149 * "+" | "$" | "," ) 150 */ 151#define IS_USERINFO(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \ 152 ((*(p) == ';')) || ((*(p) == ':')) || ((*(p) == '&')) || \ 153 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || \ 154 ((*(p) == ','))) 155 156/* 157 * uric = reserved | unreserved | escaped 158 */ 159 160#define IS_URIC(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \ 161 (IS_RESERVED(*(p)))) 162 163/* 164* unwise = "{" | "}" | "|" | "\" | "^" | "`" 165*/ 166 167#define IS_UNWISE(p) \ 168 (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) || \ 169 ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) || \ 170 ((*(p) == ']')) || ((*(p) == '`'))) 171 172/* 173 * Skip to next pointer char, handle escaped sequences 174 */ 175 176#define NEXT(p) ((*p == '%')? p += 3 : p++) 177 178/* 179 * Productions from the spec. 
180 * 181 * authority = server | reg_name 182 * reg_name = 1*( unreserved | escaped | "$" | "," | 183 * ";" | ":" | "@" | "&" | "=" | "+" ) 184 * 185 * path = [ abs_path | opaque_part ] 186 */ 187 188#define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n)) 189 190/************************************************************************ 191 * * 192 * Generic URI structure functions * 193 * * 194 ************************************************************************/ 195 196/** 197 * xmlCreateURI: 198 * 199 * Simply creates an empty xmlURI 200 * 201 * Returns the new structure or NULL in case of error 202 */ 203xmlURIPtr 204xmlCreateURI(void) { 205 xmlURIPtr ret; 206 207 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI)); 208 if (ret == NULL) { 209 xmlGenericError(xmlGenericErrorContext, 210 "xmlCreateURI: out of memory\n"); 211 return(NULL); 212 } 213 memset(ret, 0, sizeof(xmlURI)); 214 return(ret); 215} 216 217/** 218 * xmlSaveUri: 219 * @uri: pointer to an xmlURI 220 * 221 * Save the URI as an escaped string 222 * 223 * Returns a new string (to be deallocated by caller) 224 */ 225xmlChar * 226xmlSaveUri(xmlURIPtr uri) { 227 xmlChar *ret = NULL; 228 const char *p; 229 int len; 230 int max; 231 232 if (uri == NULL) return(NULL); 233 234 235 max = 80; 236 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar)); 237 if (ret == NULL) { 238 xmlGenericError(xmlGenericErrorContext, 239 "xmlSaveUri: out of memory\n"); 240 return(NULL); 241 } 242 len = 0; 243 244 if (uri->scheme != NULL) { 245 p = uri->scheme; 246 while (*p != 0) { 247 if (len >= max) { 248 max *= 2; 249 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); 250 if (ret == NULL) { 251 xmlGenericError(xmlGenericErrorContext, 252 "xmlSaveUri: out of memory\n"); 253 return(NULL); 254 } 255 } 256 ret[len++] = *p++; 257 } 258 if (len >= max) { 259 max *= 2; 260 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); 261 if (ret == NULL) { 262 xmlGenericError(xmlGenericErrorContext, 
263 "xmlSaveUri: out of memory\n"); 264 return(NULL); 265 } 266 } 267 ret[len++] = ':'; 268 } 269 if (uri->opaque != NULL) { 270 p = uri->opaque; 271 while (*p != 0) { 272 if (len + 3 >= max) { 273 max *= 2; 274 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); 275 if (ret == NULL) { 276 xmlGenericError(xmlGenericErrorContext, 277 "xmlSaveUri: out of memory\n"); 278 return(NULL); 279 } 280 } 281 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p))) 282 ret[len++] = *p++; 283 else { 284 int val = *(unsigned char *)p++; 285 int hi = val / 0x10, lo = val % 0x10; 286 ret[len++] = '%'; 287 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 288 ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 289 } 290 } 291 } else { 292 if (uri->server != NULL) { 293 if (len + 3 >= max) { 294 max *= 2; 295 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); 296 if (ret == NULL) { 297 xmlGenericError(xmlGenericErrorContext, 298 "xmlSaveUri: out of memory\n"); 299 return(NULL); 300 } 301 } 302 ret[len++] = '/'; 303 ret[len++] = '/'; 304 if (uri->user != NULL) { 305 p = uri->user; 306 while (*p != 0) { 307 if (len + 3 >= max) { 308 max *= 2; 309 ret = (xmlChar *) xmlRealloc(ret, 310 (max + 1) * sizeof(xmlChar)); 311 if (ret == NULL) { 312 xmlGenericError(xmlGenericErrorContext, 313 "xmlSaveUri: out of memory\n"); 314 return(NULL); 315 } 316 } 317 if ((IS_UNRESERVED(*(p))) || 318 ((*(p) == ';')) || ((*(p) == ':')) || 319 ((*(p) == '&')) || ((*(p) == '=')) || 320 ((*(p) == '+')) || ((*(p) == '$')) || 321 ((*(p) == ','))) 322 ret[len++] = *p++; 323 else { 324 int val = *(unsigned char *)p++; 325 int hi = val / 0x10, lo = val % 0x10; 326 ret[len++] = '%'; 327 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 328 ret[len++] = lo + (lo > 9? 
'A'-10 : '0'); 329 } 330 } 331 if (len + 3 >= max) { 332 max *= 2; 333 ret = (xmlChar *) xmlRealloc(ret, 334 (max + 1) * sizeof(xmlChar)); 335 if (ret == NULL) { 336 xmlGenericError(xmlGenericErrorContext, 337 "xmlSaveUri: out of memory\n"); 338 return(NULL); 339 } 340 } 341 ret[len++] = '@'; 342 } 343 p = uri->server; 344 while (*p != 0) { 345 if (len >= max) { 346 max *= 2; 347 ret = (xmlChar *) xmlRealloc(ret, 348 (max + 1) * sizeof(xmlChar)); 349 if (ret == NULL) { 350 xmlGenericError(xmlGenericErrorContext, 351 "xmlSaveUri: out of memory\n"); 352 return(NULL); 353 } 354 } 355 ret[len++] = *p++; 356 } 357 if (uri->port > 0) { 358 if (len + 10 >= max) { 359 max *= 2; 360 ret = (xmlChar *) xmlRealloc(ret, 361 (max + 1) * sizeof(xmlChar)); 362 if (ret == NULL) { 363 xmlGenericError(xmlGenericErrorContext, 364 "xmlSaveUri: out of memory\n"); 365 return(NULL); 366 } 367 } 368 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port); 369 } 370 } else if (uri->authority != NULL) { 371 if (len + 3 >= max) { 372 max *= 2; 373 ret = (xmlChar *) xmlRealloc(ret, 374 (max + 1) * sizeof(xmlChar)); 375 if (ret == NULL) { 376 xmlGenericError(xmlGenericErrorContext, 377 "xmlSaveUri: out of memory\n"); 378 return(NULL); 379 } 380 } 381 ret[len++] = '/'; 382 ret[len++] = '/'; 383 p = uri->authority; 384 while (*p != 0) { 385 if (len + 3 >= max) { 386 max *= 2; 387 ret = (xmlChar *) xmlRealloc(ret, 388 (max + 1) * sizeof(xmlChar)); 389 if (ret == NULL) { 390 xmlGenericError(xmlGenericErrorContext, 391 "xmlSaveUri: out of memory\n"); 392 return(NULL); 393 } 394 } 395 if ((IS_UNRESERVED(*(p))) || 396 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) || 397 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) || 398 ((*(p) == '=')) || ((*(p) == '+'))) 399 ret[len++] = *p++; 400 else { 401 int val = *(unsigned char *)p++; 402 int hi = val / 0x10, lo = val % 0x10; 403 ret[len++] = '%'; 404 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 405 ret[len++] = lo + (lo > 9? 
'A'-10 : '0'); 406 } 407 } 408 } else if (uri->scheme != NULL) { 409 if (len + 3 >= max) { 410 max *= 2; 411 ret = (xmlChar *) xmlRealloc(ret, 412 (max + 1) * sizeof(xmlChar)); 413 if (ret == NULL) { 414 xmlGenericError(xmlGenericErrorContext, 415 "xmlSaveUri: out of memory\n"); 416 return(NULL); 417 } 418 } 419 ret[len++] = '/'; 420 ret[len++] = '/'; 421 } 422 if (uri->path != NULL) { 423 p = uri->path; 424 while (*p != 0) { 425 if (len + 3 >= max) { 426 max *= 2; 427 ret = (xmlChar *) xmlRealloc(ret, 428 (max + 1) * sizeof(xmlChar)); 429 if (ret == NULL) { 430 xmlGenericError(xmlGenericErrorContext, 431 "xmlSaveUri: out of memory\n"); 432 return(NULL); 433 } 434 } 435 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) || 436 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) || 437 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || 438 ((*(p) == ','))) 439 ret[len++] = *p++; 440 else { 441 int val = *(unsigned char *)p++; 442 int hi = val / 0x10, lo = val % 0x10; 443 ret[len++] = '%'; 444 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 445 ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 446 } 447 } 448 } 449 if (uri->query != NULL) { 450 if (len + 3 >= max) { 451 max *= 2; 452 ret = (xmlChar *) xmlRealloc(ret, 453 (max + 1) * sizeof(xmlChar)); 454 if (ret == NULL) { 455 xmlGenericError(xmlGenericErrorContext, 456 "xmlSaveUri: out of memory\n"); 457 return(NULL); 458 } 459 } 460 ret[len++] = '?'; 461 p = uri->query; 462 while (*p != 0) { 463 if (len + 3 >= max) { 464 max *= 2; 465 ret = (xmlChar *) xmlRealloc(ret, 466 (max + 1) * sizeof(xmlChar)); 467 if (ret == NULL) { 468 xmlGenericError(xmlGenericErrorContext, 469 "xmlSaveUri: out of memory\n"); 470 return(NULL); 471 } 472 } 473 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) 474 ret[len++] = *p++; 475 else { 476 int val = *(unsigned char *)p++; 477 int hi = val / 0x10, lo = val % 0x10; 478 ret[len++] = '%'; 479 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 480 ret[len++] = lo + (lo > 9? 
'A'-10 : '0'); 481 } 482 } 483 } 484 } 485 if (uri->fragment != NULL) { 486 if (len + 3 >= max) { 487 max *= 2; 488 ret = (xmlChar *) xmlRealloc(ret, 489 (max + 1) * sizeof(xmlChar)); 490 if (ret == NULL) { 491 xmlGenericError(xmlGenericErrorContext, 492 "xmlSaveUri: out of memory\n"); 493 return(NULL); 494 } 495 } 496 ret[len++] = '#'; 497 p = uri->fragment; 498 while (*p != 0) { 499 if (len + 3 >= max) { 500 max *= 2; 501 ret = (xmlChar *) xmlRealloc(ret, 502 (max + 1) * sizeof(xmlChar)); 503 if (ret == NULL) { 504 xmlGenericError(xmlGenericErrorContext, 505 "xmlSaveUri: out of memory\n"); 506 return(NULL); 507 } 508 } 509 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) 510 ret[len++] = *p++; 511 else { 512 int val = *(unsigned char *)p++; 513 int hi = val / 0x10, lo = val % 0x10; 514 ret[len++] = '%'; 515 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 516 ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 517 } 518 } 519 } 520 if (len >= max) { 521 max *= 2; 522 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); 523 if (ret == NULL) { 524 xmlGenericError(xmlGenericErrorContext, 525 "xmlSaveUri: out of memory\n"); 526 return(NULL); 527 } 528 } 529 ret[len++] = 0; 530 return(ret); 531} 532 533/** 534 * xmlPrintURI: 535 * @stream: a FILE* for the output 536 * @uri: pointer to an xmlURI 537 * 538 * Prints the URI in the stream @stream. 
539 */ 540void 541xmlPrintURI(FILE *stream, xmlURIPtr uri) { 542 xmlChar *out; 543 544 out = xmlSaveUri(uri); 545 if (out != NULL) { 546 fprintf(stream, "%s", (char *) out); 547 xmlFree(out); 548 } 549} 550 551/** 552 * xmlCleanURI: 553 * @uri: pointer to an xmlURI 554 * 555 * Make sure the xmlURI struct is free of content 556 */ 557static void 558xmlCleanURI(xmlURIPtr uri) { 559 if (uri == NULL) return; 560 561 if (uri->scheme != NULL) xmlFree(uri->scheme); 562 uri->scheme = NULL; 563 if (uri->server != NULL) xmlFree(uri->server); 564 uri->server = NULL; 565 if (uri->user != NULL) xmlFree(uri->user); 566 uri->user = NULL; 567 if (uri->path != NULL) xmlFree(uri->path); 568 uri->path = NULL; 569 if (uri->fragment != NULL) xmlFree(uri->fragment); 570 uri->fragment = NULL; 571 if (uri->opaque != NULL) xmlFree(uri->opaque); 572 uri->opaque = NULL; 573 if (uri->authority != NULL) xmlFree(uri->authority); 574 uri->authority = NULL; 575 if (uri->query != NULL) xmlFree(uri->query); 576 uri->query = NULL; 577} 578 579/** 580 * xmlFreeURI: 581 * @uri: pointer to an xmlURI 582 * 583 * Free up the xmlURI struct 584 */ 585void 586xmlFreeURI(xmlURIPtr uri) { 587 if (uri == NULL) return; 588 589 if (uri->scheme != NULL) xmlFree(uri->scheme); 590 if (uri->server != NULL) xmlFree(uri->server); 591 if (uri->user != NULL) xmlFree(uri->user); 592 if (uri->path != NULL) xmlFree(uri->path); 593 if (uri->fragment != NULL) xmlFree(uri->fragment); 594 if (uri->opaque != NULL) xmlFree(uri->opaque); 595 if (uri->authority != NULL) xmlFree(uri->authority); 596 if (uri->query != NULL) xmlFree(uri->query); 597 xmlFree(uri); 598} 599 600/************************************************************************ 601 * * 602 * Helper functions * 603 * * 604 ************************************************************************/ 605 606/** 607 * xmlNormalizeURIPath: 608 * @path: pointer to the path string 609 * 610 * Applies the 5 normalization steps to a path string--that is, RFC 2396 611 * 
/************************************************************************
 *									*
 *			Helper functions				*
 *									*
 ************************************************************************/

/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * Runs of '/' at the very start of @path are kept as they are; runs found
 * further inside the path are collapsed to a single '/'.
 *
 * Returns 0 on success, -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
	return(-1);

    /*
     * Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (*cur == '/')
	cur++;
    if (*cur == '\0')
	return(0);

    /*
     * Steps (c) and (d): copy the path over itself segment by segment.
     * "out" never gets ahead of "cur".
     */
    out = cur;
    while (*cur != '\0') {
	if (cur[0] == '.') {
	    /*
	     * c) All occurrences of "./", where "." is a complete path
	     *    segment, are removed from the buffer string.
	     */
	    if (cur[1] == '/') {
		cur += 2;
		/* the slashes following it go away as well */
		while (*cur == '/')
		    cur++;
		continue;
	    }
	    /*
	     * d) If the buffer string ends with "." as a complete path
	     *    segment, that "." is removed.
	     */
	    if (cur[1] == '\0')
		break;
	}

	/* Otherwise keep the segment. */
	while ((*cur != '/') && (*cur != '\0'))
	    *out++ = *cur++;
	if (*cur == '\0')
	    break;

	/* normalize "//": keep only the last slash of a run */
	while (cur[1] == '/')
	    cur++;
	*out++ = *cur++;
    }
    *out = '\0';

    /* Reset to the beginning of the first segment for the next sequence. */
    cur = path;
    while (*cur == '/')
	cur++;
    if (*cur == '\0')
	return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), the string is collapsed
     * every time something is removed, so a single "current position"
     * pointer is enough.
     */
    for (;;) {
	char *next, *prev;

	/* "cur" is the first char of the segment under examination. */
	next = cur;
	while ((*next != '/') && (*next != '\0'))
	    next++;

	/* Last segment: neither (e) nor (f) can apply any more. */
	if (*next == '\0')
	    break;
	next++;

	/*
	 * If this segment is "..", or if the following one is not "..",
	 * keep this segment and try the next one.
	 */
	if (((cur[0] == '.') && (cur[1] == '.') && (next == cur + 3)) ||
	    (next[0] != '.') || (next[1] != '.') ||
	    ((next[2] != '/') && (next[2] != '\0'))) {
	    cur = next;
	    continue;
	}

	/* f) "<segment>/.." ends the buffer: cut it off and stop. */
	if (next[2] == '\0') {
	    *cur = '\0';
	    break;
	}

	/* e) drop "<segment>/../"; the regions overlap, hence memmove */
	memmove(cur, next + 3, strlen(next + 3) + 1);

	/*
	 * Back up to the previous segment, if there is one, and start over
	 * from there to handle things like "foo/bar/../..": after removing
	 * "bar/.." the remaining "foo/.." must be recognized as well.
	 *
	 * Step over the slashes in front of "cur" first.  Reaching the
	 * start of the buffer that way means nothing but slashes precede
	 * "cur", i.e. there is no previous segment.  Testing the character
	 * *before* the position, rather than comparing the position of the
	 * last segment character with "path", is what keeps a one character
	 * leading segment (as in "a/b/../../c") from being overlooked.
	 */
	prev = cur;
	while ((prev > path) && (prev[-1] == '/'))
	    prev--;
	if (prev == path)
	    continue;

	/* prev[-1] ends the previous segment; find where it starts */
	cur = prev - 1;
	while ((cur > path) && (cur[-1] != '/'))
	    cur--;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path.  This is only done for paths
     * starting with '/'.
     */
    if (path[0] == '/') {
	cur = path;
	while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.') &&
	       ((cur[3] == '/') || (cur[3] == '\0')))
	    cur += 3;

	if (cur != path)
	    memmove(path, cur, strlen(cur) + 1);
    }

    return(0);
}

/*
 * is_hex:
 * @c:  a character
 *
 * Returns 1 if @c is an hexadecimal digit (0-9, a-f, A-F), 0 otherwise
 */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
	return(1);
    if ((c >= 'a') && (c <= 'f'))
	return(1);
    if ((c >= 'A') && (c <= 'F'))
	return(1);
    return(0);
}
809 * Output is direct unsigned char translation of %XX values (no encoding) 810 * 811 * Returns an copy of the string, but unescaped 812 */ 813char * 814xmlURIUnescapeString(const char *str, int len, char *target) { 815 char *ret, *out; 816 const char *in; 817 818 if (str == NULL) 819 return(NULL); 820 if (len <= 0) len = strlen(str); 821 if (len < 0) return(NULL); 822 823 if (target == NULL) { 824 ret = (char *) xmlMallocAtomic(len + 1); 825 if (ret == NULL) { 826 xmlGenericError(xmlGenericErrorContext, 827 "xmlURIUnescapeString: out of memory\n"); 828 return(NULL); 829 } 830 } else 831 ret = target; 832 in = str; 833 out = ret; 834 while(len > 0) { 835 if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) { 836 in++; 837 if ((*in >= '0') && (*in <= '9')) 838 *out = (*in - '0'); 839 else if ((*in >= 'a') && (*in <= 'f')) 840 *out = (*in - 'a') + 10; 841 else if ((*in >= 'A') && (*in <= 'F')) 842 *out = (*in - 'A') + 10; 843 in++; 844 if ((*in >= '0') && (*in <= '9')) 845 *out = *out * 16 + (*in - '0'); 846 else if ((*in >= 'a') && (*in <= 'f')) 847 *out = *out * 16 + (*in - 'a') + 10; 848 else if ((*in >= 'A') && (*in <= 'F')) 849 *out = *out * 16 + (*in - 'A') + 10; 850 in++; 851 len -= 3; 852 out++; 853 } else { 854 *out++ = *in++; 855 len--; 856 } 857 } 858 *out = 0; 859 return(ret); 860} 861 862/** 863 * xmlURIEscapeStr: 864 * @str: string to escape 865 * @list: exception list string of chars not to escape 866 * 867 * This routine escapes a string to hex, ignoring reserved characters (a-z) 868 * and the characters in the exception list. 869 * 870 * Returns a new escaped string or NULL in case of error. 
871 */ 872xmlChar * 873xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) { 874 xmlChar *ret, ch; 875 const xmlChar *in; 876 877 unsigned int len, out; 878 879 if (str == NULL) 880 return(NULL); 881 if (str[0] == 0) 882 return(xmlStrdup(str)); 883 len = xmlStrlen(str); 884 if (!(len > 0)) return(NULL); 885 886 len += 20; 887 ret = (xmlChar *) xmlMallocAtomic(len); 888 if (ret == NULL) { 889 xmlGenericError(xmlGenericErrorContext, 890 "xmlURIEscapeStr: out of memory\n"); 891 return(NULL); 892 } 893 in = (const xmlChar *) str; 894 out = 0; 895 while(*in != 0) { 896 if (len - out <= 3) { 897 len += 20; 898 ret = (xmlChar *) xmlRealloc(ret, len); 899 if (ret == NULL) { 900 xmlGenericError(xmlGenericErrorContext, 901 "xmlURIEscapeStr: out of memory\n"); 902 return(NULL); 903 } 904 } 905 906 ch = *in; 907 908 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) { 909 unsigned char val; 910 ret[out++] = '%'; 911 val = ch >> 4; 912 if (val <= 9) 913 ret[out++] = '0' + val; 914 else 915 ret[out++] = 'A' + val - 0xA; 916 val = ch & 0xF; 917 if (val <= 9) 918 ret[out++] = '0' + val; 919 else 920 ret[out++] = 'A' + val - 0xA; 921 in++; 922 } else { 923 ret[out++] = *in++; 924 } 925 926 } 927 ret[out] = 0; 928 return(ret); 929} 930 931/** 932 * xmlURIEscape: 933 * @str: the string of the URI to escape 934 * 935 * Escaping routine, does not do validity checks ! 936 * It will try to escape the chars needing this, but this is heuristic 937 * based it's impossible to be sure. 938 * 939 * Returns an copy of the string, but escaped 940 * 941 * 25 May 2001 942 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly 943 * according to RFC2396. 
944 * - Carl Douglas 945 */ 946xmlChar * 947xmlURIEscape(const xmlChar * str) 948{ 949 xmlChar *ret, *segment = NULL; 950 xmlURIPtr uri; 951 int ret2; 952 953#define NULLCHK(p) if(!p) { \ 954 xmlGenericError(xmlGenericErrorContext, \ 955 "xmlURIEscape: out of memory\n"); \ 956 return NULL; } 957 958 if (str == NULL) 959 return (NULL); 960 961 uri = xmlCreateURI(); 962 if (uri != NULL) { 963 /* 964 * Allow escaping errors in the unescaped form 965 */ 966 uri->cleanup = 1; 967 ret2 = xmlParseURIReference(uri, (const char *)str); 968 if (ret2) { 969 xmlFreeURI(uri); 970 return (NULL); 971 } 972 } 973 974 if (!uri) 975 return NULL; 976 977 ret = NULL; 978 979 if (uri->scheme) { 980 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-."); 981 NULLCHK(segment) 982 ret = xmlStrcat(ret, segment); 983 ret = xmlStrcat(ret, BAD_CAST ":"); 984 xmlFree(segment); 985 } 986 987 if (uri->authority) { 988 segment = 989 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@"); 990 NULLCHK(segment) 991 ret = xmlStrcat(ret, BAD_CAST "//"); 992 ret = xmlStrcat(ret, segment); 993 xmlFree(segment); 994 } 995 996 if (uri->user) { 997 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,"); 998 NULLCHK(segment) 999 ret = xmlStrcat(ret,BAD_CAST "//"); 1000 ret = xmlStrcat(ret, segment); 1001 ret = xmlStrcat(ret, BAD_CAST "@"); 1002 xmlFree(segment); 1003 } 1004 1005 if (uri->server) { 1006 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@"); 1007 NULLCHK(segment) 1008 if (uri->user == NULL) 1009 ret = xmlStrcat(ret, BAD_CAST "//"); 1010 ret = xmlStrcat(ret, segment); 1011 xmlFree(segment); 1012 } 1013 1014 if (uri->port) { 1015 xmlChar port[10]; 1016 1017 snprintf((char *) port, 10, "%d", uri->port); 1018 ret = xmlStrcat(ret, BAD_CAST ":"); 1019 ret = xmlStrcat(ret, port); 1020 } 1021 1022 if (uri->path) { 1023 segment = 1024 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;"); 1025 NULLCHK(segment) 1026 ret = xmlStrcat(ret, segment); 1027 
xmlFree(segment); 1028 } 1029 1030 if (uri->query) { 1031 segment = 1032 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$"); 1033 NULLCHK(segment) 1034 ret = xmlStrcat(ret, BAD_CAST "?"); 1035 ret = xmlStrcat(ret, segment); 1036 xmlFree(segment); 1037 } 1038 1039 if (uri->opaque) { 1040 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST ""); 1041 NULLCHK(segment) 1042 ret = xmlStrcat(ret, segment); 1043 xmlFree(segment); 1044 } 1045 1046 if (uri->fragment) { 1047 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#"); 1048 NULLCHK(segment) 1049 ret = xmlStrcat(ret, BAD_CAST "#"); 1050 ret = xmlStrcat(ret, segment); 1051 xmlFree(segment); 1052 } 1053 1054 xmlFreeURI(uri); 1055#undef NULLCHK 1056 1057 return (ret); 1058} 1059 1060/************************************************************************ 1061 * * 1062 * Escaped URI parsing * 1063 * * 1064 ************************************************************************/ 1065 1066/** 1067 * xmlParseURIFragment: 1068 * @uri: pointer to an URI structure 1069 * @str: pointer to the string to analyze 1070 * 1071 * Parse an URI fragment string and fills in the appropriate fields 1072 * of the @uri structure. 
1073 * 1074 * fragment = *uric 1075 * 1076 * Returns 0 or the error code 1077 */ 1078static int 1079xmlParseURIFragment(xmlURIPtr uri, const char **str) 1080{ 1081 const char *cur; 1082 1083 if (str == NULL) 1084 return (-1); 1085 1086 cur = *str; 1087 1088 while (IS_URIC(cur) || IS_UNWISE(cur)) 1089 NEXT(cur); 1090 if (uri != NULL) { 1091 if (uri->fragment != NULL) 1092 xmlFree(uri->fragment); 1093 if (uri->cleanup & 2) 1094 uri->fragment = STRNDUP(*str, cur - *str); 1095 else 1096 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL); 1097 } 1098 *str = cur; 1099 return (0); 1100} 1101 1102/** 1103 * xmlParseURIQuery: 1104 * @uri: pointer to an URI structure 1105 * @str: pointer to the string to analyze 1106 * 1107 * Parse the query part of an URI 1108 * 1109 * query = *uric 1110 * 1111 * Returns 0 or the error code 1112 */ 1113static int 1114xmlParseURIQuery(xmlURIPtr uri, const char **str) 1115{ 1116 const char *cur; 1117 1118 if (str == NULL) 1119 return (-1); 1120 1121 cur = *str; 1122 1123 while ((IS_URIC(cur)) || 1124 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) 1125 NEXT(cur); 1126 if (uri != NULL) { 1127 if (uri->query != NULL) 1128 xmlFree(uri->query); 1129 if (uri->cleanup & 2) 1130 uri->query = STRNDUP(*str, cur - *str); 1131 else 1132 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL); 1133 } 1134 *str = cur; 1135 return (0); 1136} 1137 1138/** 1139 * xmlParseURIScheme: 1140 * @uri: pointer to an URI structure 1141 * @str: pointer to the string to analyze 1142 * 1143 * Parse an URI scheme 1144 * 1145 * scheme = alpha *( alpha | digit | "+" | "-" | "." 
) 1146 * 1147 * Returns 0 or the error code 1148 */ 1149static int 1150xmlParseURIScheme(xmlURIPtr uri, const char **str) { 1151 const char *cur; 1152 1153 if (str == NULL) 1154 return(-1); 1155 1156 cur = *str; 1157 if (!IS_ALPHA(*cur)) 1158 return(2); 1159 cur++; 1160 while (IS_SCHEME(*cur)) cur++; 1161 if (uri != NULL) { 1162 if (uri->scheme != NULL) xmlFree(uri->scheme); 1163 uri->scheme = STRNDUP(*str, cur - *str); 1164 } 1165 *str = cur; 1166 return(0); 1167} 1168 1169/** 1170 * xmlParseURIOpaquePart: 1171 * @uri: pointer to an URI structure 1172 * @str: pointer to the string to analyze 1173 * 1174 * Parse an URI opaque part 1175 * 1176 * opaque_part = uric_no_slash *uric 1177 * 1178 * Returns 0 or the error code 1179 */ 1180static int 1181xmlParseURIOpaquePart(xmlURIPtr uri, const char **str) 1182{ 1183 const char *cur; 1184 1185 if (str == NULL) 1186 return (-1); 1187 1188 cur = *str; 1189 if (!((IS_URIC_NO_SLASH(cur)) || 1190 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))) { 1191 return (3); 1192 } 1193 NEXT(cur); 1194 while ((IS_URIC(cur)) || 1195 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) 1196 NEXT(cur); 1197 if (uri != NULL) { 1198 if (uri->opaque != NULL) 1199 xmlFree(uri->opaque); 1200 if (uri->cleanup & 2) 1201 uri->opaque = STRNDUP(*str, cur - *str); 1202 else 1203 uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL); 1204 } 1205 *str = cur; 1206 return (0); 1207} 1208 1209/** 1210 * xmlParseURIServer: 1211 * @uri: pointer to an URI structure 1212 * @str: pointer to the string to analyze 1213 * 1214 * Parse a server subpart of an URI, it's a finer grain analysis 1215 * of the authority part. 1216 * 1217 * server = [ [ userinfo "@" ] hostport ] 1218 * userinfo = *( unreserved | escaped | 1219 * ";" | ":" | "&" | "=" | "+" | "$" | "," ) 1220 * hostport = host [ ":" port ] 1221 * host = hostname | IPv4address | IPv6reference 1222 * hostname = *( domainlabel "." ) toplabel [ "." 
] 1223 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum 1224 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum 1225 * IPv6reference = "[" IPv6address "]" 1226 * IPv6address = hexpart [ ":" IPv4address ] 1227 * IPv4address = 1*3digit "." 1*3digit "." 1*3digit "." 1*3digit 1228 * hexpart = hexseq | hexseq "::" [ hexseq ]| "::" [ hexseq ] 1229 * hexseq = hex4 *( ":" hex4) 1230 * hex4 = 1*4hexdig 1231 * port = *digit 1232 * 1233 * Returns 0 or the error code 1234 */ 1235static int 1236xmlParseURIServer(xmlURIPtr uri, const char **str) { 1237 const char *cur; 1238 const char *host, *tmp; 1239 const int IPV4max = 4; 1240 const int IPV6max = 8; 1241 int oct; 1242 1243 if (str == NULL) 1244 return(-1); 1245 1246 cur = *str; 1247 1248 /* 1249 * is there a userinfo ? 1250 */ 1251 while (IS_USERINFO(cur)) NEXT(cur); 1252 if (*cur == '@') { 1253 if (uri != NULL) { 1254 if (uri->user != NULL) xmlFree(uri->user); 1255 if (uri->cleanup & 2) 1256 uri->path = STRNDUP(*str, cur - *str); 1257 else 1258 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL); 1259 } 1260 cur++; 1261 } else { 1262 if (uri != NULL) { 1263 if (uri->user != NULL) xmlFree(uri->user); 1264 uri->user = NULL; 1265 } 1266 cur = *str; 1267 } 1268 /* 1269 * This can be empty in the case where there is no server 1270 */ 1271 host = cur; 1272 if (*cur == '/') { 1273 if (uri != NULL) { 1274 if (uri->authority != NULL) xmlFree(uri->authority); 1275 uri->authority = NULL; 1276 if (uri->server != NULL) xmlFree(uri->server); 1277 uri->server = NULL; 1278 uri->port = 0; 1279 } 1280 return(0); 1281 } 1282 /* 1283 * host part of hostport can denote an IPV4 address, an IPV6 address 1284 * or an unresolved name. Check the IP first, its easier to detect 1285 * errors if wrong one. 1286 * An IPV6 address must start with a '[' and end with a ']'. 
1287 */ 1288 if (*cur == '[') { 1289 int compress=0; 1290 cur++; 1291 for (oct = 0; oct < IPV6max; ++oct) { 1292 if (*cur == ':') { 1293 if (compress) 1294 return(3); /* multiple compression attempted */ 1295 if (!oct) { /* initial char is compression */ 1296 if (*++cur != ':') 1297 return(3); 1298 } 1299 compress = 1; /* set compression-encountered flag */ 1300 cur++; /* skip over the second ':' */ 1301 continue; 1302 } 1303 while(IS_HEX(*cur)) cur++; 1304 if (oct == (IPV6max-1)) 1305 continue; 1306 if (*cur != ':') 1307 break; 1308 cur++; 1309 } 1310 if ((!compress) && (oct != IPV6max)) 1311 return(3); 1312 if (*cur != ']') 1313 return(3); 1314 if (uri != NULL) { 1315 if (uri->server != NULL) xmlFree(uri->server); 1316 uri->server = (char *)xmlStrndup((xmlChar *)host+1, 1317 (cur-host)-1); 1318 } 1319 cur++; 1320 } else { 1321 /* 1322 * Not IPV6, maybe IPV4 1323 */ 1324 for (oct = 0; oct < IPV4max; ++oct) { 1325 if (*cur == '.') 1326 return(3); /* e.g. http://.xml/ or http://18.29..30/ */ 1327 while(IS_DIGIT(*cur)) cur++; 1328 if (oct == (IPV4max-1)) 1329 continue; 1330 if (*cur != '.') 1331 break; 1332 cur++; 1333 } 1334 } 1335 if ((host[0] != '[') && (oct < IPV4max || (*cur == '.' && cur++) || 1336 IS_ALPHA(*cur))) { 1337 /* maybe host_name */ 1338 if (!IS_ALPHANUM(*cur)) 1339 return(4); /* e.g. http://xml.$oft */ 1340 do { 1341 do ++cur; while (IS_ALPHANUM(*cur)); 1342 if (*cur == '-') { 1343 --cur; 1344 if (*cur == '.') 1345 return(5); /* e.g. http://xml.-soft */ 1346 ++cur; 1347 continue; 1348 } 1349 if (*cur == '.') { 1350 --cur; 1351 if (*cur == '-') 1352 return(6); /* e.g. http://xml-.soft */ 1353 if (*cur == '.') 1354 return(7); /* e.g. http://xml..soft */ 1355 ++cur; 1356 continue; 1357 } 1358 break; 1359 } while (1); 1360 tmp = cur; 1361 if (tmp[-1] == '.') 1362 --tmp; /* e.g. http://xml.$Oft/ */ 1363 do --tmp; while (tmp >= host && IS_ALPHANUM(*tmp)); 1364 if ((++tmp == host || tmp[-1] == '.') && !IS_ALPHA(*tmp)) 1365 return(8); /* e.g. 
http://xmlsOft.0rg/ */ 1366 } 1367 if (uri != NULL) { 1368 if (uri->authority != NULL) xmlFree(uri->authority); 1369 uri->authority = NULL; 1370 if (host[0] != '[') { /* it's not an IPV6 addr */ 1371 if (uri->server != NULL) xmlFree(uri->server); 1372 if (uri->cleanup & 2) 1373 uri->server = STRNDUP(host, cur - host); 1374 else 1375 uri->server = xmlURIUnescapeString(host, cur - host, NULL); 1376 } 1377 } 1378 /* 1379 * finish by checking for a port presence. 1380 */ 1381 if (*cur == ':') { 1382 cur++; 1383 if (IS_DIGIT(*cur)) { 1384 if (uri != NULL) 1385 uri->port = 0; 1386 while (IS_DIGIT(*cur)) { 1387 if (uri != NULL) 1388 uri->port = uri->port * 10 + (*cur - '0'); 1389 cur++; 1390 } 1391 } 1392 } 1393 *str = cur; 1394 return(0); 1395} 1396 1397/** 1398 * xmlParseURIRelSegment: 1399 * @uri: pointer to an URI structure 1400 * @str: pointer to the string to analyze 1401 * 1402 * Parse an URI relative segment 1403 * 1404 * rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" | 1405 * "+" | "$" | "," ) 1406 * 1407 * Returns 0 or the error code 1408 */ 1409static int 1410xmlParseURIRelSegment(xmlURIPtr uri, const char **str) 1411{ 1412 const char *cur; 1413 1414 if (str == NULL) 1415 return (-1); 1416 1417 cur = *str; 1418 if (!((IS_SEGMENT(cur)) || 1419 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))) { 1420 return (3); 1421 } 1422 NEXT(cur); 1423 while ((IS_SEGMENT(cur)) || 1424 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) 1425 NEXT(cur); 1426 if (uri != NULL) { 1427 if (uri->path != NULL) 1428 xmlFree(uri->path); 1429 if (uri->cleanup & 2) 1430 uri->path = STRNDUP(*str, cur - *str); 1431 else 1432 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); 1433 } 1434 *str = cur; 1435 return (0); 1436} 1437 1438/** 1439 * xmlParseURIPathSegments: 1440 * @uri: pointer to an URI structure 1441 * @str: pointer to the string to analyze 1442 * @slash: should we add a leading slash 1443 * 1444 * Parse an URI set of path segments 1445 * 
1446 * path_segments = segment *( "/" segment ) 1447 * segment = *pchar *( ";" param ) 1448 * param = *pchar 1449 * 1450 * Returns 0 or the error code 1451 */ 1452static int 1453xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash) 1454{ 1455 const char *cur; 1456 1457 if (str == NULL) 1458 return (-1); 1459 1460 cur = *str; 1461 1462 do { 1463 while ((IS_PCHAR(cur)) || 1464 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) 1465 NEXT(cur); 1466 while (*cur == ';') { 1467 cur++; 1468 while ((IS_PCHAR(cur)) || 1469 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) 1470 NEXT(cur); 1471 } 1472 if (*cur != '/') 1473 break; 1474 cur++; 1475 } while (1); 1476 if (uri != NULL) { 1477 int len, len2 = 0; 1478 char *path; 1479 1480 /* 1481 * Concat the set of path segments to the current path 1482 */ 1483 len = cur - *str; 1484 if (slash) 1485 len++; 1486 1487 if (uri->path != NULL) { 1488 len2 = strlen(uri->path); 1489 len += len2; 1490 } 1491 path = (char *) xmlMallocAtomic(len + 1); 1492 if (path == NULL) { 1493 xmlGenericError(xmlGenericErrorContext, 1494 "xmlParseURIPathSegments: out of memory\n"); 1495 *str = cur; 1496 return (-1); 1497 } 1498 if (uri->path != NULL) 1499 memcpy(path, uri->path, len2); 1500 if (slash) { 1501 path[len2] = '/'; 1502 len2++; 1503 } 1504 path[len2] = 0; 1505 if (cur - *str > 0) { 1506 if (uri->cleanup & 2) { 1507 memcpy(&path[len2], *str, cur - *str); 1508 path[len2 + (cur - *str)] = 0; 1509 } else 1510 xmlURIUnescapeString(*str, cur - *str, &path[len2]); 1511 } 1512 if (uri->path != NULL) 1513 xmlFree(uri->path); 1514 uri->path = path; 1515 } 1516 *str = cur; 1517 return (0); 1518} 1519 1520/** 1521 * xmlParseURIAuthority: 1522 * @uri: pointer to an URI structure 1523 * @str: pointer to the string to analyze 1524 * 1525 * Parse the authority part of an URI. 
1526 * 1527 * authority = server | reg_name 1528 * server = [ [ userinfo "@" ] hostport ] 1529 * reg_name = 1*( unreserved | escaped | "$" | "," | ";" | ":" | 1530 * "@" | "&" | "=" | "+" ) 1531 * 1532 * Note : this is completely ambiguous since reg_name is allowed to 1533 * use the full set of chars in use by server: 1534 * 1535 * 3.2.1. Registry-based Naming Authority 1536 * 1537 * The structure of a registry-based naming authority is specific 1538 * to the URI scheme, but constrained to the allowed characters 1539 * for an authority component. 1540 * 1541 * Returns 0 or the error code 1542 */ 1543static int 1544xmlParseURIAuthority(xmlURIPtr uri, const char **str) { 1545 const char *cur; 1546 int ret; 1547 1548 if (str == NULL) 1549 return(-1); 1550 1551 cur = *str; 1552 1553 /* 1554 * try first to parse it as a server string. 1555 */ 1556 ret = xmlParseURIServer(uri, str); 1557 if ((ret == 0) && (*str != NULL) && 1558 ((**str == 0) || (**str == '/') || (**str == '?'))) 1559 return(0); 1560 *str = cur; 1561 1562 /* 1563 * failed, fallback to reg_name 1564 */ 1565 if (!IS_REG_NAME(cur)) { 1566 return(5); 1567 } 1568 NEXT(cur); 1569 while (IS_REG_NAME(cur)) NEXT(cur); 1570 if (uri != NULL) { 1571 if (uri->server != NULL) xmlFree(uri->server); 1572 uri->server = NULL; 1573 if (uri->user != NULL) xmlFree(uri->user); 1574 uri->user = NULL; 1575 if (uri->authority != NULL) xmlFree(uri->authority); 1576 if (uri->cleanup & 2) 1577 uri->authority = STRNDUP(*str, cur - *str); 1578 else 1579 uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL); 1580 } 1581 *str = cur; 1582 return(0); 1583} 1584 1585/** 1586 * xmlParseURIHierPart: 1587 * @uri: pointer to an URI structure 1588 * @str: pointer to the string to analyze 1589 * 1590 * Parse an URI hierarchical part 1591 * 1592 * hier_part = ( net_path | abs_path ) [ "?" 
query ] 1593 * abs_path = "/" path_segments 1594 * net_path = "//" authority [ abs_path ] 1595 * 1596 * Returns 0 or the error code 1597 */ 1598static int 1599xmlParseURIHierPart(xmlURIPtr uri, const char **str) { 1600 int ret; 1601 const char *cur; 1602 1603 if (str == NULL) 1604 return(-1); 1605 1606 cur = *str; 1607 1608 if ((cur[0] == '/') && (cur[1] == '/')) { 1609 cur += 2; 1610 ret = xmlParseURIAuthority(uri, &cur); 1611 if (ret != 0) 1612 return(ret); 1613 if (cur[0] == '/') { 1614 cur++; 1615 ret = xmlParseURIPathSegments(uri, &cur, 1); 1616 } 1617 } else if (cur[0] == '/') { 1618 cur++; 1619 ret = xmlParseURIPathSegments(uri, &cur, 1); 1620 } else { 1621 return(4); 1622 } 1623 if (ret != 0) 1624 return(ret); 1625 if (*cur == '?') { 1626 cur++; 1627 ret = xmlParseURIQuery(uri, &cur); 1628 if (ret != 0) 1629 return(ret); 1630 } 1631 *str = cur; 1632 return(0); 1633} 1634 1635/** 1636 * xmlParseAbsoluteURI: 1637 * @uri: pointer to an URI structure 1638 * @str: pointer to the string to analyze 1639 * 1640 * Parse an URI reference string and fills in the appropriate fields 1641 * of the @uri structure 1642 * 1643 * absoluteURI = scheme ":" ( hier_part | opaque_part ) 1644 * 1645 * Returns 0 or the error code 1646 */ 1647static int 1648xmlParseAbsoluteURI(xmlURIPtr uri, const char **str) { 1649 int ret; 1650 const char *cur; 1651 1652 if (str == NULL) 1653 return(-1); 1654 1655 cur = *str; 1656 1657 ret = xmlParseURIScheme(uri, str); 1658 if (ret != 0) return(ret); 1659 if (**str != ':') { 1660 *str = cur; 1661 return(1); 1662 } 1663 (*str)++; 1664 if (**str == '/') 1665 return(xmlParseURIHierPart(uri, str)); 1666 return(xmlParseURIOpaquePart(uri, str)); 1667} 1668 1669/** 1670 * xmlParseRelativeURI: 1671 * @uri: pointer to an URI structure 1672 * @str: pointer to the string to analyze 1673 * 1674 * Parse an relative URI string and fills in the appropriate fields 1675 * of the @uri structure 1676 * 1677 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" 
query ] 1678 * abs_path = "/" path_segments 1679 * net_path = "//" authority [ abs_path ] 1680 * rel_path = rel_segment [ abs_path ] 1681 * 1682 * Returns 0 or the error code 1683 */ 1684static int 1685xmlParseRelativeURI(xmlURIPtr uri, const char **str) { 1686 int ret = 0; 1687 const char *cur; 1688 1689 if (str == NULL) 1690 return(-1); 1691 1692 cur = *str; 1693 if ((cur[0] == '/') && (cur[1] == '/')) { 1694 cur += 2; 1695 ret = xmlParseURIAuthority(uri, &cur); 1696 if (ret != 0) 1697 return(ret); 1698 if (cur[0] == '/') { 1699 cur++; 1700 ret = xmlParseURIPathSegments(uri, &cur, 1); 1701 } 1702 } else if (cur[0] == '/') { 1703 cur++; 1704 ret = xmlParseURIPathSegments(uri, &cur, 1); 1705 } else if (cur[0] != '#' && cur[0] != '?') { 1706 ret = xmlParseURIRelSegment(uri, &cur); 1707 if (ret != 0) 1708 return(ret); 1709 if (cur[0] == '/') { 1710 cur++; 1711 ret = xmlParseURIPathSegments(uri, &cur, 1); 1712 } 1713 } 1714 if (ret != 0) 1715 return(ret); 1716 if (*cur == '?') { 1717 cur++; 1718 ret = xmlParseURIQuery(uri, &cur); 1719 if (ret != 0) 1720 return(ret); 1721 } 1722 *str = cur; 1723 return(ret); 1724} 1725 1726/** 1727 * xmlParseURIReference: 1728 * @uri: pointer to an URI structure 1729 * @str: the string to analyze 1730 * 1731 * Parse an URI reference string and fills in the appropriate fields 1732 * of the @uri structure 1733 * 1734 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] 1735 * 1736 * Returns 0 or the error code 1737 */ 1738int 1739xmlParseURIReference(xmlURIPtr uri, const char *str) { 1740 int ret; 1741 const char *tmp = str; 1742 1743 if (str == NULL) 1744 return(-1); 1745 xmlCleanURI(uri); 1746 1747 /* 1748 * Try first to parse absolute refs, then fallback to relative if 1749 * it fails. 
1750 */ 1751 ret = xmlParseAbsoluteURI(uri, &str); 1752 if (ret != 0) { 1753 xmlCleanURI(uri); 1754 str = tmp; 1755 ret = xmlParseRelativeURI(uri, &str); 1756 } 1757 if (ret != 0) { 1758 xmlCleanURI(uri); 1759 return(ret); 1760 } 1761 1762 if (*str == '#') { 1763 str++; 1764 ret = xmlParseURIFragment(uri, &str); 1765 if (ret != 0) return(ret); 1766 } 1767 if (*str != 0) { 1768 xmlCleanURI(uri); 1769 return(1); 1770 } 1771 return(0); 1772} 1773 1774/** 1775 * xmlParseURI: 1776 * @str: the URI string to analyze 1777 * 1778 * Parse an URI 1779 * 1780 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] 1781 * 1782 * Returns a newly built xmlURIPtr or NULL in case of error 1783 */ 1784xmlURIPtr 1785xmlParseURI(const char *str) { 1786 xmlURIPtr uri; 1787 int ret; 1788 1789 if (str == NULL) 1790 return(NULL); 1791 uri = xmlCreateURI(); 1792 if (uri != NULL) { 1793 ret = xmlParseURIReference(uri, str); 1794 if (ret) { 1795 xmlFreeURI(uri); 1796 return(NULL); 1797 } 1798 } 1799 return(uri); 1800} 1801 1802/** 1803 * xmlParseURIRaw: 1804 * @str: the URI string to analyze 1805 * @raw: if 1 unescaping of URI pieces are disabled 1806 * 1807 * Parse an URI but allows to keep intact the original fragments. 
1808 * 1809 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] 1810 * 1811 * Returns a newly built xmlURIPtr or NULL in case of error 1812 */ 1813xmlURIPtr 1814xmlParseURIRaw(const char *str, int raw) { 1815 xmlURIPtr uri; 1816 int ret; 1817 1818 if (str == NULL) 1819 return(NULL); 1820 uri = xmlCreateURI(); 1821 if (uri != NULL) { 1822 if (raw) { 1823 uri->cleanup |= 2; 1824 } 1825 ret = xmlParseURIReference(uri, str); 1826 if (ret) { 1827 xmlFreeURI(uri); 1828 return(NULL); 1829 } 1830 } 1831 return(uri); 1832} 1833 1834/************************************************************************ 1835 * * 1836 * Public functions * 1837 * * 1838 ************************************************************************/ 1839 1840/** 1841 * xmlBuildURI: 1842 * @URI: the URI instance found in the document 1843 * @base: the base value 1844 * 1845 * Computes he final URI of the reference done by checking that 1846 * the given URI is valid, and building the final URI using the 1847 * base URI. This is processed according to section 5.2 of the 1848 * RFC 2396 1849 * 1850 * 5.2. Resolving Relative References to Absolute Form 1851 * 1852 * Returns a new URI string (to be freed by the caller) or NULL in case 1853 * of error. 1854 */ 1855xmlChar * 1856xmlBuildURI(const xmlChar *URI, const xmlChar *base) { 1857 xmlChar *val = NULL; 1858 int ret, len, indx, cur, out; 1859 xmlURIPtr ref = NULL; 1860 xmlURIPtr bas = NULL; 1861 xmlURIPtr res = NULL; 1862 1863 /* 1864 * 1) The URI reference is parsed into the potential four components and 1865 * fragment identifier, as described in Section 4.3. 1866 * 1867 * NOTE that a completely empty URI is treated by modern browsers 1868 * as a reference to "." rather than as a synonym for the current 1869 * URI. Should we do that here? 
1870 */ 1871 if (URI == NULL) 1872 ret = -1; 1873 else { 1874 if (*URI) { 1875 ref = xmlCreateURI(); 1876 if (ref == NULL) 1877 goto done; 1878 ret = xmlParseURIReference(ref, (const char *) URI); 1879 } 1880 else 1881 ret = 0; 1882 } 1883 if (ret != 0) 1884 goto done; 1885 if ((ref != NULL) && (ref->scheme != NULL)) { 1886 /* 1887 * The URI is absolute don't modify. 1888 */ 1889 val = xmlStrdup(URI); 1890 goto done; 1891 } 1892 if (base == NULL) 1893 ret = -1; 1894 else { 1895 bas = xmlCreateURI(); 1896 if (bas == NULL) 1897 goto done; 1898 ret = xmlParseURIReference(bas, (const char *) base); 1899 } 1900 if (ret != 0) { 1901 if (ref) 1902 val = xmlSaveUri(ref); 1903 goto done; 1904 } 1905 if (ref == NULL) { 1906 /* 1907 * the base fragment must be ignored 1908 */ 1909 if (bas->fragment != NULL) { 1910 xmlFree(bas->fragment); 1911 bas->fragment = NULL; 1912 } 1913 val = xmlSaveUri(bas); 1914 goto done; 1915 } 1916 1917 /* 1918 * 2) If the path component is empty and the scheme, authority, and 1919 * query components are undefined, then it is a reference to the 1920 * current document and we are done. Otherwise, the reference URI's 1921 * query and fragment components are defined as found (or not found) 1922 * within the URI reference and not inherited from the base URI. 1923 * 1924 * NOTE that in modern browsers, the parsing differs from the above 1925 * in the following aspect: the query component is allowed to be 1926 * defined while still treating this as a reference to the current 1927 * document. 
1928 */ 1929 res = xmlCreateURI(); 1930 if (res == NULL) 1931 goto done; 1932 if ((ref->scheme == NULL) && (ref->path == NULL) && 1933 ((ref->authority == NULL) && (ref->server == NULL))) { 1934 if (bas->scheme != NULL) 1935 res->scheme = xmlMemStrdup(bas->scheme); 1936 if (bas->authority != NULL) 1937 res->authority = xmlMemStrdup(bas->authority); 1938 else if (bas->server != NULL) { 1939 res->server = xmlMemStrdup(bas->server); 1940 if (bas->user != NULL) 1941 res->user = xmlMemStrdup(bas->user); 1942 res->port = bas->port; 1943 } 1944 if (bas->path != NULL) 1945 res->path = xmlMemStrdup(bas->path); 1946 if (ref->query != NULL) 1947 res->query = xmlMemStrdup(ref->query); 1948 else if (bas->query != NULL) 1949 res->query = xmlMemStrdup(bas->query); 1950 if (ref->fragment != NULL) 1951 res->fragment = xmlMemStrdup(ref->fragment); 1952 goto step_7; 1953 } 1954 1955 /* 1956 * 3) If the scheme component is defined, indicating that the reference 1957 * starts with a scheme name, then the reference is interpreted as an 1958 * absolute URI and we are done. Otherwise, the reference URI's 1959 * scheme is inherited from the base URI's scheme component. 1960 */ 1961 if (ref->scheme != NULL) { 1962 val = xmlSaveUri(ref); 1963 goto done; 1964 } 1965 if (bas->scheme != NULL) 1966 res->scheme = xmlMemStrdup(bas->scheme); 1967 1968 if (ref->query != NULL) 1969 res->query = xmlMemStrdup(ref->query); 1970 if (ref->fragment != NULL) 1971 res->fragment = xmlMemStrdup(ref->fragment); 1972 1973 /* 1974 * 4) If the authority component is defined, then the reference is a 1975 * network-path and we skip to step 7. Otherwise, the reference 1976 * URI's authority is inherited from the base URI's authority 1977 * component, which will also be undefined if the URI scheme does not 1978 * use an authority component. 
1979 */ 1980 if ((ref->authority != NULL) || (ref->server != NULL)) { 1981 if (ref->authority != NULL) 1982 res->authority = xmlMemStrdup(ref->authority); 1983 else { 1984 res->server = xmlMemStrdup(ref->server); 1985 if (ref->user != NULL) 1986 res->user = xmlMemStrdup(ref->user); 1987 res->port = ref->port; 1988 } 1989 if (ref->path != NULL) 1990 res->path = xmlMemStrdup(ref->path); 1991 goto step_7; 1992 } 1993 if (bas->authority != NULL) 1994 res->authority = xmlMemStrdup(bas->authority); 1995 else if (bas->server != NULL) { 1996 res->server = xmlMemStrdup(bas->server); 1997 if (bas->user != NULL) 1998 res->user = xmlMemStrdup(bas->user); 1999 res->port = bas->port; 2000 } 2001 2002 /* 2003 * 5) If the path component begins with a slash character ("/"), then 2004 * the reference is an absolute-path and we skip to step 7. 2005 */ 2006 if ((ref->path != NULL) && (ref->path[0] == '/')) { 2007 res->path = xmlMemStrdup(ref->path); 2008 goto step_7; 2009 } 2010 2011 2012 /* 2013 * 6) If this step is reached, then we are resolving a relative-path 2014 * reference. The relative path needs to be merged with the base 2015 * URI's path. Although there are many ways to do this, we will 2016 * describe a simple method using a separate string buffer. 2017 * 2018 * Allocate a buffer large enough for the result string. 2019 */ 2020 len = 2; /* extra / and 0 */ 2021 if (ref->path != NULL) 2022 len += strlen(ref->path); 2023 if (bas->path != NULL) 2024 len += strlen(bas->path); 2025 res->path = (char *) xmlMallocAtomic(len); 2026 if (res->path == NULL) { 2027 xmlGenericError(xmlGenericErrorContext, 2028 "xmlBuildURI: out of memory\n"); 2029 goto done; 2030 } 2031 res->path[0] = 0; 2032 2033 /* 2034 * a) All but the last segment of the base URI's path component is 2035 * copied to the buffer. In other words, any characters after the 2036 * last (right-most) slash character, if any, are excluded. 
2037 */ 2038 cur = 0; 2039 out = 0; 2040 if (bas->path != NULL) { 2041 while (bas->path[cur] != 0) { 2042 while ((bas->path[cur] != 0) && (bas->path[cur] != '/')) 2043 cur++; 2044 if (bas->path[cur] == 0) 2045 break; 2046 2047 cur++; 2048 while (out < cur) { 2049 res->path[out] = bas->path[out]; 2050 out++; 2051 } 2052 } 2053 } 2054 res->path[out] = 0; 2055 2056 /* 2057 * b) The reference's path component is appended to the buffer 2058 * string. 2059 */ 2060 if (ref->path != NULL && ref->path[0] != 0) { 2061 indx = 0; 2062 /* 2063 * Ensure the path includes a '/' 2064 */ 2065 if ((out == 0) && (bas->server != NULL)) 2066 res->path[out++] = '/'; 2067 while (ref->path[indx] != 0) { 2068 res->path[out++] = ref->path[indx++]; 2069 } 2070 } 2071 res->path[out] = 0; 2072 2073 /* 2074 * Steps c) to h) are really path normalization steps 2075 */ 2076 xmlNormalizeURIPath(res->path); 2077 2078step_7: 2079 2080 /* 2081 * 7) The resulting URI components, including any inherited from the 2082 * base URI, are recombined to give the absolute form of the URI 2083 * reference. 2084 */ 2085 val = xmlSaveUri(res); 2086 2087done: 2088 if (ref != NULL) 2089 xmlFreeURI(ref); 2090 if (bas != NULL) 2091 xmlFreeURI(bas); 2092 if (res != NULL) 2093 xmlFreeURI(res); 2094 return(val); 2095} 2096 2097/** 2098 * xmlBuildRelativeURI: 2099 * @URI: the URI reference under consideration 2100 * @base: the base value 2101 * 2102 * Expresses the URI of the reference in terms relative to the 2103 * base. 
Some examples of this operation include: 2104 * base = "http://site1.com/docs/book1.html" 2105 * URI input URI returned 2106 * docs/pic1.gif pic1.gif 2107 * docs/img/pic1.gif img/pic1.gif 2108 * img/pic1.gif ../img/pic1.gif 2109 * http://site1.com/docs/pic1.gif pic1.gif 2110 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif 2111 * 2112 * base = "docs/book1.html" 2113 * URI input URI returned 2114 * docs/pic1.gif pic1.gif 2115 * docs/img/pic1.gif img/pic1.gif 2116 * img/pic1.gif ../img/pic1.gif 2117 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif 2118 * 2119 * 2120 * Note: if the URI reference is really wierd or complicated, it may be 2121 * worthwhile to first convert it into a "nice" one by calling 2122 * xmlBuildURI (using 'base') before calling this routine, 2123 * since this routine (for reasonable efficiency) assumes URI has 2124 * already been through some validation. 2125 * 2126 * Returns a new URI string (to be freed by the caller) or NULL in case 2127 * error. 
2128 */ 2129xmlChar * 2130xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base) 2131{ 2132 xmlChar *val = NULL; 2133 int ret; 2134 int ix; 2135 int pos = 0; 2136 int nbslash = 0; 2137 int len; 2138 xmlURIPtr ref = NULL; 2139 xmlURIPtr bas = NULL; 2140 xmlChar *bptr, *uptr, *vptr; 2141 int remove_path = 0; 2142 2143 if ((URI == NULL) || (*URI == 0)) 2144 return NULL; 2145 2146 /* 2147 * First parse URI into a standard form 2148 */ 2149 ref = xmlCreateURI (); 2150 if (ref == NULL) 2151 return NULL; 2152 /* If URI not already in "relative" form */ 2153 if (URI[0] != '.') { 2154 ret = xmlParseURIReference (ref, (const char *) URI); 2155 if (ret != 0) 2156 goto done; /* Error in URI, return NULL */ 2157 } else 2158 ref->path = (char *)xmlStrdup(URI); 2159 2160 /* 2161 * Next parse base into the same standard form 2162 */ 2163 if ((base == NULL) || (*base == 0)) { 2164 val = xmlStrdup (URI); 2165 goto done; 2166 } 2167 bas = xmlCreateURI (); 2168 if (bas == NULL) 2169 goto done; 2170 if (base[0] != '.') { 2171 ret = xmlParseURIReference (bas, (const char *) base); 2172 if (ret != 0) 2173 goto done; /* Error in base, return NULL */ 2174 } else 2175 bas->path = (char *)xmlStrdup(base); 2176 2177 /* 2178 * If the scheme / server on the URI differs from the base, 2179 * just return the URI 2180 */ 2181 if ((ref->scheme != NULL) && 2182 ((bas->scheme == NULL) || 2183 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) || 2184 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) { 2185 val = xmlStrdup (URI); 2186 goto done; 2187 } 2188 if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) { 2189 val = xmlStrdup(BAD_CAST ""); 2190 goto done; 2191 } 2192 if (bas->path == NULL) { 2193 val = xmlStrdup((xmlChar *)ref->path); 2194 goto done; 2195 } 2196 if (ref->path == NULL) { 2197 ref->path = (char *) "/"; 2198 remove_path = 1; 2199 } 2200 2201 /* 2202 * At this point (at last!) 
we can compare the two paths 2203 * 2204 * First we take care of the special case where either of the 2205 * two path components may be missing (bug 316224) 2206 */ 2207 if (bas->path == NULL) { 2208 if (ref->path != NULL) { 2209 uptr = (xmlChar *) ref->path; 2210 if (*uptr == '/') 2211 uptr++; 2212 val = xmlStrdup(uptr); 2213 } 2214 goto done; 2215 } 2216 bptr = (xmlChar *)bas->path; 2217 if (ref->path == NULL) { 2218 for (ix = 0; bptr[ix] != 0; ix++) { 2219 if (bptr[ix] == '/') 2220 nbslash++; 2221 } 2222 uptr = NULL; 2223 len = 1; /* this is for a string terminator only */ 2224 } else { 2225 /* 2226 * Next we compare the two strings and find where they first differ 2227 */ 2228 if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/')) 2229 pos += 2; 2230 if ((*bptr == '.') && (bptr[1] == '/')) 2231 bptr += 2; 2232 else if ((*bptr == '/') && (ref->path[pos] != '/')) 2233 bptr++; 2234 while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0)) 2235 pos++; 2236 2237 if (bptr[pos] == ref->path[pos]) { 2238 val = xmlStrdup(BAD_CAST ""); 2239 goto done; /* (I can't imagine why anyone would do this) */ 2240 } 2241 2242 /* 2243 * In URI, "back up" to the last '/' encountered. 
This will be the 2244 * beginning of the "unique" suffix of URI 2245 */ 2246 ix = pos; 2247 if ((ref->path[ix] == '/') && (ix > 0)) 2248 ix--; 2249 else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/')) 2250 ix -= 2; 2251 for (; ix > 0; ix--) { 2252 if (ref->path[ix] == '/') 2253 break; 2254 } 2255 if (ix == 0) { 2256 uptr = (xmlChar *)ref->path; 2257 } else { 2258 ix++; 2259 uptr = (xmlChar *)&ref->path[ix]; 2260 } 2261 2262 /* 2263 * In base, count the number of '/' from the differing point 2264 */ 2265 if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */ 2266 for (; bptr[ix] != 0; ix++) { 2267 if (bptr[ix] == '/') 2268 nbslash++; 2269 } 2270 } 2271 len = xmlStrlen (uptr) + 1; 2272 } 2273 2274 if (nbslash == 0) { 2275 if (uptr != NULL) 2276 val = xmlStrdup (uptr); 2277 goto done; 2278 } 2279 2280 /* 2281 * Allocate just enough space for the returned string - 2282 * length of the remainder of the URI, plus enough space 2283 * for the "../" groups, plus one for the terminator 2284 */ 2285 val = (xmlChar *) xmlMalloc (len + 3 * nbslash); 2286 if (val == NULL) { 2287 xmlGenericError(xmlGenericErrorContext, 2288 "xmlBuildRelativeURI: out of memory\n"); 2289 goto done; 2290 } 2291 vptr = val; 2292 /* 2293 * Put in as many "../" as needed 2294 */ 2295 for (; nbslash>0; nbslash--) { 2296 *vptr++ = '.'; 2297 *vptr++ = '.'; 2298 *vptr++ = '/'; 2299 } 2300 /* 2301 * Finish up with the end of the URI 2302 */ 2303 if (uptr != NULL) { 2304 if ((vptr > val) && (len > 0) && 2305 (uptr[0] == '/') && (vptr[-1] == '/')) { 2306 memcpy (vptr, uptr + 1, len - 1); 2307 vptr[len - 2] = 0; 2308 } else { 2309 memcpy (vptr, uptr, len); 2310 vptr[len - 1] = 0; 2311 } 2312 } else { 2313 vptr[len - 1] = 0; 2314 } 2315 2316done: 2317 /* 2318 * Free the working variables 2319 */ 2320 if (remove_path != 0) 2321 ref->path = NULL; 2322 if (ref != NULL) 2323 xmlFreeURI (ref); 2324 if (bas != NULL) 2325 xmlFreeURI (bas); 2326 2327 return val; 2328} 2329 2330/** 
2331 * xmlCanonicPath: 2332 * @path: the resource locator in a filesystem notation 2333 * 2334 * Constructs a canonic path from the specified path. 2335 * 2336 * Returns a new canonic path, or a duplicate of the path parameter if the 2337 * construction fails. The caller is responsible for freeing the memory occupied 2338 * by the returned string. If there is insufficient memory available, or the 2339 * argument is NULL, the function returns NULL. 2340 */ 2341#define IS_WINDOWS_PATH(p) \ 2342 ((p != NULL) && \ 2343 (((p[0] >= 'a') && (p[0] <= 'z')) || \ 2344 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \ 2345 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\'))) 2346xmlChar * 2347xmlCanonicPath(const xmlChar *path) 2348{ 2349#if defined(_WIN32) && !defined(__CYGWIN__) 2350 int len = 0; 2351 int i = 0; 2352 xmlChar *p = NULL; 2353#endif 2354 xmlURIPtr uri; 2355 xmlChar *ret; 2356 const xmlChar *absuri; 2357 2358 if (path == NULL) 2359 return(NULL); 2360 if ((uri = xmlParseURI((const char *) path)) != NULL) { 2361 xmlFreeURI(uri); 2362 return xmlStrdup(path); 2363 } 2364 2365 absuri = xmlStrstr(path, BAD_CAST "://"); 2366 if (absuri != NULL) { 2367 int l, j; 2368 unsigned char c; 2369 xmlChar *escURI; 2370 2371 /* 2372 * this looks like an URI where some parts have not been 2373 * escaped leading to a parsing problem check that the first 2374 * part matches a protocol. 
2375 */ 2376 l = absuri - path; 2377 if ((l <= 0) || (l > 20)) 2378 goto path_processing; 2379 for (j = 0;j < l;j++) { 2380 c = path[j]; 2381 if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')))) 2382 goto path_processing; 2383 } 2384 2385 escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;="); 2386 if (escURI != NULL) { 2387 uri = xmlParseURI((const char *) escURI); 2388 if (uri != NULL) { 2389 xmlFreeURI(uri); 2390 return escURI; 2391 } 2392 xmlFreeURI(uri); 2393 } 2394 } 2395 2396path_processing: 2397#if defined(_WIN32) && !defined(__CYGWIN__) 2398 /* 2399 * This really need to be cleaned up by someone with a Windows box 2400 */ 2401 uri = xmlCreateURI(); 2402 if (uri == NULL) { 2403 return(NULL); 2404 } 2405 2406 len = xmlStrlen(path); 2407 if ((len > 2) && IS_WINDOWS_PATH(path)) { 2408 uri->scheme = xmlStrdup(BAD_CAST "file"); 2409 uri->path = xmlMallocAtomic(len + 2); 2410 if (uri->path == NULL) { 2411 xmlFreeURI(uri); 2412 return(NULL); 2413 } 2414 uri->path[0] = '/'; 2415 p = uri->path + 1; 2416 strncpy(p, path, len + 1); 2417 } else { 2418 uri->path = xmlStrdup(path); 2419 if (uri->path == NULL) { 2420 xmlFreeURI(uri); 2421 return(NULL); 2422 } 2423 p = uri->path; 2424 } 2425 while (*p != '\0') { 2426 if (*p == '\\') 2427 *p = '/'; 2428 p++; 2429 } 2430 if (uri->path == NULL) { 2431 xmlFreeURI(uri); 2432 return(NULL); 2433 } 2434 2435 if (uri->scheme == NULL) { 2436 ret = xmlStrdup((const xmlChar *) path); 2437 } else { 2438 ret = xmlSaveUri(uri); 2439 } 2440 2441 xmlFreeURI(uri); 2442#else 2443 ret = xmlStrdup((const xmlChar *) path); 2444#endif 2445 return(ret); 2446} 2447 2448/** 2449 * xmlPathToURI: 2450 * @path: the resource locator in a filesystem notation 2451 * 2452 * Constructs an URI expressing the existing path 2453 * 2454 * Returns a new URI, or a duplicate of the path parameter if the 2455 * construction fails. The caller is responsible for freeing the memory 2456 * occupied by the returned string. 
If there is insufficient memory available, 2457 * or the argument is NULL, the function returns NULL. 2458 */ 2459xmlChar * 2460xmlPathToURI(const xmlChar *path) 2461{ 2462 xmlURIPtr uri; 2463 xmlURI temp; 2464 xmlChar *ret, *cal; 2465 2466 if (path == NULL) 2467 return(NULL); 2468 2469 if ((uri = xmlParseURI((const char *) path)) != NULL) { 2470 xmlFreeURI(uri); 2471 return xmlStrdup(path); 2472 } 2473 cal = xmlCanonicPath(path); 2474 if (cal == NULL) 2475 return(NULL); 2476 memset(&temp, 0, sizeof(temp)); 2477 temp.path = (char *) cal; 2478 ret = xmlSaveUri(&temp); 2479 xmlFree(cal); 2480 return(ret); 2481} 2482#define bottom_uri 2483#include "elfgcchack.h" 2484