1/* Licensed to the Apache Software Foundation (ASF) under one or more 2 * contributor license agreements. See the NOTICE file distributed with 3 * this work for additional information regarding copyright ownership. 4 * The ASF licenses this file to You under the Apache License, Version 2.0 5 * (the "License"); you may not use this file except in compliance with 6 * the License. You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17/* 18 * apr_uri.c: URI related utility things 19 * 20 */ 21 22#include <stdlib.h> 23 24#include "apu.h" 25#include "apr.h" 26#include "apr_general.h" 27#include "apr_strings.h" 28 29#define APR_WANT_STRFUNC 30#include "apr_want.h" 31 32#include "apr_uri.h" 33 34typedef struct schemes_t schemes_t; 35 36/** Structure to store various schemes and their default ports */ 37struct schemes_t { 38 /** The name of the scheme */ 39 const char *name; 40 /** The default port for the scheme */ 41 apr_port_t default_port; 42}; 43 44/* Some WWW schemes and their default ports; this is basically /etc/services */ 45/* This will become global when the protocol abstraction comes */ 46/* As the schemes are searched by a linear search, */ 47/* they are sorted by their expected frequency */ 48static schemes_t schemes[] = 49{ 50 {"http", APR_URI_HTTP_DEFAULT_PORT}, 51 {"ftp", APR_URI_FTP_DEFAULT_PORT}, 52 {"https", APR_URI_HTTPS_DEFAULT_PORT}, 53 {"gopher", APR_URI_GOPHER_DEFAULT_PORT}, 54 {"ldap", APR_URI_LDAP_DEFAULT_PORT}, 55 {"nntp", APR_URI_NNTP_DEFAULT_PORT}, 56 {"snews", APR_URI_SNEWS_DEFAULT_PORT}, 57 {"imap", APR_URI_IMAP_DEFAULT_PORT}, 58 {"pop", APR_URI_POP_DEFAULT_PORT}, 59 {"sip", APR_URI_SIP_DEFAULT_PORT}, 60 {"rtsp", APR_URI_RTSP_DEFAULT_PORT}, 61 {"wais", APR_URI_WAIS_DEFAULT_PORT}, 62 {"z39.50r", APR_URI_WAIS_DEFAULT_PORT}, 63 {"z39.50s", APR_URI_WAIS_DEFAULT_PORT}, 64 {"prospero", APR_URI_PROSPERO_DEFAULT_PORT}, 65 {"nfs", APR_URI_NFS_DEFAULT_PORT}, 66 {"tip", APR_URI_TIP_DEFAULT_PORT}, 67 {"acap", APR_URI_ACAP_DEFAULT_PORT}, 68 {"telnet", APR_URI_TELNET_DEFAULT_PORT}, 69 {"ssh", APR_URI_SSH_DEFAULT_PORT}, 70 { NULL, 0xFFFF } /* unknown port */ 71}; 72 73APU_DECLARE(apr_port_t) apr_uri_port_of_scheme(const char *scheme_str) 74{ 75 schemes_t *scheme; 76 77 if (scheme_str) { 78 for (scheme = schemes; scheme->name != NULL; ++scheme) { 79 if (strcasecmp(scheme_str, scheme->name) == 0) { 80 return scheme->default_port; 81 } 82 } 83 } 84 return 0; 85} 86 87/* Unparse a apr_uri_t structure to an URI string. 88 * Optionally suppress the password for security reasons. 89 */ 90APU_DECLARE(char *) apr_uri_unparse(apr_pool_t *p, 91 const apr_uri_t *uptr, 92 unsigned flags) 93{ 94 char *ret = ""; 95 96 /* If suppressing the site part, omit both user name & scheme://hostname */ 97 if (!(flags & APR_URI_UNP_OMITSITEPART)) { 98 99 /* Construct a "user:password@" string, honoring the passed 100 * APR_URI_UNP_ flags: */ 101 if (uptr->user || uptr->password) { 102 ret = apr_pstrcat(p, 103 (uptr->user && !(flags & APR_URI_UNP_OMITUSER)) 104 ? uptr->user : "", 105 (uptr->password && !(flags & APR_URI_UNP_OMITPASSWORD)) 106 ? ":" : "", 107 (uptr->password && !(flags & APR_URI_UNP_OMITPASSWORD)) 108 ? ((flags & APR_URI_UNP_REVEALPASSWORD) 109 ? uptr->password : "XXXXXXXX") 110 : "", 111 ((uptr->user && !(flags & APR_URI_UNP_OMITUSER)) || 112 (uptr->password && !(flags & APR_URI_UNP_OMITPASSWORD))) 113 ? "@" : "", 114 NULL); 115 } 116 117 /* Construct scheme://site string */ 118 if (uptr->hostname) { 119 int is_default_port; 120 const char *lbrk = "", *rbrk = ""; 121 122 if (strchr(uptr->hostname, ':')) { /* v6 literal */ 123 lbrk = "["; 124 rbrk = "]"; 125 } 126 127 is_default_port = 128 (uptr->port_str == NULL || 129 uptr->port == 0 || 130 uptr->port == apr_uri_port_of_scheme(uptr->scheme)); 131 132 ret = apr_pstrcat(p, "//", ret, lbrk, uptr->hostname, rbrk, 133 is_default_port ? "" : ":", 134 is_default_port ? "" : uptr->port_str, 135 NULL); 136 } 137 if (uptr->scheme) { 138 ret = apr_pstrcat(p, uptr->scheme, ":", ret, NULL); 139 } 140 } 141 142 /* Should we suppress all path info? */ 143 if (!(flags & APR_URI_UNP_OMITPATHINFO)) { 144 /* Append path, query and fragment strings: */ 145 ret = apr_pstrcat(p, 146 ret, 147 (uptr->path) 148 ? uptr->path : "", 149 (uptr->query && !(flags & APR_URI_UNP_OMITQUERY)) 150 ? "?" : "", 151 (uptr->query && !(flags & APR_URI_UNP_OMITQUERY)) 152 ? uptr->query : "", 153 (uptr->fragment && !(flags & APR_URI_UNP_OMITQUERY)) 154 ? "#" : NULL, 155 (uptr->fragment && !(flags & APR_URI_UNP_OMITQUERY)) 156 ? uptr->fragment : NULL, 157 NULL); 158 } 159 return ret; 160} 161 162/* Here is the hand-optimized parse_uri_components(). There are some wild 163 * tricks we could pull in assembly language that we don't pull here... like we 164 * can do word-at-time scans for delimiter characters using the same technique 165 * that fast memchr()s use. But that would be way non-portable. -djg 166 */ 167 168/* We have a apr_table_t that we can index by character and it tells us if the 169 * character is one of the interesting delimiters. Note that we even get 170 * compares for NUL for free -- it's just another delimiter. 171 */ 172 173#define T_SLASH 0x01 /* '/' */ 174#define T_QUESTION 0x02 /* '?' */ 175#define T_HASH 0x04 /* '#' */ 176#define T_ALPHA 0x08 /* 'A' ... 'Z', 'a' ... 'z' */ 177#define T_SCHEME 0x10 /* '0' ... '9', '-', '+', '.' 178 * (allowed in scheme except first char) 179 */ 180#define T_NUL 0x80 /* '\0' */ 181 182#if APR_CHARSET_EBCDIC 183/* Delimiter table for the EBCDIC character set */ 184static const unsigned char uri_delims[256] = { 185 T_NUL, /* 0x00 */ 186 0, /* 0x01 */ 187 0, /* 0x02 */ 188 0, /* 0x03 */ 189 0, /* 0x04 */ 190 0, /* 0x05 */ 191 0, /* 0x06 */ 192 0, /* 0x07 */ 193 0, /* 0x08 */ 194 0, /* 0x09 */ 195 0, /* 0x0a */ 196 0, /* 0x0b */ 197 0, /* 0x0c */ 198 0, /* 0x0d */ 199 0, /* 0x0e */ 200 0, /* 0x0f */ 201 0, /* 0x10 */ 202 0, /* 0x11 */ 203 0, /* 0x12 */ 204 0, /* 0x13 */ 205 0, /* 0x14 */ 206 0, /* 0x15 */ 207 0, /* 0x16 */ 208 0, /* 0x17 */ 209 0, /* 0x18 */ 210 0, /* 0x19 */ 211 0, /* 0x1a */ 212 0, /* 0x1b */ 213 0, /* 0x1c */ 214 0, /* 0x1d */ 215 0, /* 0x1e */ 216 0, /* 0x1f */ 217 0, /* 0x20 */ 218 0, /* 0x21 */ 219 0, /* 0x22 */ 220 0, /* 0x23 */ 221 0, /* 0x24 */ 222 0, /* 0x25 */ 223 0, /* 0x26 */ 224 0, /* 0x27 */ 225 0, /* 0x28 */ 226 0, /* 0x29 */ 227 0, /* 0x2a */ 228 0, /* 0x2b */ 229 0, /* 0x2c */ 230 0, /* 0x2d */ 231 0, /* 0x2e */ 232 0, /* 0x2f */ 233 0, /* 0x30 */ 234 0, /* 0x31 */ 235 0, /* 0x32 */ 236 0, /* 0x33 */ 237 0, /* 0x34 */ 238 0, /* 0x35 */ 239 0, /* 0x36 */ 240 0, /* 0x37 */ 241 0, /* 0x38 */ 242 0, /* 0x39 */ 243 0, /* 0x3a */ 244 0, /* 0x3b */ 245 0, /* 0x3c */ 246 0, /* 0x3d */ 247 0, /* 0x3e */ 248 0, /* 0x3f */ 249 0, /* 0x40 ' ' */ 250 0, /* 0x41 */ 251 0, /* 0x42 */ 252 0, /* 0x43 */ 253 0, /* 0x44 */ 254 0, /* 0x45 */ 255 0, /* 0x46 */ 256 0, /* 0x47 */ 257 0, /* 0x48 */ 258 0, /* 0x49 */ 259 0, /* 0x4a '[' */ 260 T_SCHEME, /* 0x4b '.' */ 261 0, /* 0x4c '<' */ 262 0, /* 0x4d '(' */ 263 T_SCHEME, /* 0x4e '+' */ 264 0, /* 0x4f '!' */ 265 0, /* 0x50 '&' */ 266 0, /* 0x51 */ 267 0, /* 0x52 */ 268 0, /* 0x53 */ 269 0, /* 0x54 */ 270 0, /* 0x55 */ 271 0, /* 0x56 */ 272 0, /* 0x57 */ 273 0, /* 0x58 */ 274 0, /* 0x59 */ 275 0, /* 0x5a ']' */ 276 0, /* 0x5b '$' */ 277 0, /* 0x5c '*' */ 278 0, /* 0x5d ')' */ 279 0, /* 0x5e ';' */ 280 0, /* 0x5f '^' */ 281 T_SCHEME, /* 0x60 '-' */ 282 T_SLASH, /* 0x61 '/' */ 283 0, /* 0x62 */ 284 0, /* 0x63 */ 285 0, /* 0x64 */ 286 0, /* 0x65 */ 287 0, /* 0x66 */ 288 0, /* 0x67 */ 289 0, /* 0x68 */ 290 0, /* 0x69 */ 291 0, /* 0x6a '|' */ 292 0, /* 0x6b ',' */ 293 0, /* 0x6c '%' */ 294 0, /* 0x6d '_' */ 295 0, /* 0x6e '>' */ 296 T_QUESTION, /* 0x6f '?' */ 297 0, /* 0x70 */ 298 0, /* 0x71 */ 299 0, /* 0x72 */ 300 0, /* 0x73 */ 301 0, /* 0x74 */ 302 0, /* 0x75 */ 303 0, /* 0x76 */ 304 0, /* 0x77 */ 305 0, /* 0x78 */ 306 0, /* 0x79 '`' */ 307 0, /* 0x7a ':' */ 308 T_HASH, /* 0x7b '#' */ 309 0, /* 0x7c '@' */ 310 0, /* 0x7d ''' */ 311 0, /* 0x7e '=' */ 312 0, /* 0x7f '"' */ 313 0, /* 0x80 */ 314 T_ALPHA, /* 0x81 'a' */ 315 T_ALPHA, /* 0x82 'b' */ 316 T_ALPHA, /* 0x83 'c' */ 317 T_ALPHA, /* 0x84 'd' */ 318 T_ALPHA, /* 0x85 'e' */ 319 T_ALPHA, /* 0x86 'f' */ 320 T_ALPHA, /* 0x87 'g' */ 321 T_ALPHA, /* 0x88 'h' */ 322 T_ALPHA, /* 0x89 'i' */ 323 0, /* 0x8a */ 324 0, /* 0x8b */ 325 0, /* 0x8c */ 326 0, /* 0x8d */ 327 0, /* 0x8e */ 328 0, /* 0x8f */ 329 0, /* 0x90 */ 330 T_ALPHA, /* 0x91 'j' */ 331 T_ALPHA, /* 0x92 'k' */ 332 T_ALPHA, /* 0x93 'l' */ 333 T_ALPHA, /* 0x94 'm' */ 334 T_ALPHA, /* 0x95 'n' */ 335 T_ALPHA, /* 0x96 'o' */ 336 T_ALPHA, /* 0x97 'p' */ 337 T_ALPHA, /* 0x98 'q' */ 338 T_ALPHA, /* 0x99 'r' */ 339 0, /* 0x9a */ 340 0, /* 0x9b */ 341 0, /* 0x9c */ 342 0, /* 0x9d */ 343 0, /* 0x9e */ 344 0, /* 0x9f */ 345 0, /* 0xa0 */ 346 0, /* 0xa1 '~' */ 347 T_ALPHA, /* 0xa2 's' */ 348 T_ALPHA, /* 0xa3 't' */ 349 T_ALPHA, /* 0xa4 'u' */ 350 T_ALPHA, /* 0xa5 'v' */ 351 T_ALPHA, /* 0xa6 'w' */ 352 T_ALPHA, /* 0xa7 'x' */ 353 T_ALPHA, /* 0xa8 'y' */ 354 T_ALPHA, /* 0xa9 'z' */ 355 0, /* 0xaa */ 356 0, /* 0xab */ 357 0, /* 0xac */ 358 0, /* 0xad */ 359 0, /* 0xae */ 360 0, /* 0xaf */ 361 0, /* 0xb0 */ 362 0, /* 0xb1 */ 363 0, /* 0xb2 */ 364 0, /* 0xb3 */ 365 0, /* 0xb4 */ 366 0, /* 0xb5 */ 367 0, /* 0xb6 */ 368 0, /* 0xb7 */ 369 0, /* 0xb8 */ 370 0, /* 0xb9 */ 371 0, /* 0xba */ 372 0, /* 0xbb */ 373 0, /* 0xbc */ 374 0, /* 0xbd */ 375 0, /* 0xbe */ 376 0, /* 0xbf */ 377 0, /* 0xc0 '{' */ 378 T_ALPHA, /* 0xc1 'A' */ 379 T_ALPHA, /* 0xc2 'B' */ 380 T_ALPHA, /* 0xc3 'C' */ 381 T_ALPHA, /* 0xc4 'D' */ 382 T_ALPHA, /* 0xc5 'E' */ 383 T_ALPHA, /* 0xc6 'F' */ 384 T_ALPHA, /* 0xc7 'G' */ 385 T_ALPHA, /* 0xc8 'H' */ 386 T_ALPHA, /* 0xc9 'I' */ 387 0, /* 0xca */ 388 0, /* 0xcb */ 389 0, /* 0xcc */ 390 0, /* 0xcd */ 391 0, /* 0xce */ 392 0, /* 0xcf */ 393 0, /* 0xd0 '}' */ 394 T_ALPHA, /* 0xd1 'J' */ 395 T_ALPHA, /* 0xd2 'K' */ 396 T_ALPHA, /* 0xd3 'L' */ 397 T_ALPHA, /* 0xd4 'M' */ 398 T_ALPHA, /* 0xd5 'N' */ 399 T_ALPHA, /* 0xd6 'O' */ 400 T_ALPHA, /* 0xd7 'P' */ 401 T_ALPHA, /* 0xd8 'Q' */ 402 T_ALPHA, /* 0xd9 'R' */ 403 0, /* 0xda */ 404 0, /* 0xdb */ 405 0, /* 0xdc */ 406 0, /* 0xdd */ 407 0, /* 0xde */ 408 0, /* 0xdf */ 409 0, /* 0xe0 '\' */ 410 0, /* 0xe1 */ 411 T_ALPHA, /* 0xe2 'S' */ 412 T_ALPHA, /* 0xe3 'T' */ 413 T_ALPHA, /* 0xe4 'U' */ 414 T_ALPHA, /* 0xe5 'V' */ 415 T_ALPHA, /* 0xe6 'W' */ 416 T_ALPHA, /* 0xe7 'X' */ 417 T_ALPHA, /* 0xe8 'Y' */ 418 T_ALPHA, /* 0xe9 'Z' */ 419 0, /* 0xea */ 420 0, /* 0xeb */ 421 0, /* 0xec */ 422 0, /* 0xed */ 423 0, /* 0xee */ 424 0, /* 0xef */ 425 T_SCHEME, /* 0xf0 '0' */ 426 T_SCHEME, /* 0xf1 '1' */ 427 T_SCHEME, /* 0xf2 '2' */ 428 T_SCHEME, /* 0xf3 '3' */ 429 T_SCHEME, /* 0xf4 '4' */ 430 T_SCHEME, /* 0xf5 '5' */ 431 T_SCHEME, /* 0xf6 '6' */ 432 T_SCHEME, /* 0xf7 '7' */ 433 T_SCHEME, /* 0xf8 '8' */ 434 T_SCHEME, /* 0xf9 '9' */ 435 0, /* 0xfa */ 436 0, /* 0xfb */ 437 0, /* 0xfc */ 438 0, /* 0xfd */ 439 0, /* 0xfe */ 440 0 /* 0xff */ 441}; 442#else 443/* Delimiter table for the ASCII character set */ 444static const unsigned char uri_delims[256] = { 445 T_NUL, /* 0x00 */ 446 0, /* 0x01 */ 447 0, /* 0x02 */ 448 0, /* 0x03 */ 449 0, /* 0x04 */ 450 0, /* 0x05 */ 451 0, /* 0x06 */ 452 0, /* 0x07 */ 453 0, /* 0x08 */ 454 0, /* 0x09 */ 455 0, /* 0x0a */ 456 0, /* 0x0b */ 457 0, /* 0x0c */ 458 0, /* 0x0d */ 459 0, /* 0x0e */ 460 0, /* 0x0f */ 461 0, /* 0x10 */ 462 0, /* 0x11 */ 463 0, /* 0x12 */ 464 0, /* 0x13 */ 465 0, /* 0x14 */ 466 0, /* 0x15 */ 467 0, /* 0x16 */ 468 0, /* 0x17 */ 469 0, /* 0x18 */ 470 0, /* 0x19 */ 471 0, /* 0x1a */ 472 0, /* 0x1b */ 473 0, /* 0x1c */ 474 0, /* 0x1d */ 475 0, /* 0x1e */ 476 0, /* 0x1f */ 477 0, /* 0x20 ' ' */ 478 0, /* 0x21 '!' */ 479 0, /* 0x22 '"' */ 480 T_HASH, /* 0x23 '#' */ 481 0, /* 0x24 '$' */ 482 0, /* 0x25 '%' */ 483 0, /* 0x26 '&' */ 484 0, /* 0x27 ''' */ 485 0, /* 0x28 '(' */ 486 0, /* 0x29 ')' */ 487 0, /* 0x2a '*' */ 488 T_SCHEME, /* 0x2b '+' */ 489 0, /* 0x2c ',' */ 490 T_SCHEME, /* 0x2d '-' */ 491 T_SCHEME, /* 0x2e '.' */ 492 T_SLASH, /* 0x2f '/' */ 493 T_SCHEME, /* 0x30 '0' */ 494 T_SCHEME, /* 0x31 '1' */ 495 T_SCHEME, /* 0x32 '2' */ 496 T_SCHEME, /* 0x33 '3' */ 497 T_SCHEME, /* 0x34 '4' */ 498 T_SCHEME, /* 0x35 '5' */ 499 T_SCHEME, /* 0x36 '6' */ 500 T_SCHEME, /* 0x37 '7' */ 501 T_SCHEME, /* 0x38 '8' */ 502 T_SCHEME, /* 0x39 '9' */ 503 0, /* 0x3a ':' */ 504 0, /* 0x3b ';' */ 505 0, /* 0x3c '<' */ 506 0, /* 0x3d '=' */ 507 0, /* 0x3e '>' */ 508 T_QUESTION, /* 0x3f '?' */ 509 0, /* 0x40 '@' */ 510 T_ALPHA, /* 0x41 'A' */ 511 T_ALPHA, /* 0x42 'B' */ 512 T_ALPHA, /* 0x43 'C' */ 513 T_ALPHA, /* 0x44 'D' */ 514 T_ALPHA, /* 0x45 'E' */ 515 T_ALPHA, /* 0x46 'F' */ 516 T_ALPHA, /* 0x47 'G' */ 517 T_ALPHA, /* 0x48 'H' */ 518 T_ALPHA, /* 0x49 'I' */ 519 T_ALPHA, /* 0x4a 'J' */ 520 T_ALPHA, /* 0x4b 'K' */ 521 T_ALPHA, /* 0x4c 'L' */ 522 T_ALPHA, /* 0x4d 'M' */ 523 T_ALPHA, /* 0x4e 'N' */ 524 T_ALPHA, /* 0x4f 'O' */ 525 T_ALPHA, /* 0x50 'P' */ 526 T_ALPHA, /* 0x51 'Q' */ 527 T_ALPHA, /* 0x52 'R' */ 528 T_ALPHA, /* 0x53 'S' */ 529 T_ALPHA, /* 0x54 'T' */ 530 T_ALPHA, /* 0x55 'U' */ 531 T_ALPHA, /* 0x56 'V' */ 532 T_ALPHA, /* 0x57 'W' */ 533 T_ALPHA, /* 0x58 'X' */ 534 T_ALPHA, /* 0x59 'Y' */ 535 T_ALPHA, /* 0x5a 'Z' */ 536 0, /* 0x5b '[' */ 537 0, /* 0x5c '\' */ 538 0, /* 0x5d ']' */ 539 0, /* 0x5e '^' */ 540 0, /* 0x5f '_' */ 541 0, /* 0x60 '`' */ 542 T_ALPHA, /* 0x61 'a' */ 543 T_ALPHA, /* 0x62 'b' */ 544 T_ALPHA, /* 0x63 'c' */ 545 T_ALPHA, /* 0x64 'd' */ 546 T_ALPHA, /* 0x65 'e' */ 547 T_ALPHA, /* 0x66 'f' */ 548 T_ALPHA, /* 0x67 'g' */ 549 T_ALPHA, /* 0x68 'h' */ 550 T_ALPHA, /* 0x69 'i' */ 551 T_ALPHA, /* 0x6a 'j' */ 552 T_ALPHA, /* 0x6b 'k' */ 553 T_ALPHA, /* 0x6c 'l' */ 554 T_ALPHA, /* 0x6d 'm' */ 555 T_ALPHA, /* 0x6e 'n' */ 556 T_ALPHA, /* 0x6f 'o' */ 557 T_ALPHA, /* 0x70 'p' */ 558 T_ALPHA, /* 0x71 'q' */ 559 T_ALPHA, /* 0x72 'r' */ 560 T_ALPHA, /* 0x73 's' */ 561 T_ALPHA, /* 0x74 't' */ 562 T_ALPHA, /* 0x75 'u' */ 563 T_ALPHA, /* 0x76 'v' */ 564 T_ALPHA, /* 0x77 'w' */ 565 T_ALPHA, /* 0x78 'x' */ 566 T_ALPHA, /* 0x79 'y' */ 567 T_ALPHA, /* 0x7a 'z' */ 568 0, /* 0x7b '{' */ 569 0, /* 0x7c '|' */ 570 0, /* 0x7d '}' */ 571 0, /* 0x7e '~' */ 572 0, /* 0x7f */ 573 0, /* 0x80 */ 574 0, /* 0x81 */ 575 0, /* 0x82 */ 576 0, /* 0x83 */ 577 0, /* 0x84 */ 578 0, /* 0x85 */ 579 0, /* 0x86 */ 580 0, /* 0x87 */ 581 0, /* 0x88 */ 582 0, /* 0x89 */ 583 0, /* 0x8a */ 584 0, /* 0x8b */ 585 0, /* 0x8c */ 586 0, /* 0x8d */ 587 0, /* 0x8e */ 588 0, /* 0x8f */ 589 0, /* 0x90 */ 590 0, /* 0x91 */ 591 0, /* 0x92 */ 592 0, /* 0x93 */ 593 0, /* 0x94 */ 594 0, /* 0x95 */ 595 0, /* 0x96 */ 596 0, /* 0x97 */ 597 0, /* 0x98 */ 598 0, /* 0x99 */ 599 0, /* 0x9a */ 600 0, /* 0x9b */ 601 0, /* 0x9c */ 602 0, /* 0x9d */ 603 0, /* 0x9e */ 604 0, /* 0x9f */ 605 0, /* 0xa0 */ 606 0, /* 0xa1 */ 607 0, /* 0xa2 */ 608 0, /* 0xa3 */ 609 0, /* 0xa4 */ 610 0, /* 0xa5 */ 611 0, /* 0xa6 */ 612 0, /* 0xa7 */ 613 0, /* 0xa8 */ 614 0, /* 0xa9 */ 615 0, /* 0xaa */ 616 0, /* 0xab */ 617 0, /* 0xac */ 618 0, /* 0xad */ 619 0, /* 0xae */ 620 0, /* 0xaf */ 621 0, /* 0xb0 */ 622 0, /* 0xb1 */ 623 0, /* 0xb2 */ 624 0, /* 0xb3 */ 625 0, /* 0xb4 */ 626 0, /* 0xb5 */ 627 0, /* 0xb6 */ 628 0, /* 0xb7 */ 629 0, /* 0xb8 */ 630 0, /* 0xb9 */ 631 0, /* 0xba */ 632 0, /* 0xbb */ 633 0, /* 0xbc */ 634 0, /* 0xbd */ 635 0, /* 0xbe */ 636 0, /* 0xbf */ 637 0, /* 0xc0 */ 638 0, /* 0xc1 */ 639 0, /* 0xc2 */ 640 0, /* 0xc3 */ 641 0, /* 0xc4 */ 642 0, /* 0xc5 */ 643 0, /* 0xc6 */ 644 0, /* 0xc7 */ 645 0, /* 0xc8 */ 646 0, /* 0xc9 */ 647 0, /* 0xca */ 648 0, /* 0xcb */ 649 0, /* 0xcc */ 650 0, /* 0xcd */ 651 0, /* 0xce */ 652 0, /* 0xcf */ 653 0, /* 0xd0 */ 654 0, /* 0xd1 */ 655 0, /* 0xd2 */ 656 0, /* 0xd3 */ 657 0, /* 0xd4 */ 658 0, /* 0xd5 */ 659 0, /* 0xd6 */ 660 0, /* 0xd7 */ 661 0, /* 0xd8 */ 662 0, /* 0xd9 */ 663 0, /* 0xda */ 664 0, /* 0xdb */ 665 0, /* 0xdc */ 666 0, /* 0xdd */ 667 0, /* 0xde */ 668 0, /* 0xdf */ 669 0, /* 0xe0 */ 670 0, /* 0xe1 */ 671 0, /* 0xe2 */ 672 0, /* 0xe3 */ 673 0, /* 0xe4 */ 674 0, /* 0xe5 */ 675 0, /* 0xe6 */ 676 0, /* 0xe7 */ 677 0, /* 0xe8 */ 678 0, /* 0xe9 */ 679 0, /* 0xea */ 680 0, /* 0xeb */ 681 0, /* 0xec */ 682 0, /* 0xed */ 683 0, /* 0xee */ 684 0, /* 0xef */ 685 0, /* 0xf0 */ 686 0, /* 0xf1 */ 687 0, /* 0xf2 */ 688 0, /* 0xf3 */ 689 0, /* 0xf4 */ 690 0, /* 0xf5 */ 691 0, /* 0xf6 */ 692 0, /* 0xf7 */ 693 0, /* 0xf8 */ 694 0, /* 0xf9 */ 695 0, /* 0xfa */ 696 0, /* 0xfb */ 697 0, /* 0xfc */ 698 0, /* 0xfd */ 699 0, /* 0xfe */ 700 0 /* 0xff */ 701}; 702#endif 703 704 705/* it works like this: 706 if (uri_delims[ch] & NOTEND_foobar) { 707 then we're not at a delimiter for foobar 708 } 709*/ 710 711#define NOTEND_HOSTINFO (T_SLASH | T_QUESTION | T_HASH | T_NUL) 712#define NOTEND_PATH (T_QUESTION | T_HASH | T_NUL) 713 714/* parse_uri_components(): 715 * Parse a given URI, fill in all supplied fields of a uri_components 716 * structure. This eliminates the necessity of extracting host, port, 717 * path, query info repeatedly in the modules. 718 * Side effects: 719 * - fills in fields of uri_components *uptr 720 * - none on any of the r->* fields 721 */ 722APU_DECLARE(apr_status_t) apr_uri_parse(apr_pool_t *p, const char *uri, 723 apr_uri_t *uptr) 724{ 725 const char *s; 726 const char *s1; 727 const char *hostinfo; 728 char *endstr; 729 int port; 730 int v6_offset1 = 0, v6_offset2 = 0; 731 732 /* Initialize the structure. parse_uri() and parse_uri_components() 733 * can be called more than once per request. 734 */ 735 memset (uptr, '\0', sizeof(*uptr)); 736 uptr->is_initialized = 1; 737 738 /* We assume the processor has a branch predictor like most -- 739 * it assumes forward branches are untaken and backwards are taken. That's 740 * the reason for the gotos. -djg 741 */ 742 if (uri[0] == '/') { 743 /* RFC2396 #4.3 says that two leading slashes mean we have an 744 * authority component, not a path! Fixing this looks scary 745 * with the gotos here. But if the existing logic is valid, 746 * then presumably a goto pointing to deal_with_authority works. 747 * 748 * RFC2396 describes this as resolving an ambiguity. In the 749 * case of three or more slashes there would seem to be no 750 * ambiguity, so it is a path after all. 751 */ 752 if (uri[1] == '/' && uri[2] != '/') { 753 s = uri + 2 ; 754 goto deal_with_authority ; 755 } 756 757deal_with_path: 758 /* we expect uri to point to first character of path ... remember 759 * that the path could be empty -- http://foobar?query for example 760 */ 761 s = uri; 762 while ((uri_delims[*(unsigned char *)s] & NOTEND_PATH) == 0) { 763 ++s; 764 } 765 if (s != uri) { 766 uptr->path = apr_pstrmemdup(p, uri, s - uri); 767 } 768 if (*s == 0) { 769 return APR_SUCCESS; 770 } 771 if (*s == '?') { 772 ++s; 773 s1 = strchr(s, '#'); 774 if (s1) { 775 uptr->fragment = apr_pstrdup(p, s1 + 1); 776 uptr->query = apr_pstrmemdup(p, s, s1 - s); 777 } 778 else { 779 uptr->query = apr_pstrdup(p, s); 780 } 781 return APR_SUCCESS; 782 } 783 /* otherwise it's a fragment */ 784 uptr->fragment = apr_pstrdup(p, s + 1); 785 return APR_SUCCESS; 786 } 787 788 /* find the scheme: */ 789 s = uri; 790 /* first char must be letter */ 791 if (uri_delims[*(unsigned char *)s] & T_ALPHA) { 792 ++s; 793 while ((uri_delims[*(unsigned char *)s] & (T_ALPHA|T_SCHEME))) 794 ++s; 795 } 796 /* scheme must be non-empty and followed by : */ 797 if (s != uri && s[0] == ':') { 798 uptr->scheme = apr_pstrmemdup(p, uri, s - uri); 799 s++; 800 } 801 else { 802 /* No valid scheme, restart from the beginning */ 803 s = uri; 804 } 805 806 if (s[0] != '/' || s[1] != '/') { 807 if (uri == s) { 808 /* 809 * RFC 3986 3.3: If we have no scheme and no authority, 810 * the leading segment of a relative path must not contain a ':'. 811 */ 812 char *first_slash = strchr(uri, '/'); 813 if (first_slash) { 814 while (s < first_slash) { 815 if (s[0] == ':') 816 return APR_EGENERAL; 817 ++s; 818 } 819 /* no scheme but relative path, e.g. '../image.jpg' */ 820 } 821 else { 822 if (strchr(uri, ':') != NULL) 823 return APR_EGENERAL; 824 /* no scheme, no slash, but relative path, e.g. 'image.jpg' */ 825 } 826 goto deal_with_path; 827 } 828 /* scheme and relative path */ 829 uri = s; 830 goto deal_with_path; 831 } 832 833 s += 2; 834 835deal_with_authority: 836 hostinfo = s; 837 while ((uri_delims[*(unsigned char *)s] & NOTEND_HOSTINFO) == 0) { 838 ++s; 839 } 840 uri = s; /* whatever follows hostinfo is start of uri */ 841 uptr->hostinfo = apr_pstrmemdup(p, hostinfo, uri - hostinfo); 842 843 /* If there's a username:password@host:port, the @ we want is the last @... 844 * too bad there's no memrchr()... For the C purists, note that hostinfo 845 * is definitely not the first character of the original uri so therefore 846 * &hostinfo[-1] < &hostinfo[0] ... and this loop is valid C. 847 */ 848 do { 849 --s; 850 } while (s >= hostinfo && *s != '@'); 851 if (s < hostinfo) { 852 /* again we want the common case to be fall through */ 853deal_with_host: 854 /* We expect hostinfo to point to the first character of 855 * the hostname. If there's a port it is the first colon, 856 * except with IPv6. 857 */ 858 if (*hostinfo == '[') { 859 v6_offset1 = 1; 860 v6_offset2 = 2; 861 s = memchr(hostinfo, ']', uri - hostinfo); 862 if (s == NULL) { 863 return APR_EGENERAL; 864 } 865 if (*++s != ':') { 866 s = NULL; /* no port */ 867 } 868 } 869 else { 870 s = memchr(hostinfo, ':', uri - hostinfo); 871 } 872 if (s == NULL) { 873 /* we expect the common case to have no port */ 874 uptr->hostname = apr_pstrmemdup(p, 875 hostinfo + v6_offset1, 876 uri - hostinfo - v6_offset2); 877 goto deal_with_path; 878 } 879 uptr->hostname = apr_pstrmemdup(p, 880 hostinfo + v6_offset1, 881 s - hostinfo - v6_offset2); 882 ++s; 883 uptr->port_str = apr_pstrmemdup(p, s, uri - s); 884 if (uri != s) { 885 port = strtol(uptr->port_str, &endstr, 10); 886 uptr->port = port; 887 if (*endstr == '\0') { 888 goto deal_with_path; 889 } 890 /* Invalid characters after ':' found */ 891 return APR_EGENERAL; 892 } 893 uptr->port = apr_uri_port_of_scheme(uptr->scheme); 894 goto deal_with_path; 895 } 896 897 /* first colon delimits username:password */ 898 s1 = memchr(hostinfo, ':', s - hostinfo); 899 if (s1) { 900 uptr->user = apr_pstrmemdup(p, hostinfo, s1 - hostinfo); 901 ++s1; 902 uptr->password = apr_pstrmemdup(p, s1, s - s1); 903 } 904 else { 905 uptr->user = apr_pstrmemdup(p, hostinfo, s - hostinfo); 906 } 907 hostinfo = s + 1; 908 goto deal_with_host; 909} 910 911/* Special case for CONNECT parsing: it comes with the hostinfo part only */ 912/* See the INTERNET-DRAFT document "Tunneling SSL Through a WWW Proxy" 913 * currently at http://www.mcom.com/newsref/std/tunneling_ssl.html 914 * for the format of the "CONNECT host:port HTTP/1.0" request 915 */ 916APU_DECLARE(apr_status_t) apr_uri_parse_hostinfo(apr_pool_t *p, 917 const char *hostinfo, 918 apr_uri_t *uptr) 919{ 920 const char *s; 921 char *endstr; 922 const char *rsb; 923 int v6_offset1 = 0; 924 925 /* Initialize the structure. parse_uri() and parse_uri_components() 926 * can be called more than once per request. 927 */ 928 memset(uptr, '\0', sizeof(*uptr)); 929 uptr->is_initialized = 1; 930 uptr->hostinfo = apr_pstrdup(p, hostinfo); 931 932 /* We expect hostinfo to point to the first character of 933 * the hostname. There must be a port, separated by a colon 934 */ 935 if (*hostinfo == '[') { 936 if ((rsb = strchr(hostinfo, ']')) == NULL || 937 *(rsb + 1) != ':') { 938 return APR_EGENERAL; 939 } 940 /* literal IPv6 address */ 941 s = rsb + 1; 942 ++hostinfo; 943 v6_offset1 = 1; 944 } 945 else { 946 s = strchr(hostinfo, ':'); 947 } 948 if (s == NULL) { 949 return APR_EGENERAL; 950 } 951 uptr->hostname = apr_pstrndup(p, hostinfo, s - hostinfo - v6_offset1); 952 ++s; 953 uptr->port_str = apr_pstrdup(p, s); 954 if (*s != '\0') { 955 uptr->port = (unsigned short) strtol(uptr->port_str, &endstr, 10); 956 if (*endstr == '\0') { 957 return APR_SUCCESS; 958 } 959 /* Invalid characters after ':' found */ 960 } 961 return APR_EGENERAL; 962} 963