1251876Speter/* Licensed to the Apache Software Foundation (ASF) under one or more 2251876Speter * contributor license agreements. See the NOTICE file distributed with 3251876Speter * this work for additional information regarding copyright ownership. 4251876Speter * The ASF licenses this file to You under the Apache License, Version 2.0 5251876Speter * (the "License"); you may not use this file except in compliance with 6251876Speter * the License. You may obtain a copy of the License at 7251876Speter * 8251876Speter * http://www.apache.org/licenses/LICENSE-2.0 9251876Speter * 10251876Speter * Unless required by applicable law or agreed to in writing, software 11251876Speter * distributed under the License is distributed on an "AS IS" BASIS, 12251876Speter * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13251876Speter * See the License for the specific language governing permissions and 14251876Speter * limitations under the License. 15251876Speter */ 16251876Speter 17251876Speter/* 18251876Speter * apr_uri.c: URI related utility things 19251876Speter * 20251876Speter */ 21251876Speter 22251876Speter#include <stdlib.h> 23251876Speter 24251876Speter#include "apu.h" 25251876Speter#include "apr.h" 26251876Speter#include "apr_general.h" 27251876Speter#include "apr_strings.h" 28251876Speter 29251876Speter#define APR_WANT_STRFUNC 30251876Speter#include "apr_want.h" 31251876Speter 32251876Speter#include "apr_uri.h" 33251876Speter 34251876Spetertypedef struct schemes_t schemes_t; 35251876Speter 36251876Speter/** Structure to store various schemes and their default ports */ 37251876Speterstruct schemes_t { 38251876Speter /** The name of the scheme */ 39251876Speter const char *name; 40251876Speter /** The default port for the scheme */ 41251876Speter apr_port_t default_port; 42251876Speter}; 43251876Speter 44251876Speter/* Some WWW schemes and their default ports; this is basically /etc/services */ 45251876Speter/* This will become global when the protocol abstraction comes */ 46251876Speter/* As the schemes are searched by a linear search, */ 47251876Speter/* they are sorted by their expected frequency */ 48251876Speterstatic schemes_t schemes[] = 49251876Speter{ 50251876Speter {"http", APR_URI_HTTP_DEFAULT_PORT}, 51251876Speter {"ftp", APR_URI_FTP_DEFAULT_PORT}, 52251876Speter {"https", APR_URI_HTTPS_DEFAULT_PORT}, 53251876Speter {"gopher", APR_URI_GOPHER_DEFAULT_PORT}, 54251876Speter {"ldap", APR_URI_LDAP_DEFAULT_PORT}, 55251876Speter {"nntp", APR_URI_NNTP_DEFAULT_PORT}, 56251876Speter {"snews", APR_URI_SNEWS_DEFAULT_PORT}, 57251876Speter {"imap", APR_URI_IMAP_DEFAULT_PORT}, 58251876Speter {"pop", APR_URI_POP_DEFAULT_PORT}, 59251876Speter {"sip", APR_URI_SIP_DEFAULT_PORT}, 60251876Speter {"rtsp", APR_URI_RTSP_DEFAULT_PORT}, 61251876Speter {"wais", APR_URI_WAIS_DEFAULT_PORT}, 62251876Speter {"z39.50r", APR_URI_WAIS_DEFAULT_PORT}, 63251876Speter {"z39.50s", APR_URI_WAIS_DEFAULT_PORT}, 64251876Speter {"prospero", APR_URI_PROSPERO_DEFAULT_PORT}, 65251876Speter {"nfs", APR_URI_NFS_DEFAULT_PORT}, 66251876Speter {"tip", APR_URI_TIP_DEFAULT_PORT}, 67251876Speter {"acap", APR_URI_ACAP_DEFAULT_PORT}, 68251876Speter {"telnet", APR_URI_TELNET_DEFAULT_PORT}, 69251876Speter {"ssh", APR_URI_SSH_DEFAULT_PORT}, 70251876Speter { NULL, 0xFFFF } /* unknown port */ 71251876Speter}; 72251876Speter 73251876SpeterAPU_DECLARE(apr_port_t) apr_uri_port_of_scheme(const char *scheme_str) 74251876Speter{ 75251876Speter schemes_t *scheme; 76251876Speter 77251876Speter if (scheme_str) { 78251876Speter for (scheme = schemes; scheme->name != NULL; ++scheme) { 79251876Speter if (strcasecmp(scheme_str, scheme->name) == 0) { 80251876Speter return scheme->default_port; 81251876Speter } 82251876Speter } 83251876Speter } 84251876Speter return 0; 85251876Speter} 86251876Speter 87251876Speter/* Unparse a apr_uri_t structure to an URI string. 88251876Speter * Optionally suppress the password for security reasons. 89251876Speter */ 90251876SpeterAPU_DECLARE(char *) apr_uri_unparse(apr_pool_t *p, 91251876Speter const apr_uri_t *uptr, 92251876Speter unsigned flags) 93251876Speter{ 94251876Speter char *ret = ""; 95251876Speter 96251876Speter /* If suppressing the site part, omit both user name & scheme://hostname */ 97251876Speter if (!(flags & APR_URI_UNP_OMITSITEPART)) { 98251876Speter 99251876Speter /* Construct a "user:password@" string, honoring the passed 100251876Speter * APR_URI_UNP_ flags: */ 101251876Speter if (uptr->user || uptr->password) { 102251876Speter ret = apr_pstrcat(p, 103251876Speter (uptr->user && !(flags & APR_URI_UNP_OMITUSER)) 104251876Speter ? uptr->user : "", 105251876Speter (uptr->password && !(flags & APR_URI_UNP_OMITPASSWORD)) 106251876Speter ? ":" : "", 107251876Speter (uptr->password && !(flags & APR_URI_UNP_OMITPASSWORD)) 108251876Speter ? ((flags & APR_URI_UNP_REVEALPASSWORD) 109251876Speter ? uptr->password : "XXXXXXXX") 110251876Speter : "", 111251876Speter ((uptr->user && !(flags & APR_URI_UNP_OMITUSER)) || 112251876Speter (uptr->password && !(flags & APR_URI_UNP_OMITPASSWORD))) 113251876Speter ? "@" : "", 114251876Speter NULL); 115251876Speter } 116251876Speter 117251876Speter /* Construct scheme://site string */ 118251876Speter if (uptr->hostname) { 119251876Speter int is_default_port; 120251876Speter const char *lbrk = "", *rbrk = ""; 121251876Speter 122251876Speter if (strchr(uptr->hostname, ':')) { /* v6 literal */ 123251876Speter lbrk = "["; 124251876Speter rbrk = "]"; 125251876Speter } 126251876Speter 127251876Speter is_default_port = 128251876Speter (uptr->port_str == NULL || 129251876Speter uptr->port == 0 || 130251876Speter uptr->port == apr_uri_port_of_scheme(uptr->scheme)); 131251876Speter 132251876Speter ret = apr_pstrcat(p, "//", ret, lbrk, uptr->hostname, rbrk, 133251876Speter is_default_port ? "" : ":", 134251876Speter is_default_port ? "" : uptr->port_str, 135251876Speter NULL); 136251876Speter } 137251876Speter if (uptr->scheme) { 138251876Speter ret = apr_pstrcat(p, uptr->scheme, ":", ret, NULL); 139251876Speter } 140251876Speter } 141251876Speter 142251876Speter /* Should we suppress all path info? */ 143251876Speter if (!(flags & APR_URI_UNP_OMITPATHINFO)) { 144251876Speter /* Append path, query and fragment strings: */ 145251876Speter ret = apr_pstrcat(p, 146251876Speter ret, 147251876Speter (uptr->path) 148251876Speter ? uptr->path : "", 149251876Speter (uptr->query && !(flags & APR_URI_UNP_OMITQUERY)) 150251876Speter ? "?" : "", 151251876Speter (uptr->query && !(flags & APR_URI_UNP_OMITQUERY)) 152251876Speter ? uptr->query : "", 153251876Speter (uptr->fragment && !(flags & APR_URI_UNP_OMITQUERY)) 154251876Speter ? "#" : NULL, 155251876Speter (uptr->fragment && !(flags & APR_URI_UNP_OMITQUERY)) 156251876Speter ? uptr->fragment : NULL, 157251876Speter NULL); 158251876Speter } 159251876Speter return ret; 160251876Speter} 161251876Speter 162251876Speter/* Here is the hand-optimized parse_uri_components(). There are some wild 163251876Speter * tricks we could pull in assembly language that we don't pull here... like we 164251876Speter * can do word-at-time scans for delimiter characters using the same technique 165251876Speter * that fast memchr()s use. But that would be way non-portable. -djg 166251876Speter */ 167251876Speter 168251876Speter/* We have a apr_table_t that we can index by character and it tells us if the 169251876Speter * character is one of the interesting delimiters. Note that we even get 170251876Speter * compares for NUL for free -- it's just another delimiter. 171251876Speter */ 172251876Speter 173253734Speter#define T_SLASH 0x01 /* '/' */ 174253734Speter#define T_QUESTION 0x02 /* '?' */ 175253734Speter#define T_HASH 0x04 /* '#' */ 176253734Speter#define T_ALPHA 0x08 /* 'A' ... 'Z', 'a' ... 'z' */ 177253734Speter#define T_SCHEME 0x10 /* '0' ... '9', '-', '+', '.' 178253734Speter * (allowed in scheme except first char) 179253734Speter */ 180251876Speter#define T_NUL 0x80 /* '\0' */ 181251876Speter 182251876Speter#if APR_CHARSET_EBCDIC 183251876Speter/* Delimiter table for the EBCDIC character set */ 184251876Speterstatic const unsigned char uri_delims[256] = { 185253734Speter T_NUL, /* 0x00 */ 186253734Speter 0, /* 0x01 */ 187253734Speter 0, /* 0x02 */ 188253734Speter 0, /* 0x03 */ 189253734Speter 0, /* 0x04 */ 190253734Speter 0, /* 0x05 */ 191253734Speter 0, /* 0x06 */ 192253734Speter 0, /* 0x07 */ 193253734Speter 0, /* 0x08 */ 194253734Speter 0, /* 0x09 */ 195253734Speter 0, /* 0x0a */ 196253734Speter 0, /* 0x0b */ 197253734Speter 0, /* 0x0c */ 198253734Speter 0, /* 0x0d */ 199253734Speter 0, /* 0x0e */ 200253734Speter 0, /* 0x0f */ 201253734Speter 0, /* 0x10 */ 202253734Speter 0, /* 0x11 */ 203253734Speter 0, /* 0x12 */ 204253734Speter 0, /* 0x13 */ 205253734Speter 0, /* 0x14 */ 206253734Speter 0, /* 0x15 */ 207253734Speter 0, /* 0x16 */ 208253734Speter 0, /* 0x17 */ 209253734Speter 0, /* 0x18 */ 210253734Speter 0, /* 0x19 */ 211253734Speter 0, /* 0x1a */ 212253734Speter 0, /* 0x1b */ 213253734Speter 0, /* 0x1c */ 214253734Speter 0, /* 0x1d */ 215253734Speter 0, /* 0x1e */ 216253734Speter 0, /* 0x1f */ 217253734Speter 0, /* 0x20 */ 218253734Speter 0, /* 0x21 */ 219253734Speter 0, /* 0x22 */ 220253734Speter 0, /* 0x23 */ 221253734Speter 0, /* 0x24 */ 222253734Speter 0, /* 0x25 */ 223253734Speter 0, /* 0x26 */ 224253734Speter 0, /* 0x27 */ 225253734Speter 0, /* 0x28 */ 226253734Speter 0, /* 0x29 */ 227253734Speter 0, /* 0x2a */ 228253734Speter 0, /* 0x2b */ 229253734Speter 0, /* 0x2c */ 230253734Speter 0, /* 0x2d */ 231253734Speter 0, /* 0x2e */ 232253734Speter 0, /* 0x2f */ 233253734Speter 0, /* 0x30 */ 234253734Speter 0, /* 0x31 */ 235253734Speter 0, /* 0x32 */ 236253734Speter 0, /* 0x33 */ 237253734Speter 0, /* 0x34 */ 238253734Speter 0, /* 0x35 */ 239253734Speter 0, /* 0x36 */ 240253734Speter 0, /* 0x37 */ 241253734Speter 0, /* 0x38 */ 242253734Speter 0, /* 0x39 */ 243253734Speter 0, /* 0x3a */ 244253734Speter 0, /* 0x3b */ 245253734Speter 0, /* 0x3c */ 246253734Speter 0, /* 0x3d */ 247253734Speter 0, /* 0x3e */ 248253734Speter 0, /* 0x3f */ 249253734Speter 0, /* 0x40 ' ' */ 250253734Speter 0, /* 0x41 */ 251253734Speter 0, /* 0x42 */ 252253734Speter 0, /* 0x43 */ 253253734Speter 0, /* 0x44 */ 254253734Speter 0, /* 0x45 */ 255253734Speter 0, /* 0x46 */ 256253734Speter 0, /* 0x47 */ 257253734Speter 0, /* 0x48 */ 258253734Speter 0, /* 0x49 */ 259253734Speter 0, /* 0x4a '[' */ 260253734Speter T_SCHEME, /* 0x4b '.' */ 261253734Speter 0, /* 0x4c '<' */ 262253734Speter 0, /* 0x4d '(' */ 263253734Speter T_SCHEME, /* 0x4e '+' */ 264253734Speter 0, /* 0x4f '!' */ 265253734Speter 0, /* 0x50 '&' */ 266253734Speter 0, /* 0x51 */ 267253734Speter 0, /* 0x52 */ 268253734Speter 0, /* 0x53 */ 269253734Speter 0, /* 0x54 */ 270253734Speter 0, /* 0x55 */ 271253734Speter 0, /* 0x56 */ 272253734Speter 0, /* 0x57 */ 273253734Speter 0, /* 0x58 */ 274253734Speter 0, /* 0x59 */ 275253734Speter 0, /* 0x5a ']' */ 276253734Speter 0, /* 0x5b '$' */ 277253734Speter 0, /* 0x5c '*' */ 278253734Speter 0, /* 0x5d ')' */ 279253734Speter 0, /* 0x5e ';' */ 280253734Speter 0, /* 0x5f '^' */ 281253734Speter T_SCHEME, /* 0x60 '-' */ 282253734Speter T_SLASH, /* 0x61 '/' */ 283253734Speter 0, /* 0x62 */ 284253734Speter 0, /* 0x63 */ 285253734Speter 0, /* 0x64 */ 286253734Speter 0, /* 0x65 */ 287253734Speter 0, /* 0x66 */ 288253734Speter 0, /* 0x67 */ 289253734Speter 0, /* 0x68 */ 290253734Speter 0, /* 0x69 */ 291253734Speter 0, /* 0x6a '|' */ 292253734Speter 0, /* 0x6b ',' */ 293253734Speter 0, /* 0x6c '%' */ 294253734Speter 0, /* 0x6d '_' */ 295253734Speter 0, /* 0x6e '>' */ 296253734Speter T_QUESTION, /* 0x6f '?' */ 297253734Speter 0, /* 0x70 */ 298253734Speter 0, /* 0x71 */ 299253734Speter 0, /* 0x72 */ 300253734Speter 0, /* 0x73 */ 301253734Speter 0, /* 0x74 */ 302253734Speter 0, /* 0x75 */ 303253734Speter 0, /* 0x76 */ 304253734Speter 0, /* 0x77 */ 305253734Speter 0, /* 0x78 */ 306253734Speter 0, /* 0x79 '`' */ 307253734Speter 0, /* 0x7a ':' */ 308253734Speter T_HASH, /* 0x7b '#' */ 309253734Speter 0, /* 0x7c '@' */ 310253734Speter 0, /* 0x7d ''' */ 311253734Speter 0, /* 0x7e '=' */ 312253734Speter 0, /* 0x7f '"' */ 313253734Speter 0, /* 0x80 */ 314253734Speter T_ALPHA, /* 0x81 'a' */ 315253734Speter T_ALPHA, /* 0x82 'b' */ 316253734Speter T_ALPHA, /* 0x83 'c' */ 317253734Speter T_ALPHA, /* 0x84 'd' */ 318253734Speter T_ALPHA, /* 0x85 'e' */ 319253734Speter T_ALPHA, /* 0x86 'f' */ 320253734Speter T_ALPHA, /* 0x87 'g' */ 321253734Speter T_ALPHA, /* 0x88 'h' */ 322253734Speter T_ALPHA, /* 0x89 'i' */ 323253734Speter 0, /* 0x8a */ 324253734Speter 0, /* 0x8b */ 325253734Speter 0, /* 0x8c */ 326253734Speter 0, /* 0x8d */ 327253734Speter 0, /* 0x8e */ 328253734Speter 0, /* 0x8f */ 329253734Speter 0, /* 0x90 */ 330253734Speter T_ALPHA, /* 0x91 'j' */ 331253734Speter T_ALPHA, /* 0x92 'k' */ 332253734Speter T_ALPHA, /* 0x93 'l' */ 333253734Speter T_ALPHA, /* 0x94 'm' */ 334253734Speter T_ALPHA, /* 0x95 'n' */ 335253734Speter T_ALPHA, /* 0x96 'o' */ 336253734Speter T_ALPHA, /* 0x97 'p' */ 337253734Speter T_ALPHA, /* 0x98 'q' */ 338253734Speter T_ALPHA, /* 0x99 'r' */ 339253734Speter 0, /* 0x9a */ 340253734Speter 0, /* 0x9b */ 341253734Speter 0, /* 0x9c */ 342253734Speter 0, /* 0x9d */ 343253734Speter 0, /* 0x9e */ 344253734Speter 0, /* 0x9f */ 345253734Speter 0, /* 0xa0 */ 346253734Speter 0, /* 0xa1 '~' */ 347253734Speter T_ALPHA, /* 0xa2 's' */ 348253734Speter T_ALPHA, /* 0xa3 't' */ 349253734Speter T_ALPHA, /* 0xa4 'u' */ 350253734Speter T_ALPHA, /* 0xa5 'v' */ 351253734Speter T_ALPHA, /* 0xa6 'w' */ 352253734Speter T_ALPHA, /* 0xa7 'x' */ 353253734Speter T_ALPHA, /* 0xa8 'y' */ 354253734Speter T_ALPHA, /* 0xa9 'z' */ 355253734Speter 0, /* 0xaa */ 356253734Speter 0, /* 0xab */ 357253734Speter 0, /* 0xac */ 358253734Speter 0, /* 0xad */ 359253734Speter 0, /* 0xae */ 360253734Speter 0, /* 0xaf */ 361253734Speter 0, /* 0xb0 */ 362253734Speter 0, /* 0xb1 */ 363253734Speter 0, /* 0xb2 */ 364253734Speter 0, /* 0xb3 */ 365253734Speter 0, /* 0xb4 */ 366253734Speter 0, /* 0xb5 */ 367253734Speter 0, /* 0xb6 */ 368253734Speter 0, /* 0xb7 */ 369253734Speter 0, /* 0xb8 */ 370253734Speter 0, /* 0xb9 */ 371253734Speter 0, /* 0xba */ 372253734Speter 0, /* 0xbb */ 373253734Speter 0, /* 0xbc */ 374253734Speter 0, /* 0xbd */ 375253734Speter 0, /* 0xbe */ 376253734Speter 0, /* 0xbf */ 377253734Speter 0, /* 0xc0 '{' */ 378253734Speter T_ALPHA, /* 0xc1 'A' */ 379253734Speter T_ALPHA, /* 0xc2 'B' */ 380253734Speter T_ALPHA, /* 0xc3 'C' */ 381253734Speter T_ALPHA, /* 0xc4 'D' */ 382253734Speter T_ALPHA, /* 0xc5 'E' */ 383253734Speter T_ALPHA, /* 0xc6 'F' */ 384253734Speter T_ALPHA, /* 0xc7 'G' */ 385253734Speter T_ALPHA, /* 0xc8 'H' */ 386253734Speter T_ALPHA, /* 0xc9 'I' */ 387253734Speter 0, /* 0xca */ 388253734Speter 0, /* 0xcb */ 389253734Speter 0, /* 0xcc */ 390253734Speter 0, /* 0xcd */ 391253734Speter 0, /* 0xce */ 392253734Speter 0, /* 0xcf */ 393253734Speter 0, /* 0xd0 '}' */ 394253734Speter T_ALPHA, /* 0xd1 'J' */ 395253734Speter T_ALPHA, /* 0xd2 'K' */ 396253734Speter T_ALPHA, /* 0xd3 'L' */ 397253734Speter T_ALPHA, /* 0xd4 'M' */ 398253734Speter T_ALPHA, /* 0xd5 'N' */ 399253734Speter T_ALPHA, /* 0xd6 'O' */ 400253734Speter T_ALPHA, /* 0xd7 'P' */ 401253734Speter T_ALPHA, /* 0xd8 'Q' */ 402253734Speter T_ALPHA, /* 0xd9 'R' */ 403253734Speter 0, /* 0xda */ 404253734Speter 0, /* 0xdb */ 405253734Speter 0, /* 0xdc */ 406253734Speter 0, /* 0xdd */ 407253734Speter 0, /* 0xde */ 408253734Speter 0, /* 0xdf */ 409253734Speter 0, /* 0xe0 '\' */ 410253734Speter 0, /* 0xe1 */ 411253734Speter T_ALPHA, /* 0xe2 'S' */ 412253734Speter T_ALPHA, /* 0xe3 'T' */ 413253734Speter T_ALPHA, /* 0xe4 'U' */ 414253734Speter T_ALPHA, /* 0xe5 'V' */ 415253734Speter T_ALPHA, /* 0xe6 'W' */ 416253734Speter T_ALPHA, /* 0xe7 'X' */ 417253734Speter T_ALPHA, /* 0xe8 'Y' */ 418253734Speter T_ALPHA, /* 0xe9 'Z' */ 419253734Speter 0, /* 0xea */ 420253734Speter 0, /* 0xeb */ 421253734Speter 0, /* 0xec */ 422253734Speter 0, /* 0xed */ 423253734Speter 0, /* 0xee */ 424253734Speter 0, /* 0xef */ 425253734Speter T_SCHEME, /* 0xf0 '0' */ 426253734Speter T_SCHEME, /* 0xf1 '1' */ 427253734Speter T_SCHEME, /* 0xf2 '2' */ 428253734Speter T_SCHEME, /* 0xf3 '3' */ 429253734Speter T_SCHEME, /* 0xf4 '4' */ 430253734Speter T_SCHEME, /* 0xf5 '5' */ 431253734Speter T_SCHEME, /* 0xf6 '6' */ 432253734Speter T_SCHEME, /* 0xf7 '7' */ 433253734Speter T_SCHEME, /* 0xf8 '8' */ 434253734Speter T_SCHEME, /* 0xf9 '9' */ 435253734Speter 0, /* 0xfa */ 436253734Speter 0, /* 0xfb */ 437253734Speter 0, /* 0xfc */ 438253734Speter 0, /* 0xfd */ 439253734Speter 0, /* 0xfe */ 440253734Speter 0 /* 0xff */ 441251876Speter}; 442251876Speter#else 443251876Speter/* Delimiter table for the ASCII character set */ 444251876Speterstatic const unsigned char uri_delims[256] = { 445253734Speter T_NUL, /* 0x00 */ 446253734Speter 0, /* 0x01 */ 447253734Speter 0, /* 0x02 */ 448253734Speter 0, /* 0x03 */ 449253734Speter 0, /* 0x04 */ 450253734Speter 0, /* 0x05 */ 451253734Speter 0, /* 0x06 */ 452253734Speter 0, /* 0x07 */ 453253734Speter 0, /* 0x08 */ 454253734Speter 0, /* 0x09 */ 455253734Speter 0, /* 0x0a */ 456253734Speter 0, /* 0x0b */ 457253734Speter 0, /* 0x0c */ 458253734Speter 0, /* 0x0d */ 459253734Speter 0, /* 0x0e */ 460253734Speter 0, /* 0x0f */ 461253734Speter 0, /* 0x10 */ 462253734Speter 0, /* 0x11 */ 463253734Speter 0, /* 0x12 */ 464253734Speter 0, /* 0x13 */ 465253734Speter 0, /* 0x14 */ 466253734Speter 0, /* 0x15 */ 467253734Speter 0, /* 0x16 */ 468253734Speter 0, /* 0x17 */ 469253734Speter 0, /* 0x18 */ 470253734Speter 0, /* 0x19 */ 471253734Speter 0, /* 0x1a */ 472253734Speter 0, /* 0x1b */ 473253734Speter 0, /* 0x1c */ 474253734Speter 0, /* 0x1d */ 475253734Speter 0, /* 0x1e */ 476253734Speter 0, /* 0x1f */ 477253734Speter 0, /* 0x20 ' ' */ 478253734Speter 0, /* 0x21 '!' */ 479253734Speter 0, /* 0x22 '"' */ 480253734Speter T_HASH, /* 0x23 '#' */ 481253734Speter 0, /* 0x24 '$' */ 482253734Speter 0, /* 0x25 '%' */ 483253734Speter 0, /* 0x26 '&' */ 484253734Speter 0, /* 0x27 ''' */ 485253734Speter 0, /* 0x28 '(' */ 486253734Speter 0, /* 0x29 ')' */ 487253734Speter 0, /* 0x2a '*' */ 488253734Speter T_SCHEME, /* 0x2b '+' */ 489253734Speter 0, /* 0x2c ',' */ 490253734Speter T_SCHEME, /* 0x2d '-' */ 491253734Speter T_SCHEME, /* 0x2e '.' */ 492253734Speter T_SLASH, /* 0x2f '/' */ 493253734Speter T_SCHEME, /* 0x30 '0' */ 494253734Speter T_SCHEME, /* 0x31 '1' */ 495253734Speter T_SCHEME, /* 0x32 '2' */ 496253734Speter T_SCHEME, /* 0x33 '3' */ 497253734Speter T_SCHEME, /* 0x34 '4' */ 498253734Speter T_SCHEME, /* 0x35 '5' */ 499253734Speter T_SCHEME, /* 0x36 '6' */ 500253734Speter T_SCHEME, /* 0x37 '7' */ 501253734Speter T_SCHEME, /* 0x38 '8' */ 502253734Speter T_SCHEME, /* 0x39 '9' */ 503253734Speter 0, /* 0x3a ':' */ 504253734Speter 0, /* 0x3b ';' */ 505253734Speter 0, /* 0x3c '<' */ 506253734Speter 0, /* 0x3d '=' */ 507253734Speter 0, /* 0x3e '>' */ 508253734Speter T_QUESTION, /* 0x3f '?' */ 509253734Speter 0, /* 0x40 '@' */ 510253734Speter T_ALPHA, /* 0x41 'A' */ 511253734Speter T_ALPHA, /* 0x42 'B' */ 512253734Speter T_ALPHA, /* 0x43 'C' */ 513253734Speter T_ALPHA, /* 0x44 'D' */ 514253734Speter T_ALPHA, /* 0x45 'E' */ 515253734Speter T_ALPHA, /* 0x46 'F' */ 516253734Speter T_ALPHA, /* 0x47 'G' */ 517253734Speter T_ALPHA, /* 0x48 'H' */ 518253734Speter T_ALPHA, /* 0x49 'I' */ 519253734Speter T_ALPHA, /* 0x4a 'J' */ 520253734Speter T_ALPHA, /* 0x4b 'K' */ 521253734Speter T_ALPHA, /* 0x4c 'L' */ 522253734Speter T_ALPHA, /* 0x4d 'M' */ 523253734Speter T_ALPHA, /* 0x4e 'N' */ 524253734Speter T_ALPHA, /* 0x4f 'O' */ 525253734Speter T_ALPHA, /* 0x50 'P' */ 526253734Speter T_ALPHA, /* 0x51 'Q' */ 527253734Speter T_ALPHA, /* 0x52 'R' */ 528253734Speter T_ALPHA, /* 0x53 'S' */ 529253734Speter T_ALPHA, /* 0x54 'T' */ 530253734Speter T_ALPHA, /* 0x55 'U' */ 531253734Speter T_ALPHA, /* 0x56 'V' */ 532253734Speter T_ALPHA, /* 0x57 'W' */ 533253734Speter T_ALPHA, /* 0x58 'X' */ 534253734Speter T_ALPHA, /* 0x59 'Y' */ 535253734Speter T_ALPHA, /* 0x5a 'Z' */ 536253734Speter 0, /* 0x5b '[' */ 537253734Speter 0, /* 0x5c '\' */ 538253734Speter 0, /* 0x5d ']' */ 539253734Speter 0, /* 0x5e '^' */ 540253734Speter 0, /* 0x5f '_' */ 541253734Speter 0, /* 0x60 '`' */ 542253734Speter T_ALPHA, /* 0x61 'a' */ 543253734Speter T_ALPHA, /* 0x62 'b' */ 544253734Speter T_ALPHA, /* 0x63 'c' */ 545253734Speter T_ALPHA, /* 0x64 'd' */ 546253734Speter T_ALPHA, /* 0x65 'e' */ 547253734Speter T_ALPHA, /* 0x66 'f' */ 548253734Speter T_ALPHA, /* 0x67 'g' */ 549253734Speter T_ALPHA, /* 0x68 'h' */ 550253734Speter T_ALPHA, /* 0x69 'i' */ 551253734Speter T_ALPHA, /* 0x6a 'j' */ 552253734Speter T_ALPHA, /* 0x6b 'k' */ 553253734Speter T_ALPHA, /* 0x6c 'l' */ 554253734Speter T_ALPHA, /* 0x6d 'm' */ 555253734Speter T_ALPHA, /* 0x6e 'n' */ 556253734Speter T_ALPHA, /* 0x6f 'o' */ 557253734Speter T_ALPHA, /* 0x70 'p' */ 558253734Speter T_ALPHA, /* 0x71 'q' */ 559253734Speter T_ALPHA, /* 0x72 'r' */ 560253734Speter T_ALPHA, /* 0x73 's' */ 561253734Speter T_ALPHA, /* 0x74 't' */ 562253734Speter T_ALPHA, /* 0x75 'u' */ 563253734Speter T_ALPHA, /* 0x76 'v' */ 564253734Speter T_ALPHA, /* 0x77 'w' */ 565253734Speter T_ALPHA, /* 0x78 'x' */ 566253734Speter T_ALPHA, /* 0x79 'y' */ 567253734Speter T_ALPHA, /* 0x7a 'z' */ 568253734Speter 0, /* 0x7b '{' */ 569253734Speter 0, /* 0x7c '|' */ 570253734Speter 0, /* 0x7d '}' */ 571253734Speter 0, /* 0x7e '~' */ 572253734Speter 0, /* 0x7f */ 573253734Speter 0, /* 0x80 */ 574253734Speter 0, /* 0x81 */ 575253734Speter 0, /* 0x82 */ 576253734Speter 0, /* 0x83 */ 577253734Speter 0, /* 0x84 */ 578253734Speter 0, /* 0x85 */ 579253734Speter 0, /* 0x86 */ 580253734Speter 0, /* 0x87 */ 581253734Speter 0, /* 0x88 */ 582253734Speter 0, /* 0x89 */ 583253734Speter 0, /* 0x8a */ 584253734Speter 0, /* 0x8b */ 585253734Speter 0, /* 0x8c */ 586253734Speter 0, /* 0x8d */ 587253734Speter 0, /* 0x8e */ 588253734Speter 0, /* 0x8f */ 589253734Speter 0, /* 0x90 */ 590253734Speter 0, /* 0x91 */ 591253734Speter 0, /* 0x92 */ 592253734Speter 0, /* 0x93 */ 593253734Speter 0, /* 0x94 */ 594253734Speter 0, /* 0x95 */ 595253734Speter 0, /* 0x96 */ 596253734Speter 0, /* 0x97 */ 597253734Speter 0, /* 0x98 */ 598253734Speter 0, /* 0x99 */ 599253734Speter 0, /* 0x9a */ 600253734Speter 0, /* 0x9b */ 601253734Speter 0, /* 0x9c */ 602253734Speter 0, /* 0x9d */ 603253734Speter 0, /* 0x9e */ 604253734Speter 0, /* 0x9f */ 605253734Speter 0, /* 0xa0 */ 606253734Speter 0, /* 0xa1 */ 607253734Speter 0, /* 0xa2 */ 608253734Speter 0, /* 0xa3 */ 609253734Speter 0, /* 0xa4 */ 610253734Speter 0, /* 0xa5 */ 611253734Speter 0, /* 0xa6 */ 612253734Speter 0, /* 0xa7 */ 613253734Speter 0, /* 0xa8 */ 614253734Speter 0, /* 0xa9 */ 615253734Speter 0, /* 0xaa */ 616253734Speter 0, /* 0xab */ 617253734Speter 0, /* 0xac */ 618253734Speter 0, /* 0xad */ 619253734Speter 0, /* 0xae */ 620253734Speter 0, /* 0xaf */ 621253734Speter 0, /* 0xb0 */ 622253734Speter 0, /* 0xb1 */ 623253734Speter 0, /* 0xb2 */ 624253734Speter 0, /* 0xb3 */ 625253734Speter 0, /* 0xb4 */ 626253734Speter 0, /* 0xb5 */ 627253734Speter 0, /* 0xb6 */ 628253734Speter 0, /* 0xb7 */ 629253734Speter 0, /* 0xb8 */ 630253734Speter 0, /* 0xb9 */ 631253734Speter 0, /* 0xba */ 632253734Speter 0, /* 0xbb */ 633253734Speter 0, /* 0xbc */ 634253734Speter 0, /* 0xbd */ 635253734Speter 0, /* 0xbe */ 636253734Speter 0, /* 0xbf */ 637253734Speter 0, /* 0xc0 */ 638253734Speter 0, /* 0xc1 */ 639253734Speter 0, /* 0xc2 */ 640253734Speter 0, /* 0xc3 */ 641253734Speter 0, /* 0xc4 */ 642253734Speter 0, /* 0xc5 */ 643253734Speter 0, /* 0xc6 */ 644253734Speter 0, /* 0xc7 */ 645253734Speter 0, /* 0xc8 */ 646253734Speter 0, /* 0xc9 */ 647253734Speter 0, /* 0xca */ 648253734Speter 0, /* 0xcb */ 649253734Speter 0, /* 0xcc */ 650253734Speter 0, /* 0xcd */ 651253734Speter 0, /* 0xce */ 652253734Speter 0, /* 0xcf */ 653253734Speter 0, /* 0xd0 */ 654253734Speter 0, /* 0xd1 */ 655253734Speter 0, /* 0xd2 */ 656253734Speter 0, /* 0xd3 */ 657253734Speter 0, /* 0xd4 */ 658253734Speter 0, /* 0xd5 */ 659253734Speter 0, /* 0xd6 */ 660253734Speter 0, /* 0xd7 */ 661253734Speter 0, /* 0xd8 */ 662253734Speter 0, /* 0xd9 */ 663253734Speter 0, /* 0xda */ 664253734Speter 0, /* 0xdb */ 665253734Speter 0, /* 0xdc */ 666253734Speter 0, /* 0xdd */ 667253734Speter 0, /* 0xde */ 668253734Speter 0, /* 0xdf */ 669253734Speter 0, /* 0xe0 */ 670253734Speter 0, /* 0xe1 */ 671253734Speter 0, /* 0xe2 */ 672253734Speter 0, /* 0xe3 */ 673253734Speter 0, /* 0xe4 */ 674253734Speter 0, /* 0xe5 */ 675253734Speter 0, /* 0xe6 */ 676253734Speter 0, /* 0xe7 */ 677253734Speter 0, /* 0xe8 */ 678253734Speter 0, /* 0xe9 */ 679253734Speter 0, /* 0xea */ 680253734Speter 0, /* 0xeb */ 681253734Speter 0, /* 0xec */ 682253734Speter 0, /* 0xed */ 683253734Speter 0, /* 0xee */ 684253734Speter 0, /* 0xef */ 685253734Speter 0, /* 0xf0 */ 686253734Speter 0, /* 0xf1 */ 687253734Speter 0, /* 0xf2 */ 688253734Speter 0, /* 0xf3 */ 689253734Speter 0, /* 0xf4 */ 690253734Speter 0, /* 0xf5 */ 691253734Speter 0, /* 0xf6 */ 692253734Speter 0, /* 0xf7 */ 693253734Speter 0, /* 0xf8 */ 694253734Speter 0, /* 0xf9 */ 695253734Speter 0, /* 0xfa */ 696253734Speter 0, /* 0xfb */ 697253734Speter 0, /* 0xfc */ 698253734Speter 0, /* 0xfd */ 699253734Speter 0, /* 0xfe */ 700253734Speter 0 /* 0xff */ 701251876Speter}; 702251876Speter#endif 703251876Speter 704251876Speter 705251876Speter/* it works like this: 706251876Speter if (uri_delims[ch] & NOTEND_foobar) { 707251876Speter then we're not at a delimiter for foobar 708251876Speter } 709251876Speter*/ 710251876Speter 711251876Speter#define NOTEND_HOSTINFO (T_SLASH | T_QUESTION | T_HASH | T_NUL) 712251876Speter#define NOTEND_PATH (T_QUESTION | T_HASH | T_NUL) 713251876Speter 714251876Speter/* parse_uri_components(): 715251876Speter * Parse a given URI, fill in all supplied fields of a uri_components 716251876Speter * structure. This eliminates the necessity of extracting host, port, 717251876Speter * path, query info repeatedly in the modules. 718251876Speter * Side effects: 719251876Speter * - fills in fields of uri_components *uptr 720251876Speter * - none on any of the r->* fields 721251876Speter */ 722251876SpeterAPU_DECLARE(apr_status_t) apr_uri_parse(apr_pool_t *p, const char *uri, 723251876Speter apr_uri_t *uptr) 724251876Speter{ 725251876Speter const char *s; 726251876Speter const char *s1; 727251876Speter const char *hostinfo; 728251876Speter char *endstr; 729251876Speter int port; 730251876Speter int v6_offset1 = 0, v6_offset2 = 0; 731251876Speter 732251876Speter /* Initialize the structure. parse_uri() and parse_uri_components() 733251876Speter * can be called more than once per request. 734251876Speter */ 735251876Speter memset (uptr, '\0', sizeof(*uptr)); 736251876Speter uptr->is_initialized = 1; 737251876Speter 738251876Speter /* We assume the processor has a branch predictor like most -- 739251876Speter * it assumes forward branches are untaken and backwards are taken. That's 740251876Speter * the reason for the gotos. -djg 741251876Speter */ 742251876Speter if (uri[0] == '/') { 743251876Speter /* RFC2396 #4.3 says that two leading slashes mean we have an 744251876Speter * authority component, not a path! Fixing this looks scary 745251876Speter * with the gotos here. But if the existing logic is valid, 746251876Speter * then presumably a goto pointing to deal_with_authority works. 747251876Speter * 748251876Speter * RFC2396 describes this as resolving an ambiguity. In the 749251876Speter * case of three or more slashes there would seem to be no 750251876Speter * ambiguity, so it is a path after all. 751251876Speter */ 752251876Speter if (uri[1] == '/' && uri[2] != '/') { 753251876Speter s = uri + 2 ; 754251876Speter goto deal_with_authority ; 755251876Speter } 756251876Speter 757251876Speterdeal_with_path: 758251876Speter /* we expect uri to point to first character of path ... remember 759251876Speter * that the path could be empty -- http://foobar?query for example 760251876Speter */ 761251876Speter s = uri; 762251876Speter while ((uri_delims[*(unsigned char *)s] & NOTEND_PATH) == 0) { 763251876Speter ++s; 764251876Speter } 765251876Speter if (s != uri) { 766251876Speter uptr->path = apr_pstrmemdup(p, uri, s - uri); 767251876Speter } 768251876Speter if (*s == 0) { 769251876Speter return APR_SUCCESS; 770251876Speter } 771251876Speter if (*s == '?') { 772251876Speter ++s; 773251876Speter s1 = strchr(s, '#'); 774251876Speter if (s1) { 775251876Speter uptr->fragment = apr_pstrdup(p, s1 + 1); 776251876Speter uptr->query = apr_pstrmemdup(p, s, s1 - s); 777251876Speter } 778251876Speter else { 779251876Speter uptr->query = apr_pstrdup(p, s); 780251876Speter } 781251876Speter return APR_SUCCESS; 782251876Speter } 783251876Speter /* otherwise it's a fragment */ 784251876Speter uptr->fragment = apr_pstrdup(p, s + 1); 785251876Speter return APR_SUCCESS; 786251876Speter } 787251876Speter 788251876Speter /* find the scheme: */ 789251876Speter s = uri; 790253734Speter /* first char must be letter */ 791253734Speter if (uri_delims[*(unsigned char *)s] & T_ALPHA) { 792251876Speter ++s; 793253734Speter while ((uri_delims[*(unsigned char *)s] & (T_ALPHA|T_SCHEME))) 794253734Speter ++s; 795251876Speter } 796251876Speter /* scheme must be non-empty and followed by : */ 797253734Speter if (s != uri && s[0] == ':') { 798253734Speter uptr->scheme = apr_pstrmemdup(p, uri, s - uri); 799253734Speter s++; 800251876Speter } 801253734Speter else { 802253734Speter /* No valid scheme, restart from the beginning */ 803253734Speter s = uri; 804253734Speter } 805251876Speter 806253734Speter if (s[0] != '/' || s[1] != '/') { 807253734Speter if (uri == s) { 808253734Speter /* 809253734Speter * RFC 3986 3.3: If we have no scheme and no authority, 810253734Speter * the leading segment of a relative path must not contain a ':'. 811253734Speter */ 812253734Speter char *first_slash = strchr(uri, '/'); 813253734Speter if (first_slash) { 814253734Speter while (s < first_slash) { 815253734Speter if (s[0] == ':') 816253734Speter return APR_EGENERAL; 817253734Speter ++s; 818253734Speter } 819253734Speter /* no scheme but relative path, e.g. '../image.jpg' */ 820253734Speter } 821253734Speter else { 822253734Speter if (strchr(uri, ':') != NULL) 823253734Speter return APR_EGENERAL; 824253734Speter /* no scheme, no slash, but relative path, e.g. 'image.jpg' */ 825253734Speter } 826253734Speter goto deal_with_path; 827253734Speter } 828253734Speter /* scheme and relative path */ 829253734Speter uri = s; 830251876Speter goto deal_with_path; 831251876Speter } 832251876Speter 833253734Speter s += 2; 834251876Speter 835251876Speterdeal_with_authority: 836251876Speter hostinfo = s; 837251876Speter while ((uri_delims[*(unsigned char *)s] & NOTEND_HOSTINFO) == 0) { 838251876Speter ++s; 839251876Speter } 840251876Speter uri = s; /* whatever follows hostinfo is start of uri */ 841251876Speter uptr->hostinfo = apr_pstrmemdup(p, hostinfo, uri - hostinfo); 842251876Speter 843251876Speter /* If there's a username:password@host:port, the @ we want is the last @... 844251876Speter * too bad there's no memrchr()... For the C purists, note that hostinfo 845253734Speter * is definitely not the first character of the original uri so therefore 846251876Speter * &hostinfo[-1] < &hostinfo[0] ... and this loop is valid C. 847251876Speter */ 848251876Speter do { 849251876Speter --s; 850251876Speter } while (s >= hostinfo && *s != '@'); 851251876Speter if (s < hostinfo) { 852251876Speter /* again we want the common case to be fall through */ 853251876Speterdeal_with_host: 854251876Speter /* We expect hostinfo to point to the first character of 855251876Speter * the hostname. If there's a port it is the first colon, 856251876Speter * except with IPv6. 857251876Speter */ 858251876Speter if (*hostinfo == '[') { 859251876Speter v6_offset1 = 1; 860251876Speter v6_offset2 = 2; 861251876Speter s = memchr(hostinfo, ']', uri - hostinfo); 862251876Speter if (s == NULL) { 863251876Speter return APR_EGENERAL; 864251876Speter } 865251876Speter if (*++s != ':') { 866251876Speter s = NULL; /* no port */ 867251876Speter } 868251876Speter } 869251876Speter else { 870251876Speter s = memchr(hostinfo, ':', uri - hostinfo); 871251876Speter } 872251876Speter if (s == NULL) { 873251876Speter /* we expect the common case to have no port */ 874251876Speter uptr->hostname = apr_pstrmemdup(p, 875251876Speter hostinfo + v6_offset1, 876251876Speter uri - hostinfo - v6_offset2); 877251876Speter goto deal_with_path; 878251876Speter } 879251876Speter uptr->hostname = apr_pstrmemdup(p, 880251876Speter hostinfo + v6_offset1, 881251876Speter s - hostinfo - v6_offset2); 882251876Speter ++s; 883251876Speter uptr->port_str = apr_pstrmemdup(p, s, uri - s); 884251876Speter if (uri != s) { 885251876Speter port = strtol(uptr->port_str, &endstr, 10); 886251876Speter uptr->port = port; 887251876Speter if (*endstr == '\0') { 888251876Speter goto deal_with_path; 889251876Speter } 890251876Speter /* Invalid characters after ':' found */ 891251876Speter return APR_EGENERAL; 892251876Speter } 893251876Speter uptr->port = apr_uri_port_of_scheme(uptr->scheme); 894251876Speter goto deal_with_path; 895251876Speter } 896251876Speter 897251876Speter /* first colon delimits username:password */ 898251876Speter s1 = memchr(hostinfo, ':', s - hostinfo); 899251876Speter if (s1) { 900251876Speter uptr->user = apr_pstrmemdup(p, hostinfo, s1 - hostinfo); 901251876Speter ++s1; 902251876Speter uptr->password = apr_pstrmemdup(p, s1, s - s1); 903251876Speter } 904251876Speter else { 905251876Speter uptr->user = apr_pstrmemdup(p, hostinfo, s - hostinfo); 906251876Speter } 907251876Speter hostinfo = s + 1; 908251876Speter goto deal_with_host; 909251876Speter} 910251876Speter 911251876Speter/* Special case for CONNECT parsing: it comes with the hostinfo part only */ 912251876Speter/* See the INTERNET-DRAFT document "Tunneling SSL Through a WWW Proxy" 913251876Speter * currently at http://www.mcom.com/newsref/std/tunneling_ssl.html 914251876Speter * for the format of the "CONNECT host:port HTTP/1.0" request 915251876Speter */ 916251876SpeterAPU_DECLARE(apr_status_t) apr_uri_parse_hostinfo(apr_pool_t *p, 917251876Speter const char *hostinfo, 918251876Speter apr_uri_t *uptr) 919251876Speter{ 920251876Speter const char *s; 921251876Speter char *endstr; 922251876Speter const char *rsb; 923251876Speter int v6_offset1 = 0; 924251876Speter 925251876Speter /* Initialize the structure. parse_uri() and parse_uri_components() 926251876Speter * can be called more than once per request. 927251876Speter */ 928251876Speter memset(uptr, '\0', sizeof(*uptr)); 929251876Speter uptr->is_initialized = 1; 930251876Speter uptr->hostinfo = apr_pstrdup(p, hostinfo); 931251876Speter 932251876Speter /* We expect hostinfo to point to the first character of 933251876Speter * the hostname. There must be a port, separated by a colon 934251876Speter */ 935251876Speter if (*hostinfo == '[') { 936251876Speter if ((rsb = strchr(hostinfo, ']')) == NULL || 937251876Speter *(rsb + 1) != ':') { 938251876Speter return APR_EGENERAL; 939251876Speter } 940251876Speter /* literal IPv6 address */ 941251876Speter s = rsb + 1; 942251876Speter ++hostinfo; 943251876Speter v6_offset1 = 1; 944251876Speter } 945251876Speter else { 946251876Speter s = strchr(hostinfo, ':'); 947251876Speter } 948251876Speter if (s == NULL) { 949251876Speter return APR_EGENERAL; 950251876Speter } 951251876Speter uptr->hostname = apr_pstrndup(p, hostinfo, s - hostinfo - v6_offset1); 952251876Speter ++s; 953251876Speter uptr->port_str = apr_pstrdup(p, s); 954251876Speter if (*s != '\0') { 955251876Speter uptr->port = (unsigned short) strtol(uptr->port_str, &endstr, 10); 956251876Speter if (*endstr == '\0') { 957251876Speter return APR_SUCCESS; 958251876Speter } 959251876Speter /* Invalid characters after ':' found */ 960251876Speter } 961251876Speter return APR_EGENERAL; 962251876Speter} 963