1/* Licensed to the Apache Software Foundation (ASF) under one or more 2 * contributor license agreements. See the NOTICE file distributed with 3 * this work for additional information regarding copyright ownership. 4 * The ASF licenses this file to You under the Apache License, Version 2.0 5 * (the "License"); you may not use this file except in compliance with 6 * the License. You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "apr.h" 18#include "apr_lib.h" 19#include "apr_pools.h" 20#include "apr_strings.h" 21#include "ap_config.h" 22#include "ap_regex.h" 23#include "httpd.h" 24 25static apr_status_t rxplus_cleanup(void *preg) 26{ 27 ap_regfree((ap_regex_t *) preg); 28 return APR_SUCCESS; 29} 30 31AP_DECLARE(ap_rxplus_t*) ap_rxplus_compile(apr_pool_t *pool, 32 const char *pattern) 33{ 34 /* perl style patterns 35 * add support for more as and when wanted 36 * substitute: s/rx/subs/ 37 * match: m/rx/ or just /rx/ 38 */ 39 40 /* allow any nonalnum delimiter as first or second char. 41 * If we ever use this with non-string pattern we'll need an extra check 42 */ 43 const char *endp = 0; 44 const char *str = pattern; 45 const char *rxstr; 46 ap_rxplus_t *ret = apr_pcalloc(pool, sizeof(ap_rxplus_t)); 47 char delim = 0; 48 enum { SUBSTITUTE = 's', MATCH = 'm'} action = MATCH; 49 if (!apr_isalnum(pattern[0])) { 50 delim = *str++; 51 } 52 else if (pattern[0] == 's' && !apr_isalnum(pattern[1])) { 53 action = SUBSTITUTE; 54 delim = pattern[1]; 55 str += 2; 56 } 57 else if (pattern[0] == 'm' && !apr_isalnum(pattern[1])) { 58 delim = pattern[1]; 59 str += 2; 60 } 61 /* TODO: support perl's after/before */ 62 /* FIXME: fix these simplminded delims */ 63 64 /* we think there's a delimiter. Allow for it not to be if unmatched */ 65 if (delim) { 66 endp = ap_strchr_c(str, delim); 67 } 68 if (!endp) { /* there's no delim or flags */ 69 if (ap_regcomp(&ret->rx, pattern, 0) == 0) { 70 apr_pool_cleanup_register(pool, &ret->rx, rxplus_cleanup, 71 apr_pool_cleanup_null); 72 return ret; 73 } 74 else { 75 return NULL; 76 } 77 } 78 79 /* We have a delimiter. Use it to extract the regexp */ 80 rxstr = apr_pstrndup(pool, str, endp-str); 81 82 /* If it's a substitution, we need the replacement string 83 * TODO: possible future enhancement - support other parsing 84 * in the replacement string. 85 */ 86 if (action == SUBSTITUTE) { 87 str = endp+1; 88 if (!*str || (endp = ap_strchr_c(str, delim), !endp)) { 89 /* missing replacement string is an error */ 90 return NULL; 91 } 92 ret->subs = apr_pstrndup(pool, str, (endp-str)); 93 } 94 95 /* anything after the current delimiter is flags */ 96 while (*++endp) { 97 switch (*endp) { 98 case 'i': ret->flags |= AP_REG_ICASE; break; 99 case 'm': ret->flags |= AP_REG_NEWLINE; break; 100 case 'n': ret->flags |= AP_REG_NOMEM; break; 101 case 'g': ret->flags |= AP_REG_MULTI; break; 102 case 's': ret->flags |= AP_REG_DOTALL; break; 103 case '^': ret->flags |= AP_REG_NOTBOL; break; 104 case '$': ret->flags |= AP_REG_NOTEOL; break; 105 default: break; /* we should probably be stricter here */ 106 } 107 } 108 if (ap_regcomp(&ret->rx, rxstr, ret->flags) == 0) { 109 apr_pool_cleanup_register(pool, &ret->rx, rxplus_cleanup, 110 apr_pool_cleanup_null); 111 } 112 else { 113 return NULL; 114 } 115 if (!(ret->flags & AP_REG_NOMEM)) { 116 /* count size of memory required, starting at 1 for the whole-match 117 * Simpleminded should be fine 'cos regcomp already checked syntax 118 */ 119 ret->nmatch = 1; 120 while (*rxstr) { 121 switch (*rxstr++) { 122 case '\\': /* next char is escaped - skip it */ 123 if (*rxstr != 0) { 124 ++rxstr; 125 } 126 break; 127 case '(': /* unescaped bracket implies memory */ 128 ++ret->nmatch; 129 break; 130 default: 131 break; 132 } 133 } 134 ret->pmatch = apr_palloc(pool, ret->nmatch*sizeof(ap_regmatch_t)); 135 } 136 return ret; 137} 138 139AP_DECLARE(int) ap_rxplus_exec(apr_pool_t *pool, ap_rxplus_t *rx, 140 const char *pattern, char **newpattern) 141{ 142 int ret = 1; 143 int startl, oldl, newl, diffsz; 144 const char *remainder; 145 char *subs; 146/* snrf process_regexp from mod_headers */ 147 if (ap_regexec(&rx->rx, pattern, rx->nmatch, rx->pmatch, rx->flags) != 0) { 148 rx->match = NULL; 149 return 0; /* no match, nothing to do */ 150 } 151 rx->match = pattern; 152 if (rx->subs) { 153 *newpattern = ap_pregsub(pool, rx->subs, pattern, 154 rx->nmatch, rx->pmatch); 155 if (!*newpattern) { 156 return 0; /* FIXME - should we do more to handle error? */ 157 } 158 startl = rx->pmatch[0].rm_so; 159 oldl = rx->pmatch[0].rm_eo - startl; 160 newl = strlen(*newpattern); 161 diffsz = newl - oldl; 162 remainder = pattern + startl + oldl; 163 if (rx->flags & AP_REG_MULTI) { 164 /* recurse to do any further matches */ 165 ret += ap_rxplus_exec(pool, rx, remainder, &subs); 166 if (ret > 1) { 167 /* a further substitution happened */ 168 diffsz += strlen(subs) - strlen(remainder); 169 remainder = subs; 170 } 171 } 172 subs = apr_palloc(pool, strlen(pattern) + 1 + diffsz); 173 memcpy(subs, pattern, startl); 174 memcpy(subs+startl, *newpattern, newl); 175 strcpy(subs+startl+newl, remainder); 176 *newpattern = subs; 177 } 178 return ret; 179} 180#ifdef DOXYGEN 181AP_DECLARE(int) ap_rxplus_nmatch(ap_rxplus_t *rx) 182{ 183 return (rx->match != NULL) ? rx->nmatch : 0; 184} 185#endif 186 187/* If this blows up on you, see the notes in the header/apidoc 188 * rx->match is a pointer and it's your responsibility to ensure 189 * it hasn't gone out-of-scope since the last ap_rxplus_exec 190 */ 191AP_DECLARE(void) ap_rxplus_match(ap_rxplus_t *rx, int n, int *len, 192 const char **match) 193{ 194 if (n >= 0 && n < ap_rxplus_nmatch(rx)) { 195 *match = rx->match + rx->pmatch[n].rm_so; 196 *len = rx->pmatch[n].rm_eo - rx->pmatch[n].rm_so; 197 } 198 else { 199 *len = -1; 200 *match = NULL; 201 } 202} 203AP_DECLARE(char*) ap_rxplus_pmatch(apr_pool_t *pool, ap_rxplus_t *rx, int n) 204{ 205 int len; 206 const char *match; 207 ap_rxplus_match(rx, n, &len, &match); 208 return apr_pstrndup(pool, match, len); 209} 210