1/************************************************* 2* Perl-Compatible Regular Expressions * 3*************************************************/ 4 5/* 6This is a library of functions to support regular expressions whose syntax 7and semantics are as close as possible to those of the Perl 5 language. See 8the file Tech.Notes for some information on the internals. 9 10This module is a wrapper that provides a POSIX API to the underlying PCRE 11functions. 12 13Written by: Philip Hazel <ph10@cam.ac.uk> 14 15 Copyright (c) 1997-2004 University of Cambridge 16 17----------------------------------------------------------------------------- 18Redistribution and use in source and binary forms, with or without 19modification, are permitted provided that the following conditions are met: 20 21 * Redistributions of source code must retain the above copyright notice, 22 this list of conditions and the following disclaimer. 23 24 * Redistributions in binary form must reproduce the above copyright 25 notice, this list of conditions and the following disclaimer in the 26 documentation and/or other materials provided with the distribution. 27 28 * Neither the name of the University of Cambridge nor the names of its 29 contributors may be used to endorse or promote products derived from 30 this software without specific prior written permission. 31 32THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 33AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 34IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 35ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 36LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 37CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 38SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 39INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 40CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 41ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 42POSSIBILITY OF SUCH DAMAGE. 43----------------------------------------------------------------------------- 44*/ 45 46#include "httpd.h" 47#include "apr_strings.h" 48#include "pcre.h" 49 50#define APR_WANT_STRFUNC 51#include "apr_want.h" 52 53#ifndef POSIX_MALLOC_THRESHOLD 54#define POSIX_MALLOC_THRESHOLD (10) 55#endif 56 57/* Table of error strings corresponding to POSIX error codes; must be 58 * kept in synch with include/ap_regex.h's AP_REG_E* definitions. */ 59 60static const char *const pstring[] = { 61 "", /* Dummy for value 0 */ 62 "internal error", /* AP_REG_ASSERT */ 63 "failed to get memory", /* AP_REG_ESPACE */ 64 "bad argument", /* AP_REG_INVARG */ 65 "match failed" /* AP_REG_NOMATCH */ 66}; 67 68AP_DECLARE(apr_size_t) ap_regerror(int errcode, const ap_regex_t *preg, 69 char *errbuf, apr_size_t errbuf_size) 70{ 71const char *message, *addmessage; 72apr_size_t length, addlength; 73 74message = (errcode >= (int)(sizeof(pstring)/sizeof(char *)))? 75 "unknown error code" : pstring[errcode]; 76length = strlen(message) + 1; 77 78addmessage = " at offset "; 79addlength = (preg != NULL && (int)preg->re_erroffset != -1)? 
80 strlen(addmessage) + 6 : 0; 81 82if (errbuf_size > 0) 83 { 84 if (addlength > 0 && errbuf_size >= length + addlength) 85 apr_snprintf(errbuf, sizeof errbuf, 86 "%s%s%-6d", message, addmessage, (int)preg->re_erroffset); 87 else 88 { 89 strncpy(errbuf, message, errbuf_size - 1); 90 errbuf[errbuf_size-1] = 0; 91 } 92 } 93 94return length + addlength; 95} 96 97 98 99 100/************************************************* 101* Free store held by a regex * 102*************************************************/ 103 104AP_DECLARE(void) ap_regfree(ap_regex_t *preg) 105{ 106(pcre_free)(preg->re_pcre); 107} 108 109 110 111 112/************************************************* 113* Compile a regular expression * 114*************************************************/ 115 116/* 117Arguments: 118 preg points to a structure for recording the compiled expression 119 pattern the pattern to compile 120 cflags compilation flags 121 122Returns: 0 on success 123 various non-zero codes on failure 124*/ 125 126AP_DECLARE(int) ap_regcomp(ap_regex_t *preg, const char *pattern, int cflags) 127{ 128const char *errorptr; 129int erroffset; 130int options = 0; 131int nsub; 132 133if ((cflags & AP_REG_ICASE) != 0) options |= PCRE_CASELESS; 134if ((cflags & AP_REG_NEWLINE) != 0) options |= PCRE_MULTILINE; 135 136preg->re_pcre = pcre_compile(pattern, options, &errorptr, &erroffset, NULL); 137preg->re_erroffset = erroffset; 138 139if (preg->re_pcre == NULL) return AP_REG_INVARG; 140 141pcre_fullinfo((const pcre *)preg->re_pcre, NULL, 142 PCRE_INFO_CAPTURECOUNT, &nsub); 143preg->re_nsub = (apr_size_t)nsub; 144return 0; 145} 146 147 148 149 150/************************************************* 151* Match a regular expression * 152*************************************************/ 153 154/* Unfortunately, PCRE requires 3 ints of working space for each captured 155substring, so we have to get and release working store instead of just using 156the POSIX structures as was done in earlier releases when 
PCRE needed only 2 ints. However, if the number of possible capturing brackets
is small, use a block of store on the stack, to reduce the use of malloc/free.
The threshold is in a macro that can be changed at configure time. */

/*
Arguments:
  preg        compiled expression; its re_erroffset field is reset to
                (apr_size_t)-1 on every call, despite the const qualifier
  string      NUL-terminated subject string
  nmatch      number of entries available in pmatch
  pmatch      receives the start/end offsets of the overall match and of
                the captured substrings; entries that were not set by the
                match get rm_so = rm_eo = -1
  eflags      AP_REG_NOTBOL selects PCRE_NOTBOL and AP_REG_NOTEOL selects
                PCRE_NOTEOL

Returns:      0 on a successful match
              AP_REG_NOMATCH if the pattern did not match
              AP_REG_ESPACE if working store could not be obtained (this
                includes an nmatch too large to be represented) or PCRE
                reported a memory or match-limit failure
              AP_REG_INVARG or AP_REG_ASSERT for the other PCRE errors
*/

AP_DECLARE(int) ap_regexec(const ap_regex_t *preg, const char *string,
                           apr_size_t nmatch, ap_regmatch_t pmatch[],
                           int eflags)
{
    int rc;
    int options = 0;
    int *ovector = NULL;
    int small_ovector[POSIX_MALLOC_THRESHOLD * 3];
    int allocated_ovector = 0;

    if ((eflags & AP_REG_NOTBOL) != 0) {
        options |= PCRE_NOTBOL;
    }
    if ((eflags & AP_REG_NOTEOL) != 0) {
        options |= PCRE_NOTEOL;
    }

    /* Only has meaning after compile */
    ((ap_regex_t *)preg)->re_erroffset = (apr_size_t)(-1);

    if (nmatch > 0) {
        if (nmatch <= POSIX_MALLOC_THRESHOLD) {
            ovector = &(small_ovector[0]);
        }
        else {
            /* pcre_exec() takes the vector length as an int, and the
             * allocation size is computed in apr_size_t; refuse any nmatch
             * for which either nmatch * 3 or the byte count would wrap.
             * (~0u >> 1) is the largest int on the usual no-padding-bits
             * representation, spelled without needing <limits.h>. */
            const apr_size_t max_for_int = (apr_size_t)(~0u >> 1) / 3;
            const apr_size_t max_for_alloc =
                ((apr_size_t)-1) / (3 * sizeof(int));

            if (nmatch > max_for_int || nmatch > max_for_alloc) {
                return AP_REG_ESPACE;
            }

            ovector = malloc(sizeof(*ovector) * nmatch * 3);
            if (ovector == NULL) {
                return AP_REG_ESPACE;
            }
            allocated_ovector = 1;
        }
    }

    rc = pcre_exec((const pcre *)preg->re_pcre, NULL, string,
                   (int)strlen(string), 0, options, ovector,
                   (int)(nmatch * 3));

    if (rc == 0) {
        rc = (int)nmatch;       /* All captured slots were filled in */
    }

    if (rc >= 0) {
        apr_size_t i;

        /* Copy the offset pairs that pcre_exec() reported... */
        for (i = 0; i < (apr_size_t)rc; i++) {
            pmatch[i].rm_so = ovector[i * 2];
            pmatch[i].rm_eo = ovector[i * 2 + 1];
        }
        if (allocated_ovector) {
            free(ovector);
        }
        /* ...and mark every remaining caller slot as unset. */
        for (; i < nmatch; i++) {
            pmatch[i].rm_so = pmatch[i].rm_eo = -1;
        }
        return 0;
    }

    if (allocated_ovector) {
        free(ovector);
    }

    /* Map the PCRE failure onto the closest AP_REG_* code. */
    switch (rc) {
    case PCRE_ERROR_NOMATCH: return AP_REG_NOMATCH;
    case PCRE_ERROR_NULL: return AP_REG_INVARG;
    case PCRE_ERROR_BADOPTION: return AP_REG_INVARG;
    case PCRE_ERROR_BADMAGIC: return AP_REG_INVARG;
    case PCRE_ERROR_UNKNOWN_NODE: return AP_REG_ASSERT;
    case PCRE_ERROR_NOMEMORY: return AP_REG_ESPACE;
#ifdef PCRE_ERROR_MATCHLIMIT
    case PCRE_ERROR_MATCHLIMIT: return AP_REG_ESPACE;
#endif
#ifdef PCRE_ERROR_BADUTF8
    case PCRE_ERROR_BADUTF8: return AP_REG_INVARG;
#endif
#ifdef PCRE_ERROR_BADUTF8_OFFSET
    case PCRE_ERROR_BADUTF8_OFFSET: return AP_REG_INVARG;
#endif
    default: return AP_REG_ASSERT;
    }
}

/* End of pcreposix.c */