1/************************************************* 2* Perl-Compatible Regular Expressions * 3*************************************************/ 4 5/* PCRE is a library of functions to support regular expressions whose syntax 6and semantics are as close as possible to those of the Perl 5 language. 7 8 Written by Philip Hazel 9 Copyright (c) 1997-2008 University of Cambridge 10 11----------------------------------------------------------------------------- 12Redistribution and use in source and binary forms, with or without 13modification, are permitted provided that the following conditions are met: 14 15 * Redistributions of source code must retain the above copyright notice, 16 this list of conditions and the following disclaimer. 17 18 * Redistributions in binary form must reproduce the above copyright 19 notice, this list of conditions and the following disclaimer in the 20 documentation and/or other materials provided with the distribution. 21 22 * Neither the name of the University of Cambridge nor the names of its 23 contributors may be used to endorse or promote products derived from 24 this software without specific prior written permission. 25 26THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 27AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 30LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36POSSIBILITY OF SUCH DAMAGE. 37----------------------------------------------------------------------------- 38*/ 39 40 41/* This module contains some convenience functions for extracting substrings 42from the subject string after a regex match has succeeded. The original idea 43for these functions came from Scott Wimer. */ 44 45 46#ifdef HAVE_CONFIG_H 47#include "config.h" 48#endif 49 50#include "pcre_internal.h" 51 52 53/************************************************* 54* Find number for named string * 55*************************************************/ 56 57/* This function is used by the get_first_set() function below, as well 58as being generally available. It assumes that names are unique. 59 60Arguments: 61 code the compiled regex 62 stringname the name whose number is required 63 64Returns: the number of the named parentheses, or a negative number 65 (PCRE_ERROR_NOSUBSTRING) if not found 66*/ 67 68PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 69pcre_get_stringnumber(const pcre *code, const char *stringname) 70{ 71int rc; 72int entrysize; 73int top, bot; 74uschar *nametable; 75 76if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) 77 return rc; 78if (top <= 0) return PCRE_ERROR_NOSUBSTRING; 79 80if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) 81 return rc; 82if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) 83 return rc; 84 85bot = 0; 86while (top > bot) 87 { 88 int mid = (top + bot) / 2; 89 uschar *entry = nametable + entrysize*mid; 90 int c = strcmp(stringname, (char *)(entry + 2)); 91 if (c == 0) return (entry[0] << 8) + entry[1]; 92 if (c > 0) bot = mid + 1; else top = mid; 93 } 94 95return PCRE_ERROR_NOSUBSTRING; 96} 97 98 99 100/************************************************* 101* Find (multiple) entries for named string * 102*************************************************/ 103 104/* This is used by the get_first_set() function below, as well as being 105generally available. It is used when duplicated names are permitted. 106 107Arguments: 108 code the compiled regex 109 stringname the name whose entries required 110 firstptr where to put the pointer to the first entry 111 lastptr where to put the pointer to the last entry 112 113Returns: the length of each entry, or a negative number 114 (PCRE_ERROR_NOSUBSTRING) if not found 115*/ 116 117PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 118pcre_get_stringtable_entries(const pcre *code, const char *stringname, 119 char **firstptr, char **lastptr) 120{ 121int rc; 122int entrysize; 123int top, bot; 124uschar *nametable, *lastentry; 125 126if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) 127 return rc; 128if (top <= 0) return PCRE_ERROR_NOSUBSTRING; 129 130if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) 131 return rc; 132if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) 133 return rc; 134 135lastentry = nametable + entrysize * (top - 1); 136bot = 0; 137while (top > bot) 138 { 139 int mid = (top + bot) / 2; 140 uschar *entry = nametable + entrysize*mid; 141 int c = strcmp(stringname, (char *)(entry + 2)); 142 if (c == 0) 143 { 144 uschar *first = entry; 145 uschar *last = entry; 146 while (first > nametable) 147 { 148 if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break; 149 first -= entrysize; 150 } 151 while (last < lastentry) 152 { 153 if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break; 154 last += entrysize; 155 } 156 *firstptr = (char *)first; 157 *lastptr = (char *)last; 158 return entrysize; 159 } 160 if (c > 0) bot = mid + 1; else top = mid; 161 } 162 163return PCRE_ERROR_NOSUBSTRING; 164} 165 166 167 168/************************************************* 169* Find first set of multiple named strings * 170*************************************************/ 171 172/* This function allows for duplicate names in the table of named substrings. 173It returns the number of the first one that was set in a pattern match. 174 175Arguments: 176 code the compiled regex 177 stringname the name of the capturing substring 178 ovector the vector of matched substrings 179 180Returns: the number of the first that is set, 181 or the number of the last one if none are set, 182 or a negative number on error 183*/ 184 185static int 186get_first_set(const pcre *code, const char *stringname, int *ovector) 187{ 188const real_pcre *re = (const real_pcre *)code; 189int entrysize; 190char *first, *last; 191uschar *entry; 192if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0) 193 return pcre_get_stringnumber(code, stringname); 194entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last); 195if (entrysize <= 0) return entrysize; 196for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize) 197 { 198 int n = (entry[0] << 8) + entry[1]; 199 if (ovector[n*2] >= 0) return n; 200 } 201return (first[0] << 8) + first[1]; 202} 203 204 205 206 207/************************************************* 208* Copy captured string to given buffer * 209*************************************************/ 210 211/* This function copies a single captured substring into a given buffer. 212Note that we use memcpy() rather than strncpy() in case there are binary zeros 213in the string. 214 215Arguments: 216 subject the subject string that was matched 217 ovector pointer to the offsets table 218 stringcount the number of substrings that were captured 219 (i.e. the yield of the pcre_exec call, unless 220 that was zero, in which case it should be 1/3 221 of the offset table size) 222 stringnumber the number of the required substring 223 buffer where to put the substring 224 size the size of the buffer 225 226Returns: if successful: 227 the length of the copied string, not including the zero 228 that is put on the end; can be zero 229 if not successful: 230 PCRE_ERROR_NOMEMORY (-6) buffer too small 231 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring 232*/ 233 234PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 235pcre_copy_substring(const char *subject, int *ovector, int stringcount, 236 int stringnumber, char *buffer, int size) 237{ 238int yield; 239if (stringnumber < 0 || stringnumber >= stringcount) 240 return PCRE_ERROR_NOSUBSTRING; 241stringnumber *= 2; 242yield = ovector[stringnumber+1] - ovector[stringnumber]; 243if (size < yield + 1) return PCRE_ERROR_NOMEMORY; 244memcpy(buffer, subject + ovector[stringnumber], yield); 245buffer[yield] = 0; 246return yield; 247} 248 249 250 251/************************************************* 252* Copy named captured string to given buffer * 253*************************************************/ 254 255/* This function copies a single captured substring into a given buffer, 256identifying it by name. If the regex permits duplicate names, the first 257substring that is set is chosen. 258 259Arguments: 260 code the compiled regex 261 subject the subject string that was matched 262 ovector pointer to the offsets table 263 stringcount the number of substrings that were captured 264 (i.e. the yield of the pcre_exec call, unless 265 that was zero, in which case it should be 1/3 266 of the offset table size) 267 stringname the name of the required substring 268 buffer where to put the substring 269 size the size of the buffer 270 271Returns: if successful: 272 the length of the copied string, not including the zero 273 that is put on the end; can be zero 274 if not successful: 275 PCRE_ERROR_NOMEMORY (-6) buffer too small 276 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring 277*/ 278 279PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 280pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector, 281 int stringcount, const char *stringname, char *buffer, int size) 282{ 283int n = get_first_set(code, stringname, ovector); 284if (n <= 0) return n; 285return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size); 286} 287 288 289 290/************************************************* 291* Copy all captured strings to new store * 292*************************************************/ 293 294/* This function gets one chunk of store and builds a list of pointers and all 295of the captured substrings in it. A NULL pointer is put on the end of the list. 296 297Arguments: 298 subject the subject string that was matched 299 ovector pointer to the offsets table 300 stringcount the number of substrings that were captured 301 (i.e. the yield of the pcre_exec call, unless 302 that was zero, in which case it should be 1/3 303 of the offset table size) 304 listptr set to point to the list of pointers 305 306Returns: if successful: 0 307 if not successful: 308 PCRE_ERROR_NOMEMORY (-6) failed to get store 309*/ 310 311PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 312pcre_get_substring_list(const char *subject, int *ovector, int stringcount, 313 const char ***listptr) 314{ 315int i; 316int size = sizeof(char *); 317int double_count = stringcount * 2; 318char **stringlist; 319char *p; 320 321for (i = 0; i < double_count; i += 2) 322 size += sizeof(char *) + ovector[i+1] - ovector[i] + 1; 323 324stringlist = (char **)(pcre_malloc)(size); 325if (stringlist == NULL) return PCRE_ERROR_NOMEMORY; 326 327*listptr = (const char **)stringlist; 328p = (char *)(stringlist + stringcount + 1); 329 330for (i = 0; i < double_count; i += 2) 331 { 332 int len = ovector[i+1] - ovector[i]; 333 memcpy(p, subject + ovector[i], len); 334 *stringlist++ = p; 335 p += len; 336 *p++ = 0; 337 } 338 339*stringlist = NULL; 340return 0; 341} 342 343 344 345/************************************************* 346* Free store obtained by get_substring_list * 347*************************************************/ 348 349/* This function exists for the benefit of people calling PCRE from non-C 350programs that can call its functions, but not free() or (pcre_free)() directly. 351 352Argument: the result of a previous pcre_get_substring_list() 353Returns: nothing 354*/ 355 356PCRE_EXP_DEFN void PCRE_CALL_CONVENTION 357pcre_free_substring_list(const char **pointer) 358{ 359(pcre_free)((void *)pointer); 360} 361 362 363 364/************************************************* 365* Copy captured string to new store * 366*************************************************/ 367 368/* This function copies a single captured substring into a piece of new 369store 370 371Arguments: 372 subject the subject string that was matched 373 ovector pointer to the offsets table 374 stringcount the number of substrings that were captured 375 (i.e. the yield of the pcre_exec call, unless 376 that was zero, in which case it should be 1/3 377 of the offset table size) 378 stringnumber the number of the required substring 379 stringptr where to put a pointer to the substring 380 381Returns: if successful: 382 the length of the string, not including the zero that 383 is put on the end; can be zero 384 if not successful: 385 PCRE_ERROR_NOMEMORY (-6) failed to get store 386 PCRE_ERROR_NOSUBSTRING (-7) substring not present 387*/ 388 389PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 390pcre_get_substring(const char *subject, int *ovector, int stringcount, 391 int stringnumber, const char **stringptr) 392{ 393int yield; 394char *substring; 395if (stringnumber < 0 || stringnumber >= stringcount) 396 return PCRE_ERROR_NOSUBSTRING; 397stringnumber *= 2; 398yield = ovector[stringnumber+1] - ovector[stringnumber]; 399substring = (char *)(pcre_malloc)(yield + 1); 400if (substring == NULL) return PCRE_ERROR_NOMEMORY; 401memcpy(substring, subject + ovector[stringnumber], yield); 402substring[yield] = 0; 403*stringptr = substring; 404return yield; 405} 406 407 408 409/************************************************* 410* Copy named captured string to new store * 411*************************************************/ 412 413/* This function copies a single captured substring, identified by name, into 414new store. If the regex permits duplicate names, the first substring that is 415set is chosen. 416 417Arguments: 418 code the compiled regex 419 subject the subject string that was matched 420 ovector pointer to the offsets table 421 stringcount the number of substrings that were captured 422 (i.e. the yield of the pcre_exec call, unless 423 that was zero, in which case it should be 1/3 424 of the offset table size) 425 stringname the name of the required substring 426 stringptr where to put the pointer 427 428Returns: if successful: 429 the length of the copied string, not including the zero 430 that is put on the end; can be zero 431 if not successful: 432 PCRE_ERROR_NOMEMORY (-6) couldn't get memory 433 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring 434*/ 435 436PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 437pcre_get_named_substring(const pcre *code, const char *subject, int *ovector, 438 int stringcount, const char *stringname, const char **stringptr) 439{ 440int n = get_first_set(code, stringname, ovector); 441if (n <= 0) return n; 442return pcre_get_substring(subject, ovector, stringcount, n, stringptr); 443} 444 445 446 447 448/************************************************* 449* Free store obtained by get_substring * 450*************************************************/ 451 452/* This function exists for the benefit of people calling PCRE from non-C 453programs that can call its functions, but not free() or (pcre_free)() directly. 454 455Argument: the result of a previous pcre_get_substring() 456Returns: nothing 457*/ 458 459PCRE_EXP_DEFN void PCRE_CALL_CONVENTION 460pcre_free_substring(const char *pointer) 461{ 462(pcre_free)((void *)pointer); 463} 464 465/* End of pcre_get.c */ 466