1/************************************************* 2* Perl-Compatible Regular Expressions * 3*************************************************/ 4 5/* PCRE is a library of functions to support regular expressions whose syntax 6and semantics are as close as possible to those of the Perl 5 language. 7 8 Written by Philip Hazel 9 Copyright (c) 1997-2012 University of Cambridge 10 11----------------------------------------------------------------------------- 12Redistribution and use in source and binary forms, with or without 13modification, are permitted provided that the following conditions are met: 14 15 * Redistributions of source code must retain the above copyright notice, 16 this list of conditions and the following disclaimer. 17 18 * Redistributions in binary form must reproduce the above copyright 19 notice, this list of conditions and the following disclaimer in the 20 documentation and/or other materials provided with the distribution. 21 22 * Neither the name of the University of Cambridge nor the names of its 23 contributors may be used to endorse or promote products derived from 24 this software without specific prior written permission. 25 26THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 27AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 30LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36POSSIBILITY OF SUCH DAMAGE. 37----------------------------------------------------------------------------- 38*/ 39 40 41/* This module contains some convenience functions for extracting substrings 42from the subject string after a regex match has succeeded. The original idea 43for these functions came from Scott Wimer. */ 44 45 46#ifdef HAVE_CONFIG_H 47#include "config.h" 48#endif 49 50#include "pcre_internal.h" 51 52 53/************************************************* 54* Find number for named string * 55*************************************************/ 56 57/* This function is used by the get_first_set() function below, as well 58as being generally available. It assumes that names are unique. 59 60Arguments: 61 code the compiled regex 62 stringname the name whose number is required 63 64Returns: the number of the named parentheses, or a negative number 65 (PCRE_ERROR_NOSUBSTRING) if not found 66*/ 67 68#ifdef COMPILE_PCRE8 69PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 70pcre_get_stringnumber(const pcre *code, const char *stringname) 71#else 72PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 73pcre16_get_stringnumber(const pcre16 *code, PCRE_SPTR16 stringname) 74#endif 75{ 76int rc; 77int entrysize; 78int top, bot; 79pcre_uchar *nametable; 80 81#ifdef COMPILE_PCRE8 82if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) 83 return rc; 84if (top <= 0) return PCRE_ERROR_NOSUBSTRING; 85 86if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) 87 return rc; 88if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) 89 return rc; 90#endif 91#ifdef COMPILE_PCRE16 92if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) 93 return rc; 94if (top <= 0) return PCRE_ERROR_NOSUBSTRING; 95 96if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) 97 return rc; 98if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) 99 return rc; 100#endif 101 102bot = 0; 103while (top > bot) 104 { 105 int mid = (top + bot) / 2; 106 pcre_uchar *entry = nametable + entrysize*mid; 107 int c = STRCMP_UC_UC((pcre_uchar *)stringname, 108 (pcre_uchar *)(entry + IMM2_SIZE)); 109 if (c == 0) return GET2(entry, 0); 110 if (c > 0) bot = mid + 1; else top = mid; 111 } 112 113return PCRE_ERROR_NOSUBSTRING; 114} 115 116 117 118/************************************************* 119* Find (multiple) entries for named string * 120*************************************************/ 121 122/* This is used by the get_first_set() function below, as well as being 123generally available. It is used when duplicated names are permitted. 124 125Arguments: 126 code the compiled regex 127 stringname the name whose entries required 128 firstptr where to put the pointer to the first entry 129 lastptr where to put the pointer to the last entry 130 131Returns: the length of each entry, or a negative number 132 (PCRE_ERROR_NOSUBSTRING) if not found 133*/ 134 135#ifdef COMPILE_PCRE8 136PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 137pcre_get_stringtable_entries(const pcre *code, const char *stringname, 138 char **firstptr, char **lastptr) 139#else 140PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 141pcre16_get_stringtable_entries(const pcre16 *code, PCRE_SPTR16 stringname, 142 PCRE_UCHAR16 **firstptr, PCRE_UCHAR16 **lastptr) 143#endif 144{ 145int rc; 146int entrysize; 147int top, bot; 148pcre_uchar *nametable, *lastentry; 149 150#ifdef COMPILE_PCRE8 151if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) 152 return rc; 153if (top <= 0) return PCRE_ERROR_NOSUBSTRING; 154 155if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) 156 return rc; 157if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) 158 return rc; 159#endif 160#ifdef COMPILE_PCRE16 161if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) 162 return rc; 163if (top <= 0) return PCRE_ERROR_NOSUBSTRING; 164 165if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) 166 return rc; 167if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) 168 return rc; 169#endif 170 171lastentry = nametable + entrysize * (top - 1); 172bot = 0; 173while (top > bot) 174 { 175 int mid = (top + bot) / 2; 176 pcre_uchar *entry = nametable + entrysize*mid; 177 int c = STRCMP_UC_UC((pcre_uchar *)stringname, 178 (pcre_uchar *)(entry + IMM2_SIZE)); 179 if (c == 0) 180 { 181 pcre_uchar *first = entry; 182 pcre_uchar *last = entry; 183 while (first > nametable) 184 { 185 if (STRCMP_UC_UC((pcre_uchar *)stringname, 186 (pcre_uchar *)(first - entrysize + IMM2_SIZE)) != 0) break; 187 first -= entrysize; 188 } 189 while (last < lastentry) 190 { 191 if (STRCMP_UC_UC((pcre_uchar *)stringname, 192 (pcre_uchar *)(last + entrysize + IMM2_SIZE)) != 0) break; 193 last += entrysize; 194 } 195#ifdef COMPILE_PCRE8 196 *firstptr = (char *)first; 197 *lastptr = (char *)last; 198#else 199 *firstptr = (PCRE_UCHAR16 *)first; 200 *lastptr = (PCRE_UCHAR16 *)last; 201#endif 202 return entrysize; 203 } 204 if (c > 0) bot = mid + 1; else top = mid; 205 } 206 207return PCRE_ERROR_NOSUBSTRING; 208} 209 210 211 212/************************************************* 213* Find first set of multiple named strings * 214*************************************************/ 215 216/* This function allows for duplicate names in the table of named substrings. 217It returns the number of the first one that was set in a pattern match. 218 219Arguments: 220 code the compiled regex 221 stringname the name of the capturing substring 222 ovector the vector of matched substrings 223 224Returns: the number of the first that is set, 225 or the number of the last one if none are set, 226 or a negative number on error 227*/ 228 229#ifdef COMPILE_PCRE8 230static int 231get_first_set(const pcre *code, const char *stringname, int *ovector) 232#else 233static int 234get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector) 235#endif 236{ 237const REAL_PCRE *re = (const REAL_PCRE *)code; 238int entrysize; 239pcre_uchar *entry; 240#ifdef COMPILE_PCRE8 241char *first, *last; 242#else 243PCRE_UCHAR16 *first, *last; 244#endif 245 246#ifdef COMPILE_PCRE8 247if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0) 248 return pcre_get_stringnumber(code, stringname); 249entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last); 250#else 251if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0) 252 return pcre16_get_stringnumber(code, stringname); 253entrysize = pcre16_get_stringtable_entries(code, stringname, &first, &last); 254#endif 255if (entrysize <= 0) return entrysize; 256for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize) 257 { 258 int n = GET2(entry, 0); 259 if (ovector[n*2] >= 0) return n; 260 } 261return GET2(entry, 0); 262} 263 264 265 266 267/************************************************* 268* Copy captured string to given buffer * 269*************************************************/ 270 271/* This function copies a single captured substring into a given buffer. 272Note that we use memcpy() rather than strncpy() in case there are binary zeros 273in the string. 274 275Arguments: 276 subject the subject string that was matched 277 ovector pointer to the offsets table 278 stringcount the number of substrings that were captured 279 (i.e. the yield of the pcre_exec call, unless 280 that was zero, in which case it should be 1/3 281 of the offset table size) 282 stringnumber the number of the required substring 283 buffer where to put the substring 284 size the size of the buffer 285 286Returns: if successful: 287 the length of the copied string, not including the zero 288 that is put on the end; can be zero 289 if not successful: 290 PCRE_ERROR_NOMEMORY (-6) buffer too small 291 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring 292*/ 293 294#ifdef COMPILE_PCRE8 295PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 296pcre_copy_substring(const char *subject, int *ovector, int stringcount, 297 int stringnumber, char *buffer, int size) 298#else 299PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 300pcre16_copy_substring(PCRE_SPTR16 subject, int *ovector, int stringcount, 301 int stringnumber, PCRE_UCHAR16 *buffer, int size) 302#endif 303{ 304int yield; 305if (stringnumber < 0 || stringnumber >= stringcount) 306 return PCRE_ERROR_NOSUBSTRING; 307stringnumber *= 2; 308yield = ovector[stringnumber+1] - ovector[stringnumber]; 309if (size < yield + 1) return PCRE_ERROR_NOMEMORY; 310memcpy(buffer, subject + ovector[stringnumber], IN_UCHARS(yield)); 311buffer[yield] = 0; 312return yield; 313} 314 315 316 317/************************************************* 318* Copy named captured string to given buffer * 319*************************************************/ 320 321/* This function copies a single captured substring into a given buffer, 322identifying it by name. If the regex permits duplicate names, the first 323substring that is set is chosen. 324 325Arguments: 326 code the compiled regex 327 subject the subject string that was matched 328 ovector pointer to the offsets table 329 stringcount the number of substrings that were captured 330 (i.e. the yield of the pcre_exec call, unless 331 that was zero, in which case it should be 1/3 332 of the offset table size) 333 stringname the name of the required substring 334 buffer where to put the substring 335 size the size of the buffer 336 337Returns: if successful: 338 the length of the copied string, not including the zero 339 that is put on the end; can be zero 340 if not successful: 341 PCRE_ERROR_NOMEMORY (-6) buffer too small 342 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring 343*/ 344 345#ifdef COMPILE_PCRE8 346PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 347pcre_copy_named_substring(const pcre *code, const char *subject, 348 int *ovector, int stringcount, const char *stringname, 349 char *buffer, int size) 350#else 351PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 352pcre16_copy_named_substring(const pcre16 *code, PCRE_SPTR16 subject, 353 int *ovector, int stringcount, PCRE_SPTR16 stringname, 354 PCRE_UCHAR16 *buffer, int size) 355#endif 356{ 357int n = get_first_set(code, stringname, ovector); 358if (n <= 0) return n; 359#ifdef COMPILE_PCRE8 360return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size); 361#else 362return pcre16_copy_substring(subject, ovector, stringcount, n, buffer, size); 363#endif 364} 365 366 367 368/************************************************* 369* Copy all captured strings to new store * 370*************************************************/ 371 372/* This function gets one chunk of store and builds a list of pointers and all 373of the captured substrings in it. A NULL pointer is put on the end of the list. 374 375Arguments: 376 subject the subject string that was matched 377 ovector pointer to the offsets table 378 stringcount the number of substrings that were captured 379 (i.e. the yield of the pcre_exec call, unless 380 that was zero, in which case it should be 1/3 381 of the offset table size) 382 listptr set to point to the list of pointers 383 384Returns: if successful: 0 385 if not successful: 386 PCRE_ERROR_NOMEMORY (-6) failed to get store 387*/ 388 389#ifdef COMPILE_PCRE8 390PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 391pcre_get_substring_list(const char *subject, int *ovector, int stringcount, 392 const char ***listptr) 393#else 394PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 395pcre16_get_substring_list(PCRE_SPTR16 subject, int *ovector, int stringcount, 396 PCRE_SPTR16 **listptr) 397#endif 398{ 399int i; 400int size = sizeof(pcre_uchar *); 401int double_count = stringcount * 2; 402pcre_uchar **stringlist; 403pcre_uchar *p; 404 405for (i = 0; i < double_count; i += 2) 406 size += sizeof(pcre_uchar *) + IN_UCHARS(ovector[i+1] - ovector[i] + 1); 407 408stringlist = (pcre_uchar **)(PUBL(malloc))(size); 409if (stringlist == NULL) return PCRE_ERROR_NOMEMORY; 410 411#ifdef COMPILE_PCRE8 412*listptr = (const char **)stringlist; 413#else 414*listptr = (PCRE_SPTR16 *)stringlist; 415#endif 416p = (pcre_uchar *)(stringlist + stringcount + 1); 417 418for (i = 0; i < double_count; i += 2) 419 { 420 int len = ovector[i+1] - ovector[i]; 421 memcpy(p, subject + ovector[i], IN_UCHARS(len)); 422 *stringlist++ = p; 423 p += len; 424 *p++ = 0; 425 } 426 427*stringlist = NULL; 428return 0; 429} 430 431 432 433/************************************************* 434* Free store obtained by get_substring_list * 435*************************************************/ 436 437/* This function exists for the benefit of people calling PCRE from non-C 438programs that can call its functions, but not free() or (PUBL(free))() 439directly. 440 441Argument: the result of a previous pcre_get_substring_list() 442Returns: nothing 443*/ 444 445#ifdef COMPILE_PCRE8 446PCRE_EXP_DEFN void PCRE_CALL_CONVENTION 447pcre_free_substring_list(const char **pointer) 448#else 449PCRE_EXP_DEFN void PCRE_CALL_CONVENTION 450pcre16_free_substring_list(PCRE_SPTR16 *pointer) 451#endif 452{ 453(PUBL(free))((void *)pointer); 454} 455 456 457 458/************************************************* 459* Copy captured string to new store * 460*************************************************/ 461 462/* This function copies a single captured substring into a piece of new 463store 464 465Arguments: 466 subject the subject string that was matched 467 ovector pointer to the offsets table 468 stringcount the number of substrings that were captured 469 (i.e. the yield of the pcre_exec call, unless 470 that was zero, in which case it should be 1/3 471 of the offset table size) 472 stringnumber the number of the required substring 473 stringptr where to put a pointer to the substring 474 475Returns: if successful: 476 the length of the string, not including the zero that 477 is put on the end; can be zero 478 if not successful: 479 PCRE_ERROR_NOMEMORY (-6) failed to get store 480 PCRE_ERROR_NOSUBSTRING (-7) substring not present 481*/ 482 483#ifdef COMPILE_PCRE8 484PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 485pcre_get_substring(const char *subject, int *ovector, int stringcount, 486 int stringnumber, const char **stringptr) 487#else 488PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 489pcre16_get_substring(PCRE_SPTR16 subject, int *ovector, int stringcount, 490 int stringnumber, PCRE_SPTR16 *stringptr) 491#endif 492{ 493int yield; 494pcre_uchar *substring; 495if (stringnumber < 0 || stringnumber >= stringcount) 496 return PCRE_ERROR_NOSUBSTRING; 497stringnumber *= 2; 498yield = ovector[stringnumber+1] - ovector[stringnumber]; 499substring = (pcre_uchar *)(PUBL(malloc))(IN_UCHARS(yield + 1)); 500if (substring == NULL) return PCRE_ERROR_NOMEMORY; 501memcpy(substring, subject + ovector[stringnumber], IN_UCHARS(yield)); 502substring[yield] = 0; 503#ifdef COMPILE_PCRE8 504*stringptr = (const char *)substring; 505#else 506*stringptr = (PCRE_SPTR16)substring; 507#endif 508return yield; 509} 510 511 512 513/************************************************* 514* Copy named captured string to new store * 515*************************************************/ 516 517/* This function copies a single captured substring, identified by name, into 518new store. If the regex permits duplicate names, the first substring that is 519set is chosen. 520 521Arguments: 522 code the compiled regex 523 subject the subject string that was matched 524 ovector pointer to the offsets table 525 stringcount the number of substrings that were captured 526 (i.e. the yield of the pcre_exec call, unless 527 that was zero, in which case it should be 1/3 528 of the offset table size) 529 stringname the name of the required substring 530 stringptr where to put the pointer 531 532Returns: if successful: 533 the length of the copied string, not including the zero 534 that is put on the end; can be zero 535 if not successful: 536 PCRE_ERROR_NOMEMORY (-6) couldn't get memory 537 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring 538*/ 539 540#ifdef COMPILE_PCRE8 541PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 542pcre_get_named_substring(const pcre *code, const char *subject, 543 int *ovector, int stringcount, const char *stringname, 544 const char **stringptr) 545#else 546PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 547pcre16_get_named_substring(const pcre16 *code, PCRE_SPTR16 subject, 548 int *ovector, int stringcount, PCRE_SPTR16 stringname, 549 PCRE_SPTR16 *stringptr) 550#endif 551{ 552int n = get_first_set(code, stringname, ovector); 553if (n <= 0) return n; 554#ifdef COMPILE_PCRE8 555return pcre_get_substring(subject, ovector, stringcount, n, stringptr); 556#else 557return pcre16_get_substring(subject, ovector, stringcount, n, stringptr); 558#endif 559} 560 561 562 563 564/************************************************* 565* Free store obtained by get_substring * 566*************************************************/ 567 568/* This function exists for the benefit of people calling PCRE from non-C 569programs that can call its functions, but not free() or (PUBL(free))() 570directly. 571 572Argument: the result of a previous pcre_get_substring() 573Returns: nothing 574*/ 575 576#ifdef COMPILE_PCRE8 577PCRE_EXP_DEFN void PCRE_CALL_CONVENTION 578pcre_free_substring(const char *pointer) 579#else 580PCRE_EXP_DEFN void PCRE_CALL_CONVENTION 581pcre16_free_substring(PCRE_SPTR16 pointer) 582#endif 583{ 584(PUBL(free))((void *)pointer); 585} 586 587/* End of pcre_get.c */ 588