1/*************************************************
2*      Perl-Compatible Regular Expressions       *
3*************************************************/
4
5/* PCRE is a library of functions to support regular expressions whose syntax
6and semantics are as close as possible to those of the Perl 5 language.
7
8                       Written by Philip Hazel
9           Copyright (c) 1997-2012 University of Cambridge
10
11-----------------------------------------------------------------------------
12Redistribution and use in source and binary forms, with or without
13modification, are permitted provided that the following conditions are met:
14
15    * Redistributions of source code must retain the above copyright notice,
16      this list of conditions and the following disclaimer.
17
18    * Redistributions in binary form must reproduce the above copyright
19      notice, this list of conditions and the following disclaimer in the
20      documentation and/or other materials provided with the distribution.
21
22    * Neither the name of the University of Cambridge nor the names of its
23      contributors may be used to endorse or promote products derived from
24      this software without specific prior written permission.
25
26THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36POSSIBILITY OF SUCH DAMAGE.
37-----------------------------------------------------------------------------
38*/
39
40
41/* This module contains some convenience functions for extracting substrings
42from the subject string after a regex match has succeeded. The original idea
43for these functions came from Scott Wimer. */
44
45
46#ifdef HAVE_CONFIG_H
47#include "config.h"
48#endif
49
50#include "pcre_internal.h"
51
52
53/*************************************************
54*           Find number for named string         *
55*************************************************/
56
57/* This function is used by the get_first_set() function below, as well
58as being generally available. It assumes that names are unique.
59
60Arguments:
61  code        the compiled regex
62  stringname  the name whose number is required
63
64Returns:      the number of the named parentheses, or a negative number
65                (PCRE_ERROR_NOSUBSTRING) if not found
66*/
67
68#ifdef COMPILE_PCRE8
69PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
70pcre_get_stringnumber(const pcre *code, const char *stringname)
71#else
72PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
73pcre16_get_stringnumber(const pcre16 *code, PCRE_SPTR16 stringname)
74#endif
75{
76int rc;
77int entrysize;
78int top, bot;
79pcre_uchar *nametable;
80
81#ifdef COMPILE_PCRE8
82if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
83  return rc;
84if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
85
86if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
87  return rc;
88if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
89  return rc;
90#endif
91#ifdef COMPILE_PCRE16
92if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
93  return rc;
94if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
95
96if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
97  return rc;
98if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
99  return rc;
100#endif
101
102bot = 0;
103while (top > bot)
104  {
105  int mid = (top + bot) / 2;
106  pcre_uchar *entry = nametable + entrysize*mid;
107  int c = STRCMP_UC_UC((pcre_uchar *)stringname,
108    (pcre_uchar *)(entry + IMM2_SIZE));
109  if (c == 0) return GET2(entry, 0);
110  if (c > 0) bot = mid + 1; else top = mid;
111  }
112
113return PCRE_ERROR_NOSUBSTRING;
114}
115
116
117
118/*************************************************
119*     Find (multiple) entries for named string   *
120*************************************************/
121
122/* This is used by the get_first_set() function below, as well as being
123generally available. It is used when duplicated names are permitted.
124
125Arguments:
126  code        the compiled regex
127  stringname  the name whose entries required
128  firstptr    where to put the pointer to the first entry
129  lastptr     where to put the pointer to the last entry
130
131Returns:      the length of each entry, or a negative number
132                (PCRE_ERROR_NOSUBSTRING) if not found
133*/
134
135#ifdef COMPILE_PCRE8
136PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
137pcre_get_stringtable_entries(const pcre *code, const char *stringname,
138  char **firstptr, char **lastptr)
139#else
140PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
141pcre16_get_stringtable_entries(const pcre16 *code, PCRE_SPTR16 stringname,
142  PCRE_UCHAR16 **firstptr, PCRE_UCHAR16 **lastptr)
143#endif
144{
145int rc;
146int entrysize;
147int top, bot;
148pcre_uchar *nametable, *lastentry;
149
150#ifdef COMPILE_PCRE8
151if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
152  return rc;
153if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
154
155if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
156  return rc;
157if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
158  return rc;
159#endif
160#ifdef COMPILE_PCRE16
161if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
162  return rc;
163if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
164
165if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
166  return rc;
167if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
168  return rc;
169#endif
170
171lastentry = nametable + entrysize * (top - 1);
172bot = 0;
173while (top > bot)
174  {
175  int mid = (top + bot) / 2;
176  pcre_uchar *entry = nametable + entrysize*mid;
177  int c = STRCMP_UC_UC((pcre_uchar *)stringname,
178    (pcre_uchar *)(entry + IMM2_SIZE));
179  if (c == 0)
180    {
181    pcre_uchar *first = entry;
182    pcre_uchar *last = entry;
183    while (first > nametable)
184      {
185      if (STRCMP_UC_UC((pcre_uchar *)stringname,
186        (pcre_uchar *)(first - entrysize + IMM2_SIZE)) != 0) break;
187      first -= entrysize;
188      }
189    while (last < lastentry)
190      {
191      if (STRCMP_UC_UC((pcre_uchar *)stringname,
192        (pcre_uchar *)(last + entrysize + IMM2_SIZE)) != 0) break;
193      last += entrysize;
194      }
195#ifdef COMPILE_PCRE8
196    *firstptr = (char *)first;
197    *lastptr = (char *)last;
198#else
199    *firstptr = (PCRE_UCHAR16 *)first;
200    *lastptr = (PCRE_UCHAR16 *)last;
201#endif
202    return entrysize;
203    }
204  if (c > 0) bot = mid + 1; else top = mid;
205  }
206
207return PCRE_ERROR_NOSUBSTRING;
208}
209
210
211
212/*************************************************
213*    Find first set of multiple named strings    *
214*************************************************/
215
216/* This function allows for duplicate names in the table of named substrings.
217It returns the number of the first one that was set in a pattern match.
218
219Arguments:
220  code         the compiled regex
221  stringname   the name of the capturing substring
222  ovector      the vector of matched substrings
223
224Returns:       the number of the first that is set,
225               or the number of the last one if none are set,
226               or a negative number on error
227*/
228
229#ifdef COMPILE_PCRE8
230static int
231get_first_set(const pcre *code, const char *stringname, int *ovector)
232#else
233static int
234get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector)
235#endif
236{
237const REAL_PCRE *re = (const REAL_PCRE *)code;
238int entrysize;
239pcre_uchar *entry;
240#ifdef COMPILE_PCRE8
241char *first, *last;
242#else
243PCRE_UCHAR16 *first, *last;
244#endif
245
246#ifdef COMPILE_PCRE8
247if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
248  return pcre_get_stringnumber(code, stringname);
249entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
250#else
251if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
252  return pcre16_get_stringnumber(code, stringname);
253entrysize = pcre16_get_stringtable_entries(code, stringname, &first, &last);
254#endif
255if (entrysize <= 0) return entrysize;
256for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize)
257  {
258  int n = GET2(entry, 0);
259  if (ovector[n*2] >= 0) return n;
260  }
261return GET2(entry, 0);
262}
263
264
265
266
267/*************************************************
268*      Copy captured string to given buffer      *
269*************************************************/
270
271/* This function copies a single captured substring into a given buffer.
272Note that we use memcpy() rather than strncpy() in case there are binary zeros
273in the string.
274
275Arguments:
276  subject        the subject string that was matched
277  ovector        pointer to the offsets table
278  stringcount    the number of substrings that were captured
279                   (i.e. the yield of the pcre_exec call, unless
280                   that was zero, in which case it should be 1/3
281                   of the offset table size)
282  stringnumber   the number of the required substring
283  buffer         where to put the substring
284  size           the size of the buffer
285
286Returns:         if successful:
287                   the length of the copied string, not including the zero
288                   that is put on the end; can be zero
289                 if not successful:
290                   PCRE_ERROR_NOMEMORY (-6) buffer too small
291                   PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
292*/
293
294#ifdef COMPILE_PCRE8
295PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
296pcre_copy_substring(const char *subject, int *ovector, int stringcount,
297  int stringnumber, char *buffer, int size)
298#else
299PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
300pcre16_copy_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
301  int stringnumber, PCRE_UCHAR16 *buffer, int size)
302#endif
303{
304int yield;
305if (stringnumber < 0 || stringnumber >= stringcount)
306  return PCRE_ERROR_NOSUBSTRING;
307stringnumber *= 2;
308yield = ovector[stringnumber+1] - ovector[stringnumber];
309if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
310memcpy(buffer, subject + ovector[stringnumber], IN_UCHARS(yield));
311buffer[yield] = 0;
312return yield;
313}
314
315
316
317/*************************************************
318*   Copy named captured string to given buffer   *
319*************************************************/
320
321/* This function copies a single captured substring into a given buffer,
322identifying it by name. If the regex permits duplicate names, the first
323substring that is set is chosen.
324
325Arguments:
326  code           the compiled regex
327  subject        the subject string that was matched
328  ovector        pointer to the offsets table
329  stringcount    the number of substrings that were captured
330                   (i.e. the yield of the pcre_exec call, unless
331                   that was zero, in which case it should be 1/3
332                   of the offset table size)
333  stringname     the name of the required substring
334  buffer         where to put the substring
335  size           the size of the buffer
336
337Returns:         if successful:
338                   the length of the copied string, not including the zero
339                   that is put on the end; can be zero
340                 if not successful:
341                   PCRE_ERROR_NOMEMORY (-6) buffer too small
342                   PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
343*/
344
345#ifdef COMPILE_PCRE8
346PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
347pcre_copy_named_substring(const pcre *code, const char *subject,
348  int *ovector, int stringcount, const char *stringname,
349  char *buffer, int size)
350#else
351PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
352pcre16_copy_named_substring(const pcre16 *code, PCRE_SPTR16 subject,
353  int *ovector, int stringcount, PCRE_SPTR16 stringname,
354  PCRE_UCHAR16 *buffer, int size)
355#endif
356{
357int n = get_first_set(code, stringname, ovector);
358if (n <= 0) return n;
359#ifdef COMPILE_PCRE8
360return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
361#else
362return pcre16_copy_substring(subject, ovector, stringcount, n, buffer, size);
363#endif
364}
365
366
367
368/*************************************************
369*      Copy all captured strings to new store    *
370*************************************************/
371
372/* This function gets one chunk of store and builds a list of pointers and all
373of the captured substrings in it. A NULL pointer is put on the end of the list.
374
375Arguments:
376  subject        the subject string that was matched
377  ovector        pointer to the offsets table
378  stringcount    the number of substrings that were captured
379                   (i.e. the yield of the pcre_exec call, unless
380                   that was zero, in which case it should be 1/3
381                   of the offset table size)
382  listptr        set to point to the list of pointers
383
384Returns:         if successful: 0
385                 if not successful:
386                   PCRE_ERROR_NOMEMORY (-6) failed to get store
387*/
388
389#ifdef COMPILE_PCRE8
390PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
391pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
392  const char ***listptr)
393#else
394PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
395pcre16_get_substring_list(PCRE_SPTR16 subject, int *ovector, int stringcount,
396  PCRE_SPTR16 **listptr)
397#endif
398{
399int i;
400int size = sizeof(pcre_uchar *);
401int double_count = stringcount * 2;
402pcre_uchar **stringlist;
403pcre_uchar *p;
404
405for (i = 0; i < double_count; i += 2)
406  size += sizeof(pcre_uchar *) + IN_UCHARS(ovector[i+1] - ovector[i] + 1);
407
408stringlist = (pcre_uchar **)(PUBL(malloc))(size);
409if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
410
411#ifdef COMPILE_PCRE8
412*listptr = (const char **)stringlist;
413#else
414*listptr = (PCRE_SPTR16 *)stringlist;
415#endif
416p = (pcre_uchar *)(stringlist + stringcount + 1);
417
418for (i = 0; i < double_count; i += 2)
419  {
420  int len = ovector[i+1] - ovector[i];
421  memcpy(p, subject + ovector[i], IN_UCHARS(len));
422  *stringlist++ = p;
423  p += len;
424  *p++ = 0;
425  }
426
427*stringlist = NULL;
428return 0;
429}
430
431
432
433/*************************************************
434*   Free store obtained by get_substring_list    *
435*************************************************/
436
437/* This function exists for the benefit of people calling PCRE from non-C
438programs that can call its functions, but not free() or (PUBL(free))()
439directly.
440
441Argument:   the result of a previous pcre_get_substring_list()
442Returns:    nothing
443*/
444
445#ifdef COMPILE_PCRE8
446PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
447pcre_free_substring_list(const char **pointer)
448#else
449PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
450pcre16_free_substring_list(PCRE_SPTR16 *pointer)
451#endif
452{
453(PUBL(free))((void *)pointer);
454}
455
456
457
458/*************************************************
459*      Copy captured string to new store         *
460*************************************************/
461
462/* This function copies a single captured substring into a piece of new
463store
464
465Arguments:
466  subject        the subject string that was matched
467  ovector        pointer to the offsets table
468  stringcount    the number of substrings that were captured
469                   (i.e. the yield of the pcre_exec call, unless
470                   that was zero, in which case it should be 1/3
471                   of the offset table size)
472  stringnumber   the number of the required substring
473  stringptr      where to put a pointer to the substring
474
475Returns:         if successful:
476                   the length of the string, not including the zero that
477                   is put on the end; can be zero
478                 if not successful:
479                   PCRE_ERROR_NOMEMORY (-6) failed to get store
480                   PCRE_ERROR_NOSUBSTRING (-7) substring not present
481*/
482
483#ifdef COMPILE_PCRE8
484PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
485pcre_get_substring(const char *subject, int *ovector, int stringcount,
486  int stringnumber, const char **stringptr)
487#else
488PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
489pcre16_get_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
490  int stringnumber, PCRE_SPTR16 *stringptr)
491#endif
492{
493int yield;
494pcre_uchar *substring;
495if (stringnumber < 0 || stringnumber >= stringcount)
496  return PCRE_ERROR_NOSUBSTRING;
497stringnumber *= 2;
498yield = ovector[stringnumber+1] - ovector[stringnumber];
499substring = (pcre_uchar *)(PUBL(malloc))(IN_UCHARS(yield + 1));
500if (substring == NULL) return PCRE_ERROR_NOMEMORY;
501memcpy(substring, subject + ovector[stringnumber], IN_UCHARS(yield));
502substring[yield] = 0;
503#ifdef COMPILE_PCRE8
504*stringptr = (const char *)substring;
505#else
506*stringptr = (PCRE_SPTR16)substring;
507#endif
508return yield;
509}
510
511
512
513/*************************************************
514*   Copy named captured string to new store      *
515*************************************************/
516
517/* This function copies a single captured substring, identified by name, into
518new store. If the regex permits duplicate names, the first substring that is
519set is chosen.
520
521Arguments:
522  code           the compiled regex
523  subject        the subject string that was matched
524  ovector        pointer to the offsets table
525  stringcount    the number of substrings that were captured
526                   (i.e. the yield of the pcre_exec call, unless
527                   that was zero, in which case it should be 1/3
528                   of the offset table size)
529  stringname     the name of the required substring
530  stringptr      where to put the pointer
531
532Returns:         if successful:
533                   the length of the copied string, not including the zero
534                   that is put on the end; can be zero
535                 if not successful:
536                   PCRE_ERROR_NOMEMORY (-6) couldn't get memory
537                   PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
538*/
539
540#ifdef COMPILE_PCRE8
541PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
542pcre_get_named_substring(const pcre *code, const char *subject,
543  int *ovector, int stringcount, const char *stringname,
544  const char **stringptr)
545#else
546PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
547pcre16_get_named_substring(const pcre16 *code, PCRE_SPTR16 subject,
548  int *ovector, int stringcount, PCRE_SPTR16 stringname,
549  PCRE_SPTR16 *stringptr)
550#endif
551{
552int n = get_first_set(code, stringname, ovector);
553if (n <= 0) return n;
554#ifdef COMPILE_PCRE8
555return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
556#else
557return pcre16_get_substring(subject, ovector, stringcount, n, stringptr);
558#endif
559}
560
561
562
563
564/*************************************************
565*       Free store obtained by get_substring     *
566*************************************************/
567
568/* This function exists for the benefit of people calling PCRE from non-C
569programs that can call its functions, but not free() or (PUBL(free))()
570directly.
571
572Argument:   the result of a previous pcre_get_substring()
573Returns:    nothing
574*/
575
576#ifdef COMPILE_PCRE8
577PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
578pcre_free_substring(const char *pointer)
579#else
580PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
581pcre16_free_substring(PCRE_SPTR16 pointer)
582#endif
583{
584(PUBL(free))((void *)pointer);
585}
586
587/* End of pcre_get.c */
588