1/*************************************************
2*      Perl-Compatible Regular Expressions       *
3*************************************************/
4
5/* PCRE is a library of functions to support regular expressions whose syntax
6and semantics are as close as possible to those of the Perl 5 language.
7
8                       Written by Philip Hazel
9           Copyright (c) 1997-2008 University of Cambridge
10
11-----------------------------------------------------------------------------
12Redistribution and use in source and binary forms, with or without
13modification, are permitted provided that the following conditions are met:
14
15    * Redistributions of source code must retain the above copyright notice,
16      this list of conditions and the following disclaimer.
17
18    * Redistributions in binary form must reproduce the above copyright
19      notice, this list of conditions and the following disclaimer in the
20      documentation and/or other materials provided with the distribution.
21
22    * Neither the name of the University of Cambridge nor the names of its
23      contributors may be used to endorse or promote products derived from
24      this software without specific prior written permission.
25
26THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36POSSIBILITY OF SUCH DAMAGE.
37-----------------------------------------------------------------------------
38*/
39
40
41/* This module contains some convenience functions for extracting substrings
42from the subject string after a regex match has succeeded. The original idea
43for these functions came from Scott Wimer. */
44
45
46#ifdef HAVE_CONFIG_H
47#include "config.h"
48#endif
49
50#include "pcre_internal.h"
51
52
53/*************************************************
54*           Find number for named string         *
55*************************************************/
56
57/* This function is used by the get_first_set() function below, as well
58as being generally available. It assumes that names are unique.
59
60Arguments:
61  code        the compiled regex
62  stringname  the name whose number is required
63
64Returns:      the number of the named parentheses, or a negative number
65                (PCRE_ERROR_NOSUBSTRING) if not found
66*/
67
68PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
69pcre_get_stringnumber(const pcre *code, const char *stringname)
70{
71int rc;
72int entrysize;
73int top, bot;
74uschar *nametable;
75
76if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
77  return rc;
78if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
79
80if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
81  return rc;
82if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
83  return rc;
84
85bot = 0;
86while (top > bot)
87  {
88  int mid = (top + bot) / 2;
89  uschar *entry = nametable + entrysize*mid;
90  int c = strcmp(stringname, (char *)(entry + 2));
91  if (c == 0) return (entry[0] << 8) + entry[1];
92  if (c > 0) bot = mid + 1; else top = mid;
93  }
94
95return PCRE_ERROR_NOSUBSTRING;
96}
97
98
99
100/*************************************************
101*     Find (multiple) entries for named string   *
102*************************************************/
103
104/* This is used by the get_first_set() function below, as well as being
105generally available. It is used when duplicated names are permitted.
106
107Arguments:
108  code        the compiled regex
109  stringname  the name whose entries required
110  firstptr    where to put the pointer to the first entry
111  lastptr     where to put the pointer to the last entry
112
113Returns:      the length of each entry, or a negative number
114                (PCRE_ERROR_NOSUBSTRING) if not found
115*/
116
117PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
118pcre_get_stringtable_entries(const pcre *code, const char *stringname,
119  char **firstptr, char **lastptr)
120{
121int rc;
122int entrysize;
123int top, bot;
124uschar *nametable, *lastentry;
125
126if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
127  return rc;
128if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
129
130if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
131  return rc;
132if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
133  return rc;
134
135lastentry = nametable + entrysize * (top - 1);
136bot = 0;
137while (top > bot)
138  {
139  int mid = (top + bot) / 2;
140  uschar *entry = nametable + entrysize*mid;
141  int c = strcmp(stringname, (char *)(entry + 2));
142  if (c == 0)
143    {
144    uschar *first = entry;
145    uschar *last = entry;
146    while (first > nametable)
147      {
148      if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
149      first -= entrysize;
150      }
151    while (last < lastentry)
152      {
153      if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
154      last += entrysize;
155      }
156    *firstptr = (char *)first;
157    *lastptr = (char *)last;
158    return entrysize;
159    }
160  if (c > 0) bot = mid + 1; else top = mid;
161  }
162
163return PCRE_ERROR_NOSUBSTRING;
164}
165
166
167
168/*************************************************
169*    Find first set of multiple named strings    *
170*************************************************/
171
172/* This function allows for duplicate names in the table of named substrings.
173It returns the number of the first one that was set in a pattern match.
174
175Arguments:
176  code         the compiled regex
177  stringname   the name of the capturing substring
178  ovector      the vector of matched substrings
179
180Returns:       the number of the first that is set,
181               or the number of the last one if none are set,
182               or a negative number on error
183*/
184
185static int
186get_first_set(const pcre *code, const char *stringname, int *ovector)
187{
188const real_pcre *re = (const real_pcre *)code;
189int entrysize;
190char *first, *last;
191uschar *entry;
192if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
193  return pcre_get_stringnumber(code, stringname);
194entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
195if (entrysize <= 0) return entrysize;
196for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
197  {
198  int n = (entry[0] << 8) + entry[1];
199  if (ovector[n*2] >= 0) return n;
200  }
201return (first[0] << 8) + first[1];
202}
203
204
205
206
207/*************************************************
208*      Copy captured string to given buffer      *
209*************************************************/
210
211/* This function copies a single captured substring into a given buffer.
212Note that we use memcpy() rather than strncpy() in case there are binary zeros
213in the string.
214
215Arguments:
216  subject        the subject string that was matched
217  ovector        pointer to the offsets table
218  stringcount    the number of substrings that were captured
219                   (i.e. the yield of the pcre_exec call, unless
220                   that was zero, in which case it should be 1/3
221                   of the offset table size)
222  stringnumber   the number of the required substring
223  buffer         where to put the substring
224  size           the size of the buffer
225
226Returns:         if successful:
227                   the length of the copied string, not including the zero
228                   that is put on the end; can be zero
229                 if not successful:
230                   PCRE_ERROR_NOMEMORY (-6) buffer too small
231                   PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
232*/
233
234PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
235pcre_copy_substring(const char *subject, int *ovector, int stringcount,
236  int stringnumber, char *buffer, int size)
237{
238int yield;
239if (stringnumber < 0 || stringnumber >= stringcount)
240  return PCRE_ERROR_NOSUBSTRING;
241stringnumber *= 2;
242yield = ovector[stringnumber+1] - ovector[stringnumber];
243if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
244memcpy(buffer, subject + ovector[stringnumber], yield);
245buffer[yield] = 0;
246return yield;
247}
248
249
250
251/*************************************************
252*   Copy named captured string to given buffer   *
253*************************************************/
254
255/* This function copies a single captured substring into a given buffer,
256identifying it by name. If the regex permits duplicate names, the first
257substring that is set is chosen.
258
259Arguments:
260  code           the compiled regex
261  subject        the subject string that was matched
262  ovector        pointer to the offsets table
263  stringcount    the number of substrings that were captured
264                   (i.e. the yield of the pcre_exec call, unless
265                   that was zero, in which case it should be 1/3
266                   of the offset table size)
267  stringname     the name of the required substring
268  buffer         where to put the substring
269  size           the size of the buffer
270
271Returns:         if successful:
272                   the length of the copied string, not including the zero
273                   that is put on the end; can be zero
274                 if not successful:
275                   PCRE_ERROR_NOMEMORY (-6) buffer too small
276                   PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
277*/
278
279PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
280pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
281  int stringcount, const char *stringname, char *buffer, int size)
282{
283int n = get_first_set(code, stringname, ovector);
284if (n <= 0) return n;
285return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
286}
287
288
289
290/*************************************************
291*      Copy all captured strings to new store    *
292*************************************************/
293
294/* This function gets one chunk of store and builds a list of pointers and all
295of the captured substrings in it. A NULL pointer is put on the end of the list.
296
297Arguments:
298  subject        the subject string that was matched
299  ovector        pointer to the offsets table
300  stringcount    the number of substrings that were captured
301                   (i.e. the yield of the pcre_exec call, unless
302                   that was zero, in which case it should be 1/3
303                   of the offset table size)
304  listptr        set to point to the list of pointers
305
306Returns:         if successful: 0
307                 if not successful:
308                   PCRE_ERROR_NOMEMORY (-6) failed to get store
309*/
310
311PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
312pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
313  const char ***listptr)
314{
315int i;
316int size = sizeof(char *);
317int double_count = stringcount * 2;
318char **stringlist;
319char *p;
320
321for (i = 0; i < double_count; i += 2)
322  size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
323
324stringlist = (char **)(pcre_malloc)(size);
325if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
326
327*listptr = (const char **)stringlist;
328p = (char *)(stringlist + stringcount + 1);
329
330for (i = 0; i < double_count; i += 2)
331  {
332  int len = ovector[i+1] - ovector[i];
333  memcpy(p, subject + ovector[i], len);
334  *stringlist++ = p;
335  p += len;
336  *p++ = 0;
337  }
338
339*stringlist = NULL;
340return 0;
341}
342
343
344
345/*************************************************
346*   Free store obtained by get_substring_list    *
347*************************************************/
348
349/* This function exists for the benefit of people calling PCRE from non-C
350programs that can call its functions, but not free() or (pcre_free)() directly.
351
352Argument:   the result of a previous pcre_get_substring_list()
353Returns:    nothing
354*/
355
356PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
357pcre_free_substring_list(const char **pointer)
358{
359(pcre_free)((void *)pointer);
360}
361
362
363
364/*************************************************
365*      Copy captured string to new store         *
366*************************************************/
367
368/* This function copies a single captured substring into a piece of new
369store
370
371Arguments:
372  subject        the subject string that was matched
373  ovector        pointer to the offsets table
374  stringcount    the number of substrings that were captured
375                   (i.e. the yield of the pcre_exec call, unless
376                   that was zero, in which case it should be 1/3
377                   of the offset table size)
378  stringnumber   the number of the required substring
379  stringptr      where to put a pointer to the substring
380
381Returns:         if successful:
382                   the length of the string, not including the zero that
383                   is put on the end; can be zero
384                 if not successful:
385                   PCRE_ERROR_NOMEMORY (-6) failed to get store
386                   PCRE_ERROR_NOSUBSTRING (-7) substring not present
387*/
388
389PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
390pcre_get_substring(const char *subject, int *ovector, int stringcount,
391  int stringnumber, const char **stringptr)
392{
393int yield;
394char *substring;
395if (stringnumber < 0 || stringnumber >= stringcount)
396  return PCRE_ERROR_NOSUBSTRING;
397stringnumber *= 2;
398yield = ovector[stringnumber+1] - ovector[stringnumber];
399substring = (char *)(pcre_malloc)(yield + 1);
400if (substring == NULL) return PCRE_ERROR_NOMEMORY;
401memcpy(substring, subject + ovector[stringnumber], yield);
402substring[yield] = 0;
403*stringptr = substring;
404return yield;
405}
406
407
408
409/*************************************************
410*   Copy named captured string to new store      *
411*************************************************/
412
413/* This function copies a single captured substring, identified by name, into
414new store. If the regex permits duplicate names, the first substring that is
415set is chosen.
416
417Arguments:
418  code           the compiled regex
419  subject        the subject string that was matched
420  ovector        pointer to the offsets table
421  stringcount    the number of substrings that were captured
422                   (i.e. the yield of the pcre_exec call, unless
423                   that was zero, in which case it should be 1/3
424                   of the offset table size)
425  stringname     the name of the required substring
426  stringptr      where to put the pointer
427
428Returns:         if successful:
429                   the length of the copied string, not including the zero
430                   that is put on the end; can be zero
431                 if not successful:
432                   PCRE_ERROR_NOMEMORY (-6) couldn't get memory
433                   PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
434*/
435
436PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
437pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
438  int stringcount, const char *stringname, const char **stringptr)
439{
440int n = get_first_set(code, stringname, ovector);
441if (n <= 0) return n;
442return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
443}
444
445
446
447
448/*************************************************
449*       Free store obtained by get_substring     *
450*************************************************/
451
452/* This function exists for the benefit of people calling PCRE from non-C
453programs that can call its functions, but not free() or (pcre_free)() directly.
454
455Argument:   the result of a previous pcre_get_substring()
456Returns:    nothing
457*/
458
459PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
460pcre_free_substring(const char *pointer)
461{
462(pcre_free)((void *)pointer);
463}
464
465/* End of pcre_get.c */
466