1/*************************************************
2*           PCRE DEMONSTRATION PROGRAM           *
3*************************************************/
4
5/* This is a demonstration program to illustrate the most straightforward ways
6of calling the PCRE regular expression library from a C program. See the
7pcresample documentation for a short discussion ("man pcresample" if you have
8the PCRE man pages installed).
9
10In Unix-like environments, if PCRE is installed in your standard system
11libraries, you should be able to compile this program using this command:
12
13gcc -Wall pcredemo.c -lpcre -o pcredemo
14
15If PCRE is not installed in a standard place, it is likely to be installed with
16support for the pkg-config mechanism. If you have pkg-config, you can compile
17this program using this command:
18
19gcc -Wall pcredemo.c `pkg-config --cflags --libs libpcre` -o pcredemo
20
21If you do not have pkg-config, you may have to use this:
22
23gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \
24  -R/usr/local/lib -lpcre -o pcredemo
25
26Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
27library files for PCRE are installed on your system. Only some operating
28systems (e.g. Solaris) use the -R option.
29
30Building under Windows:
31
32If you want to statically link this program against a non-dll .a file, you must
33define PCRE_STATIC before including pcre.h, otherwise the pcre_malloc() and
34pcre_free() exported functions will be declared __declspec(dllimport), with
35unwanted results. So in this environment, uncomment the following line. */
36
37/* #define PCRE_STATIC */
38
39#include <stdio.h>
40#include <string.h>
41#include <pcre.h>
42
43#define OVECCOUNT 30    /* should be a multiple of 3 */
44
45
46int main(int argc, char **argv)
47{
48pcre *re;
49const char *error;
50char *pattern;
51char *subject;
52unsigned char *name_table;
53int erroffset;
54int find_all;
55int namecount;
56int name_entry_size;
57int ovector[OVECCOUNT];
58int subject_length;
59int rc, i;
60
61
62/**************************************************************************
63* First, sort out the command line. There is only one possible option at  *
64* the moment, "-g" to request repeated matching to find all occurrences,  *
65* like Perl's /g option. We set the variable find_all to a non-zero value *
66* if the -g option is present. Apart from that, there must be exactly two *
67* arguments.                                                              *
68**************************************************************************/
69
70find_all = 0;
71for (i = 1; i < argc; i++)
72  {
73  if (strcmp(argv[i], "-g") == 0) find_all = 1;
74    else break;
75  }
76
77/* After the options, we require exactly two arguments, which are the pattern,
78and the subject string. */
79
80if (argc - i != 2)
81  {
82  printf("Two arguments required: a regex and a subject string\n");
83  return 1;
84  }
85
86pattern = argv[i];
87subject = argv[i+1];
88subject_length = (int)strlen(subject);
89
90
91/*************************************************************************
92* Now we are going to compile the regular expression pattern, and handle *
93* and errors that are detected.                                          *
94*************************************************************************/
95
96re = pcre_compile(
97  pattern,              /* the pattern */
98  0,                    /* default options */
99  &error,               /* for error message */
100  &erroffset,           /* for error offset */
101  NULL);                /* use default character tables */
102
103/* Compilation failed: print the error message and exit */
104
105if (re == NULL)
106  {
107  printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
108  return 1;
109  }
110
111
112/*************************************************************************
113* If the compilation succeeded, we call PCRE again, in order to do a     *
114* pattern match against the subject string. This does just ONE match. If *
115* further matching is needed, it will be done below.                     *
116*************************************************************************/
117
118rc = pcre_exec(
119  re,                   /* the compiled pattern */
120  NULL,                 /* no extra data - we didn't study the pattern */
121  subject,              /* the subject string */
122  subject_length,       /* the length of the subject */
123  0,                    /* start at offset 0 in the subject */
124  0,                    /* default options */
125  ovector,              /* output vector for substring information */
126  OVECCOUNT);           /* number of elements in the output vector */
127
128/* Matching failed: handle error cases */
129
130if (rc < 0)
131  {
132  switch(rc)
133    {
134    case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
135    /*
136    Handle other special cases if you like
137    */
138    default: printf("Matching error %d\n", rc); break;
139    }
140  pcre_free(re);     /* Release memory used for the compiled pattern */
141  return 1;
142  }
143
144/* Match succeded */
145
146printf("\nMatch succeeded at offset %d\n", ovector[0]);
147
148
149/*************************************************************************
150* We have found the first match within the subject string. If the output *
151* vector wasn't big enough, say so. Then output any substrings that were *
152* captured.                                                              *
153*************************************************************************/
154
155/* The output vector wasn't big enough */
156
157if (rc == 0)
158  {
159  rc = OVECCOUNT/3;
160  printf("ovector only has room for %d captured substrings\n", rc - 1);
161  }
162
163/* Show substrings stored in the output vector by number. Obviously, in a real
164application you might want to do things other than print them. */
165
166for (i = 0; i < rc; i++)
167  {
168  char *substring_start = subject + ovector[2*i];
169  int substring_length = ovector[2*i+1] - ovector[2*i];
170  printf("%2d: %.*s\n", i, substring_length, substring_start);
171  }
172
173
174/**************************************************************************
175* That concludes the basic part of this demonstration program. We have    *
176* compiled a pattern, and performed a single match. The code that follows *
177* shows first how to access named substrings, and then how to code for    *
178* repeated matches on the same subject.                                   *
179**************************************************************************/
180
181/* See if there are any named substrings, and if so, show them by name. First
182we have to extract the count of named parentheses from the pattern. */
183
184(void)pcre_fullinfo(
185  re,                   /* the compiled pattern */
186  NULL,                 /* no extra data - we didn't study the pattern */
187  PCRE_INFO_NAMECOUNT,  /* number of named substrings */
188  &namecount);          /* where to put the answer */
189
190if (namecount <= 0) printf("No named substrings\n"); else
191  {
192  unsigned char *tabptr;
193  printf("Named substrings\n");
194
195  /* Before we can access the substrings, we must extract the table for
196  translating names to numbers, and the size of each entry in the table. */
197
198  (void)pcre_fullinfo(
199    re,                       /* the compiled pattern */
200    NULL,                     /* no extra data - we didn't study the pattern */
201    PCRE_INFO_NAMETABLE,      /* address of the table */
202    &name_table);             /* where to put the answer */
203
204  (void)pcre_fullinfo(
205    re,                       /* the compiled pattern */
206    NULL,                     /* no extra data - we didn't study the pattern */
207    PCRE_INFO_NAMEENTRYSIZE,  /* size of each entry in the table */
208    &name_entry_size);        /* where to put the answer */
209
210  /* Now we can scan the table and, for each entry, print the number, the name,
211  and the substring itself. */
212
213  tabptr = name_table;
214  for (i = 0; i < namecount; i++)
215    {
216    int n = (tabptr[0] << 8) | tabptr[1];
217    printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
218      ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
219    tabptr += name_entry_size;
220    }
221  }
222
223
224/*************************************************************************
225* If the "-g" option was given on the command line, we want to continue  *
226* to search for additional matches in the subject string, in a similar   *
227* way to the /g option in Perl. This turns out to be trickier than you   *
228* might think because of the possibility of matching an empty string.    *
229* What happens is as follows:                                            *
230*                                                                        *
231* If the previous match was NOT for an empty string, we can just start   *
232* the next match at the end of the previous one.                         *
233*                                                                        *
234* If the previous match WAS for an empty string, we can't do that, as it *
235* would lead to an infinite loop. Instead, a special call of pcre_exec() *
236* is made with the PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED flags set.    *
237* The first of these tells PCRE that an empty string at the start of the *
238* subject is not a valid match; other possibilities must be tried. The   *
239* second flag restricts PCRE to one match attempt at the initial string  *
240* position. If this match succeeds, an alternative to the empty string   *
241* match has been found, and we can proceed round the loop.               *
242*************************************************************************/
243
244if (!find_all)
245  {
246  pcre_free(re);   /* Release the memory used for the compiled pattern */
247  return 0;        /* Finish unless -g was given */
248  }
249
250/* Loop for second and subsequent matches */
251
252for (;;)
253  {
254  int options = 0;                 /* Normally no options */
255  int start_offset = ovector[1];   /* Start at end of previous match */
256
257  /* If the previous match was for an empty string, we are finished if we are
258  at the end of the subject. Otherwise, arrange to run another match at the
259  same point to see if a non-empty match can be found. */
260
261  if (ovector[0] == ovector[1])
262    {
263    if (ovector[0] == subject_length) break;
264    options = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
265    }
266
267  /* Run the next matching operation */
268
269  rc = pcre_exec(
270    re,                   /* the compiled pattern */
271    NULL,                 /* no extra data - we didn't study the pattern */
272    subject,              /* the subject string */
273    subject_length,       /* the length of the subject */
274    start_offset,         /* starting offset in the subject */
275    options,              /* options */
276    ovector,              /* output vector for substring information */
277    OVECCOUNT);           /* number of elements in the output vector */
278
279  /* This time, a result of NOMATCH isn't an error. If the value in "options"
280  is zero, it just means we have found all possible matches, so the loop ends.
281  Otherwise, it means we have failed to find a non-empty-string match at a
282  point where there was a previous empty-string match. In this case, we do what
283  Perl does: advance the matching position by one, and continue. We do this by
284  setting the "end of previous match" offset, because that is picked up at the
285  top of the loop as the point at which to start again. */
286
287  if (rc == PCRE_ERROR_NOMATCH)
288    {
289    if (options == 0) break;
290    ovector[1] = start_offset + 1;
291    continue;    /* Go round the loop again */
292    }
293
294  /* Other matching errors are not recoverable. */
295
296  if (rc < 0)
297    {
298    printf("Matching error %d\n", rc);
299    pcre_free(re);    /* Release memory used for the compiled pattern */
300    return 1;
301    }
302
303  /* Match succeded */
304
305  printf("\nMatch succeeded again at offset %d\n", ovector[0]);
306
307  /* The match succeeded, but the output vector wasn't big enough. */
308
309  if (rc == 0)
310    {
311    rc = OVECCOUNT/3;
312    printf("ovector only has room for %d captured substrings\n", rc - 1);
313    }
314
315  /* As before, show substrings stored in the output vector by number, and then
316  also any named substrings. */
317
318  for (i = 0; i < rc; i++)
319    {
320    char *substring_start = subject + ovector[2*i];
321    int substring_length = ovector[2*i+1] - ovector[2*i];
322    printf("%2d: %.*s\n", i, substring_length, substring_start);
323    }
324
325  if (namecount <= 0) printf("No named substrings\n"); else
326    {
327    unsigned char *tabptr = name_table;
328    printf("Named substrings\n");
329    for (i = 0; i < namecount; i++)
330      {
331      int n = (tabptr[0] << 8) | tabptr[1];
332      printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
333        ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
334      tabptr += name_entry_size;
335      }
336    }
337  }      /* End of loop to find second and subsequent matches */
338
339printf("\n");
340pcre_free(re);       /* Release memory used for the compiled pattern */
341return 0;
342}
343
344/* End of pcredemo.c */
345