1<html>
2<head>
3<title>pcredemo specification</title>
4</head>
5<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
6<h1>pcredemo man page</h1>
7<p>
8Return to the <a href="index.html">PCRE index page</a>.
9</p>
10<p>
11This page is part of the PCRE HTML documentation. It was generated automatically
12from the original man page. If there is any nonsense in it, please consult the
13man page, in case the conversion went wrong.
14<br>
15<ul>
16</ul>
17<PRE>
18/*************************************************
19*           PCRE DEMONSTRATION PROGRAM           *
20*************************************************/
21
22/* This is a demonstration program to illustrate the most straightforward ways
23of calling the PCRE regular expression library from a C program. See the
24pcresample documentation for a short discussion ("man pcresample" if you have
25the PCRE man pages installed).
26
27In Unix-like environments, if PCRE is installed in your standard system
28libraries, you should be able to compile this program using this command:
29
30gcc -Wall pcredemo.c -lpcre -o pcredemo
31
32If PCRE is not installed in a standard place, it is likely to be installed with
33support for the pkg-config mechanism. If you have pkg-config, you can compile
34this program using this command:
35
36gcc -Wall pcredemo.c `pkg-config --cflags --libs libpcre` -o pcredemo
37
38If you do not have pkg-config, you may have to use this:
39
40gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \
41  -R/usr/local/lib -lpcre -o pcredemo
42
43Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
44library files for PCRE are installed on your system. Only some operating
45systems (e.g. Solaris) use the -R option.
46
47Building under Windows:
48
49If you want to statically link this program against a non-dll .a file, you must
50define PCRE_STATIC before including pcre.h, otherwise the pcre_malloc() and
51pcre_free() exported functions will be declared __declspec(dllimport), with
52unwanted results. So in this environment, uncomment the following line. */
53
54/* #define PCRE_STATIC */
55
56#include &lt;stdio.h&gt;
57#include &lt;string.h&gt;
58#include &lt;pcre.h&gt;
59
60#define OVECCOUNT 30    /* should be a multiple of 3 */
61
62
63int main(int argc, char **argv)   /* Compile the pattern argument and match it against the subject argument */
64{                                 /* Returns 1 on bad usage, compile failure, a failed first match, or a later matching error; otherwise 0 */
65pcre *re;                         /* compiled pattern returned by pcre_compile() */
66const char *error;                /* error message filled in by pcre_compile() */
67char *pattern;                    /* first non-option argument */
68char *subject;                    /* second non-option argument */
69unsigned char *name_table;        /* table fetched with PCRE_INFO_NAMETABLE */
70int erroffset;                    /* error offset filled in by pcre_compile() */
71int find_all;                     /* non-zero if -g was given */
72int namecount;                    /* value fetched with PCRE_INFO_NAMECOUNT */
73int name_entry_size;              /* value fetched with PCRE_INFO_NAMEENTRYSIZE */
74int ovector[OVECCOUNT];           /* output vector passed to pcre_exec() */
75int subject_length;               /* strlen(subject), cast to int */
76int rc, i;                        /* rc: result of the latest pcre_exec(); i: loop index */
77
78
79/**************************************************************************
80* First, sort out the command line. There is only one possible option at  *
81* the moment, "-g" to request repeated matching to find all occurrences,  *
82* like Perl's /g option. We set the variable find_all to a non-zero value *
83* if the -g option is present. Apart from that, there must be exactly two *
84* arguments.                                                              *
85**************************************************************************/
86
87find_all = 0;
88for (i = 1; i &lt; argc; i++)
89  {
90  if (strcmp(argv[i], "-g") == 0) find_all = 1;
91    else break;
92  }
93
94/* After the options, we require exactly two arguments, which are the pattern,
95and the subject string. */
96
97if (argc - i != 2)
98  {
99  printf("Two arguments required: a regex and a subject string\n");
100  return 1;
101  }
102
103pattern = argv[i];
104subject = argv[i+1];
105subject_length = (int)strlen(subject);
106
107
108/*************************************************************************
109* Now we are going to compile the regular expression pattern, and handle *
110* any errors that are detected.                                          *
111*************************************************************************/
112
113re = pcre_compile(
114  pattern,              /* the pattern */
115  0,                    /* default options */
116  &amp;error,               /* for error message */
117  &amp;erroffset,           /* for error offset */
118  NULL);                /* use default character tables */
119
120/* Compilation failed: print the error message and exit */
121
122if (re == NULL)
123  {
124  printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
125  return 1;
126  }
127
128
129/*************************************************************************
130* If the compilation succeeded, we call PCRE again, in order to do a     *
131* pattern match against the subject string. This does just ONE match. If *
132* further matching is needed, it will be done below.                     *
133*************************************************************************/
134
135rc = pcre_exec(
136  re,                   /* the compiled pattern */
137  NULL,                 /* no extra data - we didn't study the pattern */
138  subject,              /* the subject string */
139  subject_length,       /* the length of the subject */
140  0,                    /* start at offset 0 in the subject */
141  0,                    /* default options */
142  ovector,              /* output vector for substring information */
143  OVECCOUNT);           /* number of elements in the output vector */
144
145/* Matching failed: handle error cases */
146
147if (rc &lt; 0)
148  {
149  switch(rc)
150    {
151    case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
152    /*
153    Handle other special cases if you like
154    */
155    default: printf("Matching error %d\n", rc); break;
156    }
157  pcre_free(re);     /* Release memory used for the compiled pattern */
158  return 1;
159  }
160
161/* Match succeeded */
162
163printf("\nMatch succeeded at offset %d\n", ovector[0]);
164
165
166/*************************************************************************
167* We have found the first match within the subject string. If the output *
168* vector wasn't big enough, say so. Then output any substrings that were *
169* captured.                                                              *
170*************************************************************************/
171
172/* The output vector wasn't big enough */
173
174if (rc == 0)
175  {
176  rc = OVECCOUNT/3;     /* so the loop below prints OVECCOUNT/3 offset pairs */
177  printf("ovector only has room for %d captured substrings\n", rc - 1);
178  }
179
180/* Show substrings stored in the output vector by number. Obviously, in a real
181application you might want to do things other than print them. */
182
183for (i = 0; i &lt; rc; i++)
184  {
185  char *substring_start = subject + ovector[2*i];
186  int substring_length = ovector[2*i+1] - ovector[2*i];
187  printf("%2d: %.*s\n", i, substring_length, substring_start);
188  }
189
190
191/**************************************************************************
192* That concludes the basic part of this demonstration program. We have    *
193* compiled a pattern, and performed a single match. The code that follows *
194* shows first how to access named substrings, and then how to code for    *
195* repeated matches on the same subject.                                   *
196**************************************************************************/
197
198/* See if there are any named substrings, and if so, show them by name. First
199we have to extract the count of named parentheses from the pattern. */
200
201(void)pcre_fullinfo(    /* NOTE(review): result discarded; namecount has no initial value if this call fails */
202  re,                   /* the compiled pattern */
203  NULL,                 /* no extra data - we didn't study the pattern */
204  PCRE_INFO_NAMECOUNT,  /* number of named substrings */
205  &amp;namecount);          /* where to put the answer */
206
207if (namecount &lt;= 0) printf("No named substrings\n"); else
208  {
209  unsigned char *tabptr;
210  printf("Named substrings\n");
211
212  /* Before we can access the substrings, we must extract the table for
213  translating names to numbers, and the size of each entry in the table. */
214
215  (void)pcre_fullinfo(
216    re,                       /* the compiled pattern */
217    NULL,                     /* no extra data - we didn't study the pattern */
218    PCRE_INFO_NAMETABLE,      /* address of the table */
219    &amp;name_table);             /* where to put the answer */
220
221  (void)pcre_fullinfo(
222    re,                       /* the compiled pattern */
223    NULL,                     /* no extra data - we didn't study the pattern */
224    PCRE_INFO_NAMEENTRYSIZE,  /* size of each entry in the table */
225    &amp;name_entry_size);        /* where to put the answer */
226
227  /* Now we can scan the table and, for each entry, print the number, the name,
228  and the substring itself. */
229
230  tabptr = name_table;
231  for (i = 0; i &lt; namecount; i++)
232    {
233    int n = (tabptr[0] &lt;&lt; 8) | tabptr[1];   /* number is read from the entry's first two bytes, high byte first */
234    printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,   /* the name is printed from byte 2 of the entry */
235      ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);   /* NOTE(review): n is not range-checked against rc or OVECCOUNT */
236    tabptr += name_entry_size;
237    }
238  }
239
240
241/*************************************************************************
242* If the "-g" option was given on the command line, we want to continue  *
243* to search for additional matches in the subject string, in a similar   *
244* way to the /g option in Perl. This turns out to be trickier than you   *
245* might think because of the possibility of matching an empty string.    *
246* What happens is as follows:                                            *
247*                                                                        *
248* If the previous match was NOT for an empty string, we can just start   *
249* the next match at the end of the previous one.                         *
250*                                                                        *
251* If the previous match WAS for an empty string, we can't do that, as it *
252* would lead to an infinite loop. Instead, a special call of pcre_exec() *
253* is made with the PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED flags set.    *
254* The first of these tells PCRE that an empty string at the start of the *
255* subject is not a valid match; other possibilities must be tried. The   *
256* second flag restricts PCRE to one match attempt at the initial string  *
257* position. If this match succeeds, an alternative to the empty string   *
258* match has been found, and we can proceed round the loop.               *
259*************************************************************************/
260
261if (!find_all)
262  {
263  pcre_free(re);   /* Release the memory used for the compiled pattern */
264  return 0;        /* Finish unless -g was given */
265  }
266
267/* Loop for second and subsequent matches */
268
269for (;;)
270  {
271  int options = 0;                 /* Normally no options */
272  int start_offset = ovector[1];   /* Start at end of previous match */
273
274  /* If the previous match was for an empty string, we are finished if we are
275  at the end of the subject. Otherwise, arrange to run another match at the
276  same point to see if a non-empty match can be found. */
277
278  if (ovector[0] == ovector[1])
279    {
280    if (ovector[0] == subject_length) break;
281    options = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
282    }
283
284  /* Run the next matching operation */
285
286  rc = pcre_exec(
287    re,                   /* the compiled pattern */
288    NULL,                 /* no extra data - we didn't study the pattern */
289    subject,              /* the subject string */
290    subject_length,       /* the length of the subject */
291    start_offset,         /* starting offset in the subject */
292    options,              /* options */
293    ovector,              /* output vector for substring information */
294    OVECCOUNT);           /* number of elements in the output vector */
295
296  /* This time, a result of NOMATCH isn't an error. If the value in "options"
297  is zero, it just means we have found all possible matches, so the loop ends.
298  Otherwise, it means we have failed to find a non-empty-string match at a
299  point where there was a previous empty-string match. In this case, we do what
300  Perl does: advance the matching position by one, and continue. We do this by
301  setting the "end of previous match" offset, because that is picked up at the
302  top of the loop as the point at which to start again. */
303
304  if (rc == PCRE_ERROR_NOMATCH)
305    {
306    if (options == 0) break;
307    ovector[1] = start_offset + 1;
308    continue;    /* Go round the loop again */
309    }
310
311  /* Other matching errors are not recoverable. */
312
313  if (rc &lt; 0)
314    {
315    printf("Matching error %d\n", rc);
316    pcre_free(re);    /* Release memory used for the compiled pattern */
317    return 1;
318    }
319
320  /* Match succeeded */
321
322  printf("\nMatch succeeded again at offset %d\n", ovector[0]);
323
324  /* The match succeeded, but the output vector wasn't big enough. */
325
326  if (rc == 0)
327    {
328    rc = OVECCOUNT/3;     /* so the loop below prints OVECCOUNT/3 offset pairs */
329    printf("ovector only has room for %d captured substrings\n", rc - 1);
330    }
331
332  /* As before, show substrings stored in the output vector by number, and then
333  also any named substrings. */
334
335  for (i = 0; i &lt; rc; i++)
336    {
337    char *substring_start = subject + ovector[2*i];
338    int substring_length = ovector[2*i+1] - ovector[2*i];
339    printf("%2d: %.*s\n", i, substring_length, substring_start);
340    }
341
342  if (namecount &lt;= 0) printf("No named substrings\n"); else
343    {
344    unsigned char *tabptr = name_table;   /* table and entry size were fetched after the first match */
345    printf("Named substrings\n");
346    for (i = 0; i &lt; namecount; i++)
347      {
348      int n = (tabptr[0] &lt;&lt; 8) | tabptr[1];   /* number is read from the entry's first two bytes, high byte first */
349      printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,   /* the name is printed from byte 2 of the entry */
350        ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);   /* NOTE(review): n is not range-checked against rc or OVECCOUNT */
351      tabptr += name_entry_size;
352      }
353    }
354  }      /* End of loop to find second and subsequent matches */
355
356printf("\n");
357pcre_free(re);       /* Release memory used for the compiled pattern */
358return 0;
359}
360
361/* End of pcredemo.c */
362</PRE><p>
363Return to the <a href="index.html">PCRE index page</a>.
364</p>
365