1<html>
2<head>
3<title>pcredemo specification</title>
4</head>
5<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
6<h1>pcredemo man page</h1>
7<p>
8Return to the <a href="index.html">PCRE index page</a>.
9</p>
10<p>
11This page is part of the PCRE HTML documentation. It was generated automatically
12from the original man page. If there is any nonsense in it, please consult the
13man page, in case the conversion went wrong.
14<br>
15<ul>
16</ul>
17<PRE>
18/*************************************************
19*           PCRE DEMONSTRATION PROGRAM           *
20*************************************************/
21
22/* This is a demonstration program to illustrate the most straightforward ways
23of calling the PCRE regular expression library from a C program. See the
24pcresample documentation for a short discussion ("man pcresample" if you have
25the PCRE man pages installed).
26
27In Unix-like environments, if PCRE is installed in your standard system
28libraries, you should be able to compile this program using this command:
29
30gcc -Wall pcredemo.c -lpcre -o pcredemo
31
32If PCRE is not installed in a standard place, it is likely to be installed with
33support for the pkg-config mechanism. If you have pkg-config, you can compile
34this program using this command:
35
36gcc -Wall pcredemo.c `pkg-config --cflags --libs libpcre` -o pcredemo
37
38If you do not have pkg-config, you may have to use this:
39
40gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \
41  -R/usr/local/lib -lpcre -o pcredemo
42
43Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
44library files for PCRE are installed on your system. Only some operating
45systems (e.g. Solaris) use the -R option.
46
47Building under Windows:
48
49If you want to statically link this program against a non-dll .a file, you must
50define PCRE_STATIC before including pcre.h, otherwise the pcre_malloc() and
51pcre_free() exported functions will be declared __declspec(dllimport), with
52unwanted results. So in this environment, uncomment the following line. */
53
54/* #define PCRE_STATIC */
55
56#include &lt;stdio.h&gt;
57#include &lt;string.h&gt;
58#include &lt;pcre.h&gt;
59
60#define OVECCOUNT 30    /* should be a multiple of 3 */
61
62
63int main(int argc, char **argv)
64{
65pcre *re;
66const char *error;
67char *pattern;
68char *subject;
69unsigned char *name_table;
70unsigned int option_bits;
71int erroffset;
72int find_all;
73int crlf_is_newline;
74int namecount;
75int name_entry_size;
76int ovector[OVECCOUNT];
77int subject_length;
78int rc, i;
79int utf8;
80
81
82/**************************************************************************
83* First, sort out the command line. There is only one possible option at  *
84* the moment, "-g" to request repeated matching to find all occurrences,  *
85* like Perl's /g option. We set the variable find_all to a non-zero value *
86* if the -g option is present. Apart from that, there must be exactly two *
87* arguments.                                                              *
88**************************************************************************/
89
90find_all = 0;
91for (i = 1; i &lt; argc; i++)
92  {
93  if (strcmp(argv[i], "-g") == 0) find_all = 1;
94    else break;
95  }
96
97/* After the options, we require exactly two arguments, which are the pattern,
98and the subject string. */
99
100if (argc - i != 2)
101  {
102  printf("Two arguments required: a regex and a subject string\n");
103  return 1;
104  }
105
106pattern = argv[i];
107subject = argv[i+1];
108subject_length = (int)strlen(subject);
109
110
111/*************************************************************************
112* Now we are going to compile the regular expression pattern, and handle *
113* any errors that are detected.                                          *
114*************************************************************************/
115
116re = pcre_compile(
117  pattern,              /* the pattern */
118  0,                    /* default options */
119  &amp;error,               /* for error message */
120  &amp;erroffset,           /* for error offset */
121  NULL);                /* use default character tables */
122
123/* Compilation failed: print the error message and exit */
124
125if (re == NULL)
126  {
127  printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
128  return 1;
129  }
130
131
132/*************************************************************************
133* If the compilation succeeded, we call PCRE again, in order to do a     *
134* pattern match against the subject string. This does just ONE match. If *
135* further matching is needed, it will be done below.                     *
136*************************************************************************/
137
138rc = pcre_exec(
139  re,                   /* the compiled pattern */
140  NULL,                 /* no extra data - we didn't study the pattern */
141  subject,              /* the subject string */
142  subject_length,       /* the length of the subject */
143  0,                    /* start at offset 0 in the subject */
144  0,                    /* default options */
145  ovector,              /* output vector for substring information */
146  OVECCOUNT);           /* number of elements in the output vector */
147
148/* Matching failed: handle error cases */
149
150if (rc &lt; 0)
151  {
152  switch(rc)
153    {
154    case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
155    /*
156    Handle other special cases if you like
157    */
158    default: printf("Matching error %d\n", rc); break;
159    }
160  pcre_free(re);     /* Release memory used for the compiled pattern */
161  return 1;
162  }
163
164/* Match succeeded */
165
166printf("\nMatch succeeded at offset %d\n", ovector[0]);
167
168
169/*************************************************************************
170* We have found the first match within the subject string. If the output *
171* vector wasn't big enough, say so. Then output any substrings that were *
172* captured.                                                              *
173*************************************************************************/
174
175/* The output vector wasn't big enough */
176
177if (rc == 0)
178  {
179  rc = OVECCOUNT/3;
180  printf("ovector only has room for %d captured substrings\n", rc - 1);
181  }
182
183/* Show substrings stored in the output vector by number. Obviously, in a real
184application you might want to do things other than print them. */
185
186for (i = 0; i &lt; rc; i++)
187  {
188  char *substring_start = subject + ovector[2*i];
189  int substring_length = ovector[2*i+1] - ovector[2*i];
190  printf("%2d: %.*s\n", i, substring_length, substring_start);
191  }
192
193
194/**************************************************************************
195* That concludes the basic part of this demonstration program. We have    *
196* compiled a pattern, and performed a single match. The code that follows *
197* shows first how to access named substrings, and then how to code for    *
198* repeated matches on the same subject.                                   *
199**************************************************************************/
200
201/* See if there are any named substrings, and if so, show them by name. First
202we have to extract the count of named parentheses from the pattern. */
203
204(void)pcre_fullinfo(
205  re,                   /* the compiled pattern */
206  NULL,                 /* no extra data - we didn't study the pattern */
207  PCRE_INFO_NAMECOUNT,  /* number of named substrings */
208  &amp;namecount);          /* where to put the answer */
209
210if (namecount &lt;= 0) printf("No named substrings\n"); else
211  {
212  unsigned char *tabptr;
213  printf("Named substrings\n");
214
215  /* Before we can access the substrings, we must extract the table for
216  translating names to numbers, and the size of each entry in the table. */
217
218  (void)pcre_fullinfo(
219    re,                       /* the compiled pattern */
220    NULL,                     /* no extra data - we didn't study the pattern */
221    PCRE_INFO_NAMETABLE,      /* address of the table */
222    &amp;name_table);             /* where to put the answer */
223
224  (void)pcre_fullinfo(
225    re,                       /* the compiled pattern */
226    NULL,                     /* no extra data - we didn't study the pattern */
227    PCRE_INFO_NAMEENTRYSIZE,  /* size of each entry in the table */
228    &amp;name_entry_size);        /* where to put the answer */
229
230  /* Now we can scan the table and, for each entry, print the number, the name,
231  and the substring itself. */
232
233  tabptr = name_table;
234  for (i = 0; i &lt; namecount; i++)
235    {
236    int n = (tabptr[0] &lt;&lt; 8) | tabptr[1];
237    printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
238      ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
239    tabptr += name_entry_size;
240    }
241  }
242
243
244/*************************************************************************
245* If the "-g" option was given on the command line, we want to continue  *
246* to search for additional matches in the subject string, in a similar   *
247* way to the /g option in Perl. This turns out to be trickier than you   *
248* might think because of the possibility of matching an empty string.    *
249* What happens is as follows:                                            *
250*                                                                        *
251* If the previous match was NOT for an empty string, we can just start   *
252* the next match at the end of the previous one.                         *
253*                                                                        *
254* If the previous match WAS for an empty string, we can't do that, as it *
255* would lead to an infinite loop. Instead, a special call of pcre_exec() *
256* is made with the PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED flags set.    *
257* The first of these tells PCRE that an empty string at the start of the *
258* subject is not a valid match; other possibilities must be tried. The   *
259* second flag restricts PCRE to one match attempt at the initial string  *
260* position. If this match succeeds, an alternative to the empty string   *
261* match has been found, and we can print it and proceed round the loop,  *
262* advancing by the length of whatever was found. If this match does not  *
263* succeed, we still stay in the loop, advancing by just one character.   *
264* In UTF-8 mode, which can be set by (*UTF8) in the pattern, this may be *
265* more than one byte.                                                    *
266*                                                                        *
267* However, there is a complication concerned with newlines. When the     *
268* newline convention is such that CRLF is a valid newline, we must       *
269* advance by two characters rather than one. The newline convention can  *
270* be set in the regex by (*CR), etc.; if not, we must find the default.  *
271*************************************************************************/
272
273if (!find_all)     /* Check for -g */
274  {
275  pcre_free(re);   /* Release the memory used for the compiled pattern */
276  return 0;        /* Finish unless -g was given */
277  }
278
279/* Before running the loop, check for UTF-8 and whether CRLF is a valid newline
280sequence. First, find the options with which the regex was compiled; extract
281the UTF-8 state, and mask off all but the newline options. */
282
283(void)pcre_fullinfo(re, NULL, PCRE_INFO_OPTIONS, &amp;option_bits);
284utf8 = option_bits &amp; PCRE_UTF8;
285option_bits &amp;= PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_CRLF|
286               PCRE_NEWLINE_ANY|PCRE_NEWLINE_ANYCRLF;
287
288/* If no newline options were set, find the default newline convention from the
289build configuration. */
290
291if (option_bits == 0)
292  {
293  int d;
294  (void)pcre_config(PCRE_CONFIG_NEWLINE, &amp;d);
295  /* Note that these values are always the ASCII ones, even in
296  EBCDIC environments. CR = 13, NL = 10. */
297  option_bits = (d == 13)? PCRE_NEWLINE_CR :
298          (d == 10)? PCRE_NEWLINE_LF :
299          (d == (13&lt;&lt;8 | 10))? PCRE_NEWLINE_CRLF :
300          (d == -2)? PCRE_NEWLINE_ANYCRLF :
301          (d == -1)? PCRE_NEWLINE_ANY : 0;
302  }
303
304/* See if CRLF is a valid newline sequence. */
305
306crlf_is_newline =
307     option_bits == PCRE_NEWLINE_ANY ||
308     option_bits == PCRE_NEWLINE_CRLF ||
309     option_bits == PCRE_NEWLINE_ANYCRLF;
310
311/* Loop for second and subsequent matches */
312
313for (;;)
314  {
315  int options = 0;                 /* Normally no options */
316  int start_offset = ovector[1];   /* Start at end of previous match */
317
318  /* If the previous match was for an empty string, we are finished if we are
319  at the end of the subject. Otherwise, arrange to run another match at the
320  same point to see if a non-empty match can be found. */
321
322  if (ovector[0] == ovector[1])
323    {
324    if (ovector[0] == subject_length) break;
325    options = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
326    }
327
328  /* Run the next matching operation */
329
330  rc = pcre_exec(
331    re,                   /* the compiled pattern */
332    NULL,                 /* no extra data - we didn't study the pattern */
333    subject,              /* the subject string */
334    subject_length,       /* the length of the subject */
335    start_offset,         /* starting offset in the subject */
336    options,              /* options */
337    ovector,              /* output vector for substring information */
338    OVECCOUNT);           /* number of elements in the output vector */
339
340  /* This time, a result of NOMATCH isn't an error. If the value in "options"
341  is zero, it just means we have found all possible matches, so the loop ends.
342  Otherwise, it means we have failed to find a non-empty-string match at a
343  point where there was a previous empty-string match. In this case, we do what
344  Perl does: advance the matching position by one character, and continue. We
345  do this by setting the "end of previous match" offset, because that is picked
346  up at the top of the loop as the point at which to start again.
347
348  There are two complications: (a) When CRLF is a valid newline sequence, and
349  the current position is just before it, advance by an extra byte. (b)
350  Otherwise we must ensure that we skip an entire UTF-8 character if we are in
351  UTF-8 mode. */
352
353  if (rc == PCRE_ERROR_NOMATCH)
354    {
355    if (options == 0) break;                    /* All matches found */
356    ovector[1] = start_offset + 1;              /* Advance one byte */
357    if (crlf_is_newline &amp;&amp;                      /* If CRLF is newline &amp; */
358        start_offset &lt; subject_length - 1 &amp;&amp;    /* we are at CRLF, */
359        subject[start_offset] == '\r' &amp;&amp;
360        subject[start_offset + 1] == '\n')
361      ovector[1] += 1;                          /* Advance by one more. */
362    else if (utf8)                              /* Otherwise, ensure we */
363      {                                         /* advance a whole UTF-8 */
364      while (ovector[1] &lt; subject_length)       /* character. */
365        {
366        if ((subject[ovector[1]] &amp; 0xc0) != 0x80) break;
367        ovector[1] += 1;
368        }
369      }
370    continue;    /* Go round the loop again */
371    }
372
373  /* Other matching errors are not recoverable. */
374
375  if (rc &lt; 0)
376    {
377    printf("Matching error %d\n", rc);
378    pcre_free(re);    /* Release memory used for the compiled pattern */
379    return 1;
380    }
381
382  /* Match succeeded */
383
384  printf("\nMatch succeeded again at offset %d\n", ovector[0]);
385
386  /* The match succeeded, but the output vector wasn't big enough. */
387
388  if (rc == 0)
389    {
390    rc = OVECCOUNT/3;
391    printf("ovector only has room for %d captured substrings\n", rc - 1);
392    }
393
394  /* As before, show substrings stored in the output vector by number, and then
395  also any named substrings. */
396
397  for (i = 0; i &lt; rc; i++)
398    {
399    char *substring_start = subject + ovector[2*i];
400    int substring_length = ovector[2*i+1] - ovector[2*i];
401    printf("%2d: %.*s\n", i, substring_length, substring_start);
402    }
403
404  if (namecount &lt;= 0) printf("No named substrings\n"); else
405    {
406    unsigned char *tabptr = name_table;
407    printf("Named substrings\n");
408    for (i = 0; i &lt; namecount; i++)
409      {
410      int n = (tabptr[0] &lt;&lt; 8) | tabptr[1];
411      printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
412        ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
413      tabptr += name_entry_size;
414      }
415    }
416  }      /* End of loop to find second and subsequent matches */
417
418printf("\n");
419pcre_free(re);       /* Release memory used for the compiled pattern */
420return 0;
421}
422
423/* End of pcredemo.c */
424</PRE><p>
425Return to the <a href="index.html">PCRE index page</a>.
426</p>
427