1/************************************************* 2* PCRE DEMONSTRATION PROGRAM * 3*************************************************/ 4 5/* This is a demonstration program to illustrate the most straightforward ways 6of calling the PCRE regular expression library from a C program. See the 7pcresample documentation for a short discussion ("man pcresample" if you have 8the PCRE man pages installed). 9 10In Unix-like environments, if PCRE is installed in your standard system 11libraries, you should be able to compile this program using this command: 12 13gcc -Wall pcredemo.c -lpcre -o pcredemo 14 15If PCRE is not installed in a standard place, it is likely to be installed with 16support for the pkg-config mechanism. If you have pkg-config, you can compile 17this program using this command: 18 19gcc -Wall pcredemo.c `pkg-config --cflags --libs libpcre` -o pcredemo 20 21If you do not have pkg-config, you may have to use this: 22 23gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \ 24 -R/usr/local/lib -lpcre -o pcredemo 25 26Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and 27library files for PCRE are installed on your system. Only some operating 28systems (e.g. Solaris) use the -R option. 29 30Building under Windows: 31 32If you want to statically link this program against a non-dll .a file, you must 33define PCRE_STATIC before including pcre.h, otherwise the pcre_malloc() and 34pcre_free() exported functions will be declared __declspec(dllimport), with 35unwanted results. So in this environment, uncomment the following line. */ 36 37/* #define PCRE_STATIC */ 38 39#include <stdio.h> 40#include <string.h> 41#include <pcre.h> 42 43#define OVECCOUNT 30 /* should be a multiple of 3 */ 44 45 46int main(int argc, char **argv) 47{ 48pcre *re; 49const char *error; 50char *pattern; 51char *subject; 52unsigned char *name_table; 53int erroffset; 54int find_all; 55int namecount; 56int name_entry_size; 57int ovector[OVECCOUNT]; 58int subject_length; 59int rc, i; 60 61 62/************************************************************************** 63* First, sort out the command line. There is only one possible option at * 64* the moment, "-g" to request repeated matching to find all occurrences, * 65* like Perl's /g option. We set the variable find_all to a non-zero value * 66* if the -g option is present. Apart from that, there must be exactly two * 67* arguments. * 68**************************************************************************/ 69 70find_all = 0; 71for (i = 1; i < argc; i++) 72 { 73 if (strcmp(argv[i], "-g") == 0) find_all = 1; 74 else break; 75 } 76 77/* After the options, we require exactly two arguments, which are the pattern, 78and the subject string. */ 79 80if (argc - i != 2) 81 { 82 printf("Two arguments required: a regex and a subject string\n"); 83 return 1; 84 } 85 86pattern = argv[i]; 87subject = argv[i+1]; 88subject_length = (int)strlen(subject); 89 90 91/************************************************************************* 92* Now we are going to compile the regular expression pattern, and handle * 93* and errors that are detected. * 94*************************************************************************/ 95 96re = pcre_compile( 97 pattern, /* the pattern */ 98 0, /* default options */ 99 &error, /* for error message */ 100 &erroffset, /* for error offset */ 101 NULL); /* use default character tables */ 102 103/* Compilation failed: print the error message and exit */ 104 105if (re == NULL) 106 { 107 printf("PCRE compilation failed at offset %d: %s\n", erroffset, error); 108 return 1; 109 } 110 111 112/************************************************************************* 113* If the compilation succeeded, we call PCRE again, in order to do a * 114* pattern match against the subject string. This does just ONE match. If * 115* further matching is needed, it will be done below. * 116*************************************************************************/ 117 118rc = pcre_exec( 119 re, /* the compiled pattern */ 120 NULL, /* no extra data - we didn't study the pattern */ 121 subject, /* the subject string */ 122 subject_length, /* the length of the subject */ 123 0, /* start at offset 0 in the subject */ 124 0, /* default options */ 125 ovector, /* output vector for substring information */ 126 OVECCOUNT); /* number of elements in the output vector */ 127 128/* Matching failed: handle error cases */ 129 130if (rc < 0) 131 { 132 switch(rc) 133 { 134 case PCRE_ERROR_NOMATCH: printf("No match\n"); break; 135 /* 136 Handle other special cases if you like 137 */ 138 default: printf("Matching error %d\n", rc); break; 139 } 140 pcre_free(re); /* Release memory used for the compiled pattern */ 141 return 1; 142 } 143 144/* Match succeded */ 145 146printf("\nMatch succeeded at offset %d\n", ovector[0]); 147 148 149/************************************************************************* 150* We have found the first match within the subject string. If the output * 151* vector wasn't big enough, say so. Then output any substrings that were * 152* captured. * 153*************************************************************************/ 154 155/* The output vector wasn't big enough */ 156 157if (rc == 0) 158 { 159 rc = OVECCOUNT/3; 160 printf("ovector only has room for %d captured substrings\n", rc - 1); 161 } 162 163/* Show substrings stored in the output vector by number. Obviously, in a real 164application you might want to do things other than print them. */ 165 166for (i = 0; i < rc; i++) 167 { 168 char *substring_start = subject + ovector[2*i]; 169 int substring_length = ovector[2*i+1] - ovector[2*i]; 170 printf("%2d: %.*s\n", i, substring_length, substring_start); 171 } 172 173 174/************************************************************************** 175* That concludes the basic part of this demonstration program. We have * 176* compiled a pattern, and performed a single match. The code that follows * 177* shows first how to access named substrings, and then how to code for * 178* repeated matches on the same subject. * 179**************************************************************************/ 180 181/* See if there are any named substrings, and if so, show them by name. First 182we have to extract the count of named parentheses from the pattern. */ 183 184(void)pcre_fullinfo( 185 re, /* the compiled pattern */ 186 NULL, /* no extra data - we didn't study the pattern */ 187 PCRE_INFO_NAMECOUNT, /* number of named substrings */ 188 &namecount); /* where to put the answer */ 189 190if (namecount <= 0) printf("No named substrings\n"); else 191 { 192 unsigned char *tabptr; 193 printf("Named substrings\n"); 194 195 /* Before we can access the substrings, we must extract the table for 196 translating names to numbers, and the size of each entry in the table. */ 197 198 (void)pcre_fullinfo( 199 re, /* the compiled pattern */ 200 NULL, /* no extra data - we didn't study the pattern */ 201 PCRE_INFO_NAMETABLE, /* address of the table */ 202 &name_table); /* where to put the answer */ 203 204 (void)pcre_fullinfo( 205 re, /* the compiled pattern */ 206 NULL, /* no extra data - we didn't study the pattern */ 207 PCRE_INFO_NAMEENTRYSIZE, /* size of each entry in the table */ 208 &name_entry_size); /* where to put the answer */ 209 210 /* Now we can scan the table and, for each entry, print the number, the name, 211 and the substring itself. */ 212 213 tabptr = name_table; 214 for (i = 0; i < namecount; i++) 215 { 216 int n = (tabptr[0] << 8) | tabptr[1]; 217 printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2, 218 ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]); 219 tabptr += name_entry_size; 220 } 221 } 222 223 224/************************************************************************* 225* If the "-g" option was given on the command line, we want to continue * 226* to search for additional matches in the subject string, in a similar * 227* way to the /g option in Perl. This turns out to be trickier than you * 228* might think because of the possibility of matching an empty string. * 229* What happens is as follows: * 230* * 231* If the previous match was NOT for an empty string, we can just start * 232* the next match at the end of the previous one. * 233* * 234* If the previous match WAS for an empty string, we can't do that, as it * 235* would lead to an infinite loop. Instead, a special call of pcre_exec() * 236* is made with the PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED flags set. * 237* The first of these tells PCRE that an empty string at the start of the * 238* subject is not a valid match; other possibilities must be tried. The * 239* second flag restricts PCRE to one match attempt at the initial string * 240* position. If this match succeeds, an alternative to the empty string * 241* match has been found, and we can proceed round the loop. * 242*************************************************************************/ 243 244if (!find_all) 245 { 246 pcre_free(re); /* Release the memory used for the compiled pattern */ 247 return 0; /* Finish unless -g was given */ 248 } 249 250/* Loop for second and subsequent matches */ 251 252for (;;) 253 { 254 int options = 0; /* Normally no options */ 255 int start_offset = ovector[1]; /* Start at end of previous match */ 256 257 /* If the previous match was for an empty string, we are finished if we are 258 at the end of the subject. Otherwise, arrange to run another match at the 259 same point to see if a non-empty match can be found. */ 260 261 if (ovector[0] == ovector[1]) 262 { 263 if (ovector[0] == subject_length) break; 264 options = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED; 265 } 266 267 /* Run the next matching operation */ 268 269 rc = pcre_exec( 270 re, /* the compiled pattern */ 271 NULL, /* no extra data - we didn't study the pattern */ 272 subject, /* the subject string */ 273 subject_length, /* the length of the subject */ 274 start_offset, /* starting offset in the subject */ 275 options, /* options */ 276 ovector, /* output vector for substring information */ 277 OVECCOUNT); /* number of elements in the output vector */ 278 279 /* This time, a result of NOMATCH isn't an error. If the value in "options" 280 is zero, it just means we have found all possible matches, so the loop ends. 281 Otherwise, it means we have failed to find a non-empty-string match at a 282 point where there was a previous empty-string match. In this case, we do what 283 Perl does: advance the matching position by one, and continue. We do this by 284 setting the "end of previous match" offset, because that is picked up at the 285 top of the loop as the point at which to start again. */ 286 287 if (rc == PCRE_ERROR_NOMATCH) 288 { 289 if (options == 0) break; 290 ovector[1] = start_offset + 1; 291 continue; /* Go round the loop again */ 292 } 293 294 /* Other matching errors are not recoverable. */ 295 296 if (rc < 0) 297 { 298 printf("Matching error %d\n", rc); 299 pcre_free(re); /* Release memory used for the compiled pattern */ 300 return 1; 301 } 302 303 /* Match succeded */ 304 305 printf("\nMatch succeeded again at offset %d\n", ovector[0]); 306 307 /* The match succeeded, but the output vector wasn't big enough. */ 308 309 if (rc == 0) 310 { 311 rc = OVECCOUNT/3; 312 printf("ovector only has room for %d captured substrings\n", rc - 1); 313 } 314 315 /* As before, show substrings stored in the output vector by number, and then 316 also any named substrings. */ 317 318 for (i = 0; i < rc; i++) 319 { 320 char *substring_start = subject + ovector[2*i]; 321 int substring_length = ovector[2*i+1] - ovector[2*i]; 322 printf("%2d: %.*s\n", i, substring_length, substring_start); 323 } 324 325 if (namecount <= 0) printf("No named substrings\n"); else 326 { 327 unsigned char *tabptr = name_table; 328 printf("Named substrings\n"); 329 for (i = 0; i < namecount; i++) 330 { 331 int n = (tabptr[0] << 8) | tabptr[1]; 332 printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2, 333 ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]); 334 tabptr += name_entry_size; 335 } 336 } 337 } /* End of loop to find second and subsequent matches */ 338 339printf("\n"); 340pcre_free(re); /* Release memory used for the compiled pattern */ 341return 0; 342} 343 344/* End of pcredemo.c */ 345