1/* 2 * pcre.c - interface to the PCRE library 3 * 4 * This file is part of zsh, the Z shell. 5 * 6 * Copyright (c) 2001, 2002, 2003, 2004, 2007 Clint Adams 7 * All rights reserved. 8 * 9 * Permission is hereby granted, without written agreement and without 10 * license or royalty fees, to use, copy, modify, and distribute this 11 * software and to distribute modified versions of this software for any 12 * purpose, provided that the above copyright notice and the following 13 * two paragraphs appear in all copies of this software. 14 * 15 * In no event shall Clint Adams or the Zsh Development Group be liable 16 * to any party for direct, indirect, special, incidental, or consequential 17 * damages arising out of the use of this software and its documentation, 18 * even if Andrew Main and the Zsh Development Group have been advised of 19 * the possibility of such damage. 20 * 21 * Clint Adams and the Zsh Development Group specifically disclaim any 22 * warranties, including, but not limited to, the implied warranties of 23 * merchantability and fitness for a particular purpose. The software 24 * provided hereunder is on an "as is" basis, and Andrew Main and the 25 * Zsh Development Group have no obligation to provide maintenance, 26 * support, updates, enhancements, or modifications. 27 * 28 */ 29 30 31#include "pcre.mdh" 32#include "pcre.pro" 33 34#define CPCRE_PLAIN 0 35 36/**/ 37#if defined(HAVE_PCRE_COMPILE) && defined(HAVE_PCRE_EXEC) 38#include <pcre.h> 39 40static pcre *pcre_pattern; 41static pcre_extra *pcre_hints; 42 43/**/ 44static int 45zpcre_utf8_enabled(void) 46{ 47#if defined(MULTIBYTE_SUPPORT) && defined(HAVE_NL_LANGINFO) && defined(CODESET) 48 static int have_utf8_pcre = -1; 49 50 /* value can toggle based on MULTIBYTE, so don't 51 * be too eager with caching */ 52 if (have_utf8_pcre < -1) 53 return 0; 54 55 if (!isset(MULTIBYTE)) 56 return 0; 57 58 if ((have_utf8_pcre == -1) && 59 (!strcmp(nl_langinfo(CODESET), "UTF-8"))) { 60 61 if (pcre_config(PCRE_CONFIG_UTF8, &have_utf8_pcre)) 62 have_utf8_pcre = -2; /* erk, failed to ask */ 63 } 64 65 if (have_utf8_pcre < 0) 66 return 0; 67 return have_utf8_pcre; 68 69#else 70 return 0; 71#endif 72} 73 74/**/ 75static int 76bin_pcre_compile(char *nam, char **args, Options ops, UNUSED(int func)) 77{ 78 int pcre_opts = 0, pcre_errptr; 79 const char *pcre_error; 80 char *target; 81 82 if(OPT_ISSET(ops,'a')) pcre_opts |= PCRE_ANCHORED; 83 if(OPT_ISSET(ops,'i')) pcre_opts |= PCRE_CASELESS; 84 if(OPT_ISSET(ops,'m')) pcre_opts |= PCRE_MULTILINE; 85 if(OPT_ISSET(ops,'x')) pcre_opts |= PCRE_EXTENDED; 86 if(OPT_ISSET(ops,'s')) pcre_opts |= PCRE_DOTALL; 87 88 if (zpcre_utf8_enabled()) 89 pcre_opts |= PCRE_UTF8; 90 91 pcre_hints = NULL; /* Is this necessary? */ 92 93 if (pcre_pattern) 94 pcre_free(pcre_pattern); 95 96 target = ztrdup(*args); 97 unmetafy(target, NULL); 98 99 pcre_pattern = pcre_compile(target, pcre_opts, &pcre_error, &pcre_errptr, NULL); 100 101 free(target); 102 103 if (pcre_pattern == NULL) 104 { 105 zwarnnam(nam, "error in regex: %s", pcre_error); 106 return 1; 107 } 108 109 return 0; 110} 111 112/**/ 113#ifdef HAVE_PCRE_STUDY 114 115/**/ 116static int 117bin_pcre_study(char *nam, UNUSED(char **args), UNUSED(Options ops), UNUSED(int func)) 118{ 119 const char *pcre_error; 120 121 if (pcre_pattern == NULL) 122 { 123 zwarnnam(nam, "no pattern has been compiled for study"); 124 return 1; 125 } 126 127 pcre_hints = pcre_study(pcre_pattern, 0, &pcre_error); 128 if (pcre_error != NULL) 129 { 130 zwarnnam(nam, "error while studying regex: %s", pcre_error); 131 return 1; 132 } 133 134 return 0; 135} 136 137/**/ 138#else /* !HAVE_PCRE_STUDY */ 139 140# define bin_pcre_study bin_notavail 141 142/**/ 143#endif /* !HAVE_PCRE_STUDY */ 144 145/**/ 146static int 147zpcre_get_substrings(char *arg, int *ovec, int ret, char *matchvar, 148 char *substravar, int want_offset_pair, int matchedinarr, 149 int want_begin_end) 150{ 151 char **captures, *match_all, **matches; 152 char offset_all[50]; 153 int capture_start = 1; 154 155 if (matchedinarr) 156 capture_start = 0; 157 if (matchvar == NULL) 158 matchvar = "MATCH"; 159 if (substravar == NULL) 160 substravar = "match"; 161 162 /* captures[0] will be entire matched string, [1] first substring */ 163 if (!pcre_get_substring_list(arg, ovec, ret, (const char ***)&captures)) { 164 int nelem = arrlen(captures)-1; 165 /* Set to the offsets of the complete match */ 166 if (want_offset_pair) { 167 sprintf(offset_all, "%d %d", ovec[0], ovec[1]); 168 setsparam("ZPCRE_OP", ztrdup(offset_all)); 169 } 170 match_all = metafy(captures[0], -1, META_DUP); 171 setsparam(matchvar, match_all); 172 /* 173 * If we're setting match, mbegin, mend we only do 174 * so if there were parenthesised matches, for consistency 175 * (c.f. regex.c). 176 */ 177 if (!want_begin_end || nelem) { 178 char **x, **y; 179 y = &captures[capture_start]; 180 matches = x = (char **) zalloc(sizeof(char *) * (arrlen(y) + 1)); 181 do { 182 if (*y) 183 *x++ = metafy(*y, -1, META_DUP); 184 else 185 *x++ = NULL; 186 } while (*y++); 187 setaparam(substravar, matches); 188 } 189 190 if (want_begin_end) { 191 char *ptr = arg; 192 zlong offs = 0; 193 194 /* Count the characters before the match */ 195 MB_METACHARINIT(); 196 while (ptr < arg + ovec[0]) { 197 offs++; 198 ptr += MB_METACHARLEN(ptr); 199 } 200 setiparam("MBEGIN", offs + !isset(KSHARRAYS)); 201 /* Add on the characters in the match */ 202 while (ptr < arg + ovec[1]) { 203 offs++; 204 ptr += MB_METACHARLEN(ptr); 205 } 206 setiparam("MEND", offs + !isset(KSHARRAYS) - 1); 207 if (nelem) { 208 char **mbegin, **mend, **bptr, **eptr; 209 int i, *ipair; 210 211 bptr = mbegin = zalloc(sizeof(char*)*(nelem+1)); 212 eptr = mend = zalloc(sizeof(char*)*(nelem+1)); 213 214 for (ipair = ovec + 2, i = 0; 215 i < nelem; 216 ipair += 2, i++, bptr++, eptr++) 217 { 218 char buf[DIGBUFSIZE]; 219 ptr = arg; 220 offs = 0; 221 /* Find the start offset */ 222 MB_METACHARINIT(); 223 while (ptr < arg + ipair[0]) { 224 offs++; 225 ptr += MB_METACHARLEN(ptr); 226 } 227 convbase(buf, offs + !isset(KSHARRAYS), 10); 228 *bptr = ztrdup(buf); 229 /* Continue to the end offset */ 230 while (ptr < arg + ipair[1]) { 231 offs++; 232 ptr += MB_METACHARLEN(ptr); 233 } 234 convbase(buf, offs + !isset(KSHARRAYS) - 1, 10); 235 *eptr = ztrdup(buf); 236 } 237 *bptr = *eptr = NULL; 238 239 setaparam("mbegin", mbegin); 240 setaparam("mend", mend); 241 } 242 } 243 244 pcre_free_substring_list((const char **)captures); 245 } 246 247 return 0; 248} 249 250/**/ 251static int 252getposint(char *instr, char *nam) 253{ 254 char *eptr; 255 int ret; 256 257 ret = (int)zstrtol(instr, &eptr, 10); 258 if (*eptr || ret < 0) { 259 zwarnnam(nam, "integer expected: %s", instr); 260 return -1; 261 } 262 263 return ret; 264} 265 266/**/ 267static int 268bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func)) 269{ 270 int ret, capcount, *ovec, ovecsize, c; 271 char *matched_portion = NULL; 272 char *plaintext = NULL; 273 char *receptacle = NULL; 274 int return_value = 1; 275 /* The subject length and offset start are both int values in pcre_exec */ 276 int subject_len; 277 int offset_start = 0; 278 int want_offset_pair = 0; 279 280 if (pcre_pattern == NULL) { 281 zwarnnam(nam, "no pattern has been compiled"); 282 return 1; 283 } 284 285 if(OPT_HASARG(ops,c='a')) { 286 receptacle = OPT_ARG(ops,c); 287 } 288 if(OPT_HASARG(ops,c='v')) { 289 matched_portion = OPT_ARG(ops,c); 290 } 291 if(OPT_HASARG(ops,c='n')) { /* The offset position to start the search, in bytes. */ 292 offset_start = getposint(OPT_ARG(ops,c), nam); 293 } 294 /* For the entire match, 'Return' the offset byte positions instead of the matched string */ 295 if(OPT_ISSET(ops,'b')) want_offset_pair = 1; 296 297 if(!*args) { 298 zwarnnam(nam, "not enough arguments"); 299 } 300 301 if ((ret = pcre_fullinfo(pcre_pattern, pcre_hints, PCRE_INFO_CAPTURECOUNT, &capcount))) 302 { 303 zwarnnam(nam, "error %d in fullinfo", ret); 304 return 1; 305 } 306 307 ovecsize = (capcount+1)*3; 308 ovec = zalloc(ovecsize*sizeof(int)); 309 310 plaintext = ztrdup(*args); 311 unmetafy(plaintext, NULL); 312 subject_len = (int)strlen(plaintext); 313 314 if (offset_start < 0 || offset_start >= subject_len) 315 ret = PCRE_ERROR_NOMATCH; 316 else 317 ret = pcre_exec(pcre_pattern, pcre_hints, plaintext, subject_len, offset_start, 0, ovec, ovecsize); 318 319 if (ret==0) return_value = 0; 320 else if (ret==PCRE_ERROR_NOMATCH) /* no match */; 321 else if (ret>0) { 322 zpcre_get_substrings(plaintext, ovec, ret, matched_portion, receptacle, 323 want_offset_pair, 0, 0); 324 return_value = 0; 325 } 326 else { 327 zwarnnam(nam, "error in pcre_exec [%d]", ret); 328 } 329 330 if (ovec) 331 zfree(ovec, ovecsize*sizeof(int)); 332 333 return return_value; 334} 335 336/**/ 337static int 338cond_pcre_match(char **a, int id) 339{ 340 pcre *pcre_pat; 341 const char *pcre_err; 342 char *lhstr, *rhre, *lhstr_plain, *rhre_plain, *avar=NULL; 343 int r = 0, pcre_opts = 0, pcre_errptr, capcnt, *ov, ovsize; 344 int return_value = 0; 345 346 if (zpcre_utf8_enabled()) 347 pcre_opts |= PCRE_UTF8; 348 349 lhstr = cond_str(a,0,0); 350 rhre = cond_str(a,1,0); 351 lhstr_plain = ztrdup(lhstr); 352 rhre_plain = ztrdup(rhre); 353 unmetafy(lhstr_plain, NULL); 354 unmetafy(rhre_plain, NULL); 355 pcre_pat = NULL; 356 ov = NULL; 357 ovsize = 0; 358 359 if (isset(BASHREMATCH)) 360 avar="BASH_REMATCH"; 361 362 switch(id) { 363 case CPCRE_PLAIN: 364 pcre_pat = pcre_compile(rhre_plain, pcre_opts, &pcre_err, &pcre_errptr, NULL); 365 if (pcre_pat == NULL) { 366 zwarn("failed to compile regexp /%s/: %s", rhre, pcre_err); 367 break; 368 } 369 pcre_fullinfo(pcre_pat, NULL, PCRE_INFO_CAPTURECOUNT, &capcnt); 370 ovsize = (capcnt+1)*3; 371 ov = zalloc(ovsize*sizeof(int)); 372 r = pcre_exec(pcre_pat, NULL, lhstr_plain, strlen(lhstr_plain), 0, 0, ov, ovsize); 373 /* r < 0 => error; r==0 match but not enough size in ov 374 * r > 0 => (r-1) substrings found; r==1 => no substrings 375 */ 376 if (r==0) { 377 zwarn("reportable zsh problem: pcre_exec() returned 0"); 378 return_value = 1; 379 break; 380 } 381 else if (r==PCRE_ERROR_NOMATCH) { 382 return_value = 0; /* no match */ 383 break; 384 } 385 else if (r<0) { 386 zwarn("pcre_exec() error [%d]", r); 387 break; 388 } 389 else if (r>0) { 390 zpcre_get_substrings(lhstr_plain, ov, r, NULL, avar, 0, 391 isset(BASHREMATCH), 392 !isset(BASHREMATCH)); 393 return_value = 1; 394 break; 395 } 396 break; 397 } 398 399 if (lhstr_plain) 400 free(lhstr_plain); 401 if(rhre_plain) 402 free(rhre_plain); 403 if (pcre_pat) 404 pcre_free(pcre_pat); 405 if (ov) 406 zfree(ov, ovsize*sizeof(int)); 407 408 return return_value; 409} 410 411static struct conddef cotab[] = { 412 CONDDEF("pcre-match", CONDF_INFIX, cond_pcre_match, 0, 0, CPCRE_PLAIN) 413 /* CONDDEF can register =~ but it won't be found */ 414}; 415 416/**/ 417#else /* !(HAVE_PCRE_COMPILE && HAVE_PCRE_EXEC) */ 418 419# define bin_pcre_compile bin_notavail 420# define bin_pcre_study bin_notavail 421# define bin_pcre_match bin_notavail 422 423/**/ 424#endif /* !(HAVE_PCRE_COMPILE && HAVE_PCRE_EXEC) */ 425 426static struct builtin bintab[] = { 427 BUILTIN("pcre_compile", 0, bin_pcre_compile, 1, 1, 0, "aimxs", NULL), 428 BUILTIN("pcre_match", 0, bin_pcre_match, 1, 1, 0, "a:v:n:b", NULL), 429 BUILTIN("pcre_study", 0, bin_pcre_study, 0, 0, 0, NULL, NULL) 430}; 431 432 433static struct features module_features = { 434 bintab, sizeof(bintab)/sizeof(*bintab), 435#if defined(HAVE_PCRE_COMPILE) && defined(HAVE_PCRE_EXEC) 436 cotab, sizeof(cotab)/sizeof(*cotab), 437#else /* !(HAVE_PCRE_COMPILE && HAVE_PCRE_EXEC) */ 438 NULL, 0, 439#endif /* !(HAVE_PCRE_COMPILE && HAVE_PCRE_EXEC) */ 440 NULL, 0, 441 NULL, 0, 442 0 443}; 444 445 446/**/ 447int 448setup_(UNUSED(Module m)) 449{ 450 return 0; 451} 452 453/**/ 454int 455features_(Module m, char ***features) 456{ 457 *features = featuresarray(m, &module_features); 458 return 0; 459} 460 461/**/ 462int 463enables_(Module m, int **enables) 464{ 465 return handlefeatures(m, &module_features, enables); 466} 467 468/**/ 469int 470boot_(Module m) 471{ 472 return 0; 473} 474 475/**/ 476int 477cleanup_(Module m) 478{ 479 return setfeatureenables(m, &module_features, NULL); 480} 481 482/**/ 483int 484finish_(UNUSED(Module m)) 485{ 486 return 0; 487} 488