1/* 2 * regex.c 3 * 4 * This file is part of zsh, the Z shell. 5 * 6 * Copyright (c) 2007,2012 Phil Pennock 7 * All Rights Reserved. 8 * 9 * Permission is hereby granted, without written agreement and without 10 * license or royalty fees, to use, copy, modify, and distribute this 11 * software and to distribute modified versions of this software for any 12 * purpose, provided that the above copyright notice and the following 13 * two paragraphs appear in all copies of this software. 14 * 15 * In no event shall Phil Pennock or the Zsh Development Group be liable 16 * to any party for direct, indirect, special, incidental, or consequential 17 * damages arising out of the use of this software and its documentation, 18 * even if Phil Pennock and the Zsh Development Group have been advised of 19 * the possibility of such damage. 20 * 21 * Phil Pennock and the Zsh Development Group specifically disclaim any 22 * warranties, including, but not limited to, the implied warranties of 23 * merchantability and fitness for a particular purpose. The software 24 * provided hereunder is on an "as is" basis, and Phil Pennock and the 25 * Zsh Development Group have no obligation to provide maintenance, 26 * support, updates, enhancements, or modifications. 27 * 28 */ 29 30#include "regex.mdh" 31#include "regex.pro" 32 33#include <regex.h> 34 35/* we default to a vaguely modern syntax and set of capabilities */ 36#define ZREGEX_EXTENDED 0 37/* if you want Basic syntax, make it an alternative options */ 38 39static void 40zregex_regerrwarn(int r, regex_t *re, char *msg) 41{ 42 char *errbuf; 43 size_t errbufsz; 44 45 errbufsz = regerror(r, re, NULL, 0); 46 errbuf = zalloc(errbufsz*sizeof(char)); 47 regerror(r, re, errbuf, errbufsz); 48 zwarn("%s: %s", msg, errbuf); 49 zfree(errbuf, errbufsz); 50} 51 52/**/ 53static int 54zcond_regex_match(char **a, int id) 55{ 56 regex_t re; 57 regmatch_t *m, *matches = NULL; 58 size_t matchessz = 0; 59 char *lhstr, *lhstr_zshmeta, *rhre, *rhre_zshmeta, *s, **arr, **x; 60 int r, n, return_value, rcflags, reflags, nelem, start; 61 62 lhstr_zshmeta = cond_str(a,0,0); 63 rhre_zshmeta = cond_str(a,1,0); 64 rcflags = reflags = 0; 65 return_value = 0; /* 1 => matched successfully */ 66 67 lhstr = ztrdup(lhstr_zshmeta); 68 unmetafy(lhstr, NULL); 69 rhre = ztrdup(rhre_zshmeta); 70 unmetafy(rhre, NULL); 71 72 switch(id) { 73 case ZREGEX_EXTENDED: 74 rcflags |= REG_EXTENDED; 75 if (!isset(CASEMATCH)) 76 rcflags |= REG_ICASE; 77 r = regcomp(&re, rhre, rcflags); 78 if (r) { 79 zregex_regerrwarn(r, &re, "failed to compile regex"); 80 break; 81 } 82 /* re.re_nsub is number of parenthesized groups, we also need 83 * 1 for the 0 offset, which is the entire matched portion 84 */ 85 if ((int)re.re_nsub < 0) { 86 zwarn("INTERNAL ERROR: regcomp() returned " 87 "negative subpattern count %d", (int)re.re_nsub); 88 break; 89 } 90 matchessz = (re.re_nsub + 1) * sizeof(regmatch_t); 91 matches = zalloc(matchessz); 92 r = regexec(&re, lhstr, re.re_nsub+1, matches, reflags); 93 if (r == REG_NOMATCH) 94 ; /* We do nothing when we fail to match. */ 95 else if (r == 0) { 96 return_value = 1; 97 if (isset(BASHREMATCH)) { 98 start = 0; 99 nelem = re.re_nsub + 1; 100 } else { 101 start = 1; 102 nelem = re.re_nsub; 103 } 104 arr = NULL; /* bogus gcc warning of used uninitialised */ 105 /* entire matched portion + re_nsub substrings + NULL */ 106 if (nelem) { 107 arr = x = (char **) zalloc(sizeof(char *) * (nelem + 1)); 108 for (m = matches + start, n = start; n <= (int)re.re_nsub; ++n, ++m, ++x) { 109 *x = metafy(lhstr + m->rm_so, m->rm_eo - m->rm_so, META_DUP); 110 } 111 *x = NULL; 112 } 113 if (isset(BASHREMATCH)) { 114 setaparam("BASH_REMATCH", arr); 115 } else { 116 zlong offs; 117 char *ptr; 118 119 m = matches; 120 s = metafy(lhstr + m->rm_so, m->rm_eo - m->rm_so, META_DUP); 121 setsparam("MATCH", s); 122 /* 123 * Count the characters before the match. 124 */ 125 ptr = lhstr; 126 offs = 0; 127 MB_METACHARINIT(); 128 while (ptr < lhstr + m->rm_so) { 129 offs++; 130 ptr += MB_METACHARLEN(ptr); 131 } 132 setiparam("MBEGIN", offs + !isset(KSHARRAYS)); 133 /* 134 * Add on the characters in the match. 135 */ 136 while (ptr < lhstr + m->rm_eo) { 137 offs++; 138 ptr += MB_METACHARLEN(ptr); 139 } 140 setiparam("MEND", offs + !isset(KSHARRAYS) - 1); 141 if (nelem) { 142 char **mbegin, **mend, **bptr, **eptr; 143 bptr = mbegin = (char **)zalloc(sizeof(char *)*(nelem+1)); 144 eptr = mend = (char **)zalloc(sizeof(char *)*(nelem+1)); 145 146 for (m = matches + start, n = 0; 147 n < nelem; 148 ++n, ++m, ++bptr, ++eptr) 149 { 150 char buf[DIGBUFSIZE]; 151 ptr = lhstr; 152 offs = 0; 153 /* Find the start offset */ 154 MB_METACHARINIT(); 155 while (ptr < lhstr + m->rm_so) { 156 offs++; 157 ptr += MB_METACHARLEN(ptr); 158 } 159 convbase(buf, offs + !isset(KSHARRAYS), 10); 160 *bptr = ztrdup(buf); 161 /* Continue to the end offset */ 162 while (ptr < lhstr + m->rm_eo) { 163 offs++; 164 ptr += MB_METACHARLEN(ptr); 165 } 166 convbase(buf, offs + !isset(KSHARRAYS) - 1, 10); 167 *eptr = ztrdup(buf); 168 } 169 *bptr = *eptr = NULL; 170 171 setaparam("match", arr); 172 setaparam("mbegin", mbegin); 173 setaparam("mend", mend); 174 } 175 } 176 } 177 else 178 zregex_regerrwarn(r, &re, "regex matching error"); 179 break; 180 default: 181 DPUTS(1, "bad regex option"); 182 return_value = 0; 183 goto CLEAN_BASEMETA; 184 } 185 186 if (matches) 187 zfree(matches, matchessz); 188 regfree(&re); 189CLEAN_BASEMETA: 190 free(lhstr); 191 free(rhre); 192 return return_value; 193} 194 195static struct conddef cotab[] = { 196 CONDDEF("regex-match", CONDF_INFIX, zcond_regex_match, 0, 0, ZREGEX_EXTENDED) 197}; 198 199 200static struct features module_features = { 201 NULL, 0, 202 cotab, sizeof(cotab)/sizeof(*cotab), 203 NULL, 0, 204 NULL, 0, 205 0 206}; 207 208 209/**/ 210int 211setup_(UNUSED(Module m)) 212{ 213 return 0; 214} 215 216/**/ 217int 218features_(Module m, char ***features) 219{ 220 *features = featuresarray(m, &module_features); 221 return 0; 222} 223 224/**/ 225int 226enables_(Module m, int **enables) 227{ 228 return handlefeatures(m, &module_features, enables); 229} 230 231/**/ 232int 233boot_(Module m) 234{ 235 return 0; 236} 237 238/**/ 239int 240cleanup_(Module m) 241{ 242 return setfeatureenables(m, &module_features, NULL); 243} 244 245/**/ 246int 247finish_(UNUSED(Module m)) 248{ 249 return 0; 250} 251