1/*
2 * regex.c
3 *
4 * This file is part of zsh, the Z shell.
5 *
6 * Copyright (c) 2007,2012 Phil Pennock
7 * All Rights Reserved.
8 *
9 * Permission is hereby granted, without written agreement and without
10 * license or royalty fees, to use, copy, modify, and distribute this
11 * software and to distribute modified versions of this software for any
12 * purpose, provided that the above copyright notice and the following
13 * two paragraphs appear in all copies of this software.
14 *
15 * In no event shall Phil Pennock or the Zsh Development Group be liable
16 * to any party for direct, indirect, special, incidental, or consequential
17 * damages arising out of the use of this software and its documentation,
18 * even if Phil Pennock and the Zsh Development Group have been advised of
19 * the possibility of such damage.
20 *
21 * Phil Pennock and the Zsh Development Group specifically disclaim any
22 * warranties, including, but not limited to, the implied warranties of
23 * merchantability and fitness for a particular purpose.  The software
24 * provided hereunder is on an "as is" basis, and Phil Pennock and the
25 * Zsh Development Group have no obligation to provide maintenance,
26 * support, updates, enhancements, or modifications.
27 *
28 */
29
30#include "regex.mdh"
31#include "regex.pro"
32
33#include <regex.h>
34
35/* we default to a vaguely modern syntax and set of capabilities */
36#define ZREGEX_EXTENDED 0
/* if you want Basic syntax, add it as an alternative option */
38
39static void
40zregex_regerrwarn(int r, regex_t *re, char *msg)
41{
42    char *errbuf;
43    size_t errbufsz;
44
45    errbufsz = regerror(r, re, NULL, 0);
46    errbuf = zalloc(errbufsz*sizeof(char));
47    regerror(r, re, errbuf, errbufsz);
48    zwarn("%s: %s", msg, errbuf);
49    zfree(errbuf, errbufsz);
50}
51
52/**/
53static int
54zcond_regex_match(char **a, int id)
55{
56    regex_t re;
57    regmatch_t *m, *matches = NULL;
58    size_t matchessz = 0;
59    char *lhstr, *lhstr_zshmeta, *rhre, *rhre_zshmeta, *s, **arr, **x;
60    int r, n, return_value, rcflags, reflags, nelem, start;
61
62    lhstr_zshmeta = cond_str(a,0,0);
63    rhre_zshmeta = cond_str(a,1,0);
64    rcflags = reflags = 0;
65    return_value = 0; /* 1 => matched successfully */
66
67    lhstr = ztrdup(lhstr_zshmeta);
68    unmetafy(lhstr, NULL);
69    rhre = ztrdup(rhre_zshmeta);
70    unmetafy(rhre, NULL);
71
72    switch(id) {
73    case ZREGEX_EXTENDED:
74	rcflags |= REG_EXTENDED;
75	if (!isset(CASEMATCH))
76	    rcflags |= REG_ICASE;
77	r = regcomp(&re, rhre, rcflags);
78	if (r) {
79	    zregex_regerrwarn(r, &re, "failed to compile regex");
80	    break;
81	}
82	/* re.re_nsub is number of parenthesized groups, we also need
83	 * 1 for the 0 offset, which is the entire matched portion
84	 */
85	if ((int)re.re_nsub < 0) {
86	    zwarn("INTERNAL ERROR: regcomp() returned "
87		    "negative subpattern count %d", (int)re.re_nsub);
88	    break;
89	}
90	matchessz = (re.re_nsub + 1) * sizeof(regmatch_t);
91	matches = zalloc(matchessz);
92	r = regexec(&re, lhstr, re.re_nsub+1, matches, reflags);
93	if (r == REG_NOMATCH)
94	    ; /* We do nothing when we fail to match. */
95	else if (r == 0) {
96	    return_value = 1;
97	    if (isset(BASHREMATCH)) {
98		start = 0;
99		nelem = re.re_nsub + 1;
100	    } else {
101		start = 1;
102		nelem = re.re_nsub;
103	    }
104	    arr = NULL; /* bogus gcc warning of used uninitialised */
105	    /* entire matched portion + re_nsub substrings + NULL */
106	    if (nelem) {
107		arr = x = (char **) zalloc(sizeof(char *) * (nelem + 1));
108		for (m = matches + start, n = start; n <= (int)re.re_nsub; ++n, ++m, ++x) {
109		    *x = metafy(lhstr + m->rm_so, m->rm_eo - m->rm_so, META_DUP);
110		}
111		*x = NULL;
112	    }
113	    if (isset(BASHREMATCH)) {
114		setaparam("BASH_REMATCH", arr);
115	    } else {
116		zlong offs;
117		char *ptr;
118
119		m = matches;
120		s = metafy(lhstr + m->rm_so, m->rm_eo - m->rm_so, META_DUP);
121		setsparam("MATCH", s);
122		/*
123		 * Count the characters before the match.
124		 */
125		ptr = lhstr;
126		offs = 0;
127		MB_METACHARINIT();
128		while (ptr < lhstr + m->rm_so) {
129		    offs++;
130		    ptr += MB_METACHARLEN(ptr);
131		}
132		setiparam("MBEGIN", offs + !isset(KSHARRAYS));
133		/*
134		 * Add on the characters in the match.
135		 */
136		while (ptr < lhstr + m->rm_eo) {
137		    offs++;
138		    ptr += MB_METACHARLEN(ptr);
139		}
140		setiparam("MEND", offs + !isset(KSHARRAYS) - 1);
141		if (nelem) {
142		    char **mbegin, **mend, **bptr, **eptr;
143		    bptr = mbegin = (char **)zalloc(sizeof(char *)*(nelem+1));
144		    eptr = mend = (char **)zalloc(sizeof(char *)*(nelem+1));
145
146		    for (m = matches + start, n = 0;
147			 n < nelem;
148			 ++n, ++m, ++bptr, ++eptr)
149		    {
150			char buf[DIGBUFSIZE];
151			ptr = lhstr;
152			offs = 0;
153			/* Find the start offset */
154			MB_METACHARINIT();
155			while (ptr < lhstr + m->rm_so) {
156			    offs++;
157			    ptr += MB_METACHARLEN(ptr);
158			}
159			convbase(buf, offs + !isset(KSHARRAYS), 10);
160			*bptr = ztrdup(buf);
161			/* Continue to the end offset */
162			while (ptr < lhstr + m->rm_eo) {
163			    offs++;
164			    ptr += MB_METACHARLEN(ptr);
165			}
166			convbase(buf, offs + !isset(KSHARRAYS) - 1, 10);
167			*eptr = ztrdup(buf);
168		    }
169		    *bptr = *eptr = NULL;
170
171		    setaparam("match", arr);
172		    setaparam("mbegin", mbegin);
173		    setaparam("mend", mend);
174		}
175	    }
176	}
177	else
178	    zregex_regerrwarn(r, &re, "regex matching error");
179	break;
180    default:
181	DPUTS(1, "bad regex option");
182	return_value = 0;
183	goto CLEAN_BASEMETA;
184    }
185
186    if (matches)
187	zfree(matches, matchessz);
188    regfree(&re);
189CLEAN_BASEMETA:
190    free(lhstr);
191    free(rhre);
192    return return_value;
193}
194
195static struct conddef cotab[] = {
196    CONDDEF("regex-match", CONDF_INFIX, zcond_regex_match, 0, 0, ZREGEX_EXTENDED)
197};
198
199
200static struct features module_features = {
201    NULL, 0,
202    cotab, sizeof(cotab)/sizeof(*cotab),
203    NULL, 0,
204    NULL, 0,
205    0
206};
207
208
209/**/
210int
211setup_(UNUSED(Module m))
212{
213    return 0;
214}
215
216/**/
217int
218features_(Module m, char ***features)
219{
220    *features = featuresarray(m, &module_features);
221    return 0;
222}
223
224/**/
225int
226enables_(Module m, int **enables)
227{
228    return handlefeatures(m, &module_features, enables);
229}
230
231/**/
232int
233boot_(Module m)
234{
235    return 0;
236}
237
238/**/
239int
240cleanup_(Module m)
241{
242    return setfeatureenables(m, &module_features, NULL);
243}
244
245/**/
246int
247finish_(UNUSED(Module m))
248{
249    return 0;
250}
251