1/*
2 * pcre.c - interface to the PCRE library
3 *
4 * This file is part of zsh, the Z shell.
5 *
6 * Copyright (c) 2001, 2002, 2003, 2004, 2007 Clint Adams
7 * All rights reserved.
8 *
9 * Permission is hereby granted, without written agreement and without
10 * license or royalty fees, to use, copy, modify, and distribute this
11 * software and to distribute modified versions of this software for any
12 * purpose, provided that the above copyright notice and the following
13 * two paragraphs appear in all copies of this software.
14 *
15 * In no event shall Clint Adams or the Zsh Development Group be liable
16 * to any party for direct, indirect, special, incidental, or consequential
17 * damages arising out of the use of this software and its documentation,
18 * even if Andrew Main and the Zsh Development Group have been advised of
19 * the possibility of such damage.
20 *
21 * Clint Adams and the Zsh Development Group specifically disclaim any
22 * warranties, including, but not limited to, the implied warranties of
23 * merchantability and fitness for a particular purpose.  The software
24 * provided hereunder is on an "as is" basis, and Andrew Main and the
25 * Zsh Development Group have no obligation to provide maintenance,
26 * support, updates, enhancements, or modifications.
27 *
28 */
29
30
31#include "pcre.mdh"
32#include "pcre.pro"
33
/* Condition id passed to cond_pcre_match() for the plain -pcre-match test. */
#define CPCRE_PLAIN 0
35
36/**/
37#if defined(HAVE_PCRE_COMPILE) && defined(HAVE_PCRE_EXEC)
38#include <pcre.h>
39
/*
 * Module-wide state shared by the builtins:
 * pcre_pattern is the pattern most recently compiled by pcre_compile,
 * pcre_hints is the result of the most recent pcre_study (or NULL).
 */
static pcre *pcre_pattern;
static pcre_extra *pcre_hints;
42
/*
 * Decide whether patterns should be compiled with PCRE_UTF8.
 *
 * Returns non-zero only when the MULTIBYTE option is set, the locale
 * codeset is "UTF-8" and pcre_config() reports UTF-8 support.  Without
 * MULTIBYTE_SUPPORT, nl_langinfo() or CODESET the answer is always 0.
 */

/**/
static int
zpcre_utf8_enabled(void)
{
#if defined(MULTIBYTE_SUPPORT) && defined(HAVE_NL_LANGINFO) && defined(CODESET)
    /*
     * -1: library not queried yet
     * -2: pcre_config() failed, never ask again
     * otherwise: the value pcre_config() stored for PCRE_CONFIG_UTF8
     */
    static int utf8_state = -1;

    /*
     * MULTIBYTE can be toggled at any time, so it is tested on every
     * call rather than being folded into the cached state.
     */
    if (utf8_state < -1 || !isset(MULTIBYTE))
	return 0;

    /* Only query the library once the locale is seen to be UTF-8 */
    if (utf8_state == -1 &&
	strcmp(nl_langinfo(CODESET), "UTF-8") == 0 &&
	pcre_config(PCRE_CONFIG_UTF8, &utf8_state) != 0)
	utf8_state = -2; /* erk, failed to ask */

    return utf8_state < 0 ? 0 : utf8_state;

#else
    return 0;
#endif
}
73
74/**/
75static int
76bin_pcre_compile(char *nam, char **args, Options ops, UNUSED(int func))
77{
78    int pcre_opts = 0, pcre_errptr;
79    const char *pcre_error;
80    char *target;
81
82    if(OPT_ISSET(ops,'a')) pcre_opts |= PCRE_ANCHORED;
83    if(OPT_ISSET(ops,'i')) pcre_opts |= PCRE_CASELESS;
84    if(OPT_ISSET(ops,'m')) pcre_opts |= PCRE_MULTILINE;
85    if(OPT_ISSET(ops,'x')) pcre_opts |= PCRE_EXTENDED;
86    if(OPT_ISSET(ops,'s')) pcre_opts |= PCRE_DOTALL;
87
88    if (zpcre_utf8_enabled())
89	pcre_opts |= PCRE_UTF8;
90
91    pcre_hints = NULL;  /* Is this necessary? */
92
93    if (pcre_pattern)
94	pcre_free(pcre_pattern);
95
96    target = ztrdup(*args);
97    unmetafy(target, NULL);
98
99    pcre_pattern = pcre_compile(target, pcre_opts, &pcre_error, &pcre_errptr, NULL);
100
101    free(target);
102
103    if (pcre_pattern == NULL)
104    {
105	zwarnnam(nam, "error in regex: %s", pcre_error);
106	return 1;
107    }
108
109    return 0;
110}
111
112/**/
113#ifdef HAVE_PCRE_STUDY
114
115/**/
116static int
117bin_pcre_study(char *nam, UNUSED(char **args), UNUSED(Options ops), UNUSED(int func))
118{
119    const char *pcre_error;
120
121    if (pcre_pattern == NULL)
122    {
123	zwarnnam(nam, "no pattern has been compiled for study");
124	return 1;
125    }
126
127    pcre_hints = pcre_study(pcre_pattern, 0, &pcre_error);
128    if (pcre_error != NULL)
129    {
130	zwarnnam(nam, "error while studying regex: %s", pcre_error);
131	return 1;
132    }
133
134    return 0;
135}
136
137/**/
138#else /* !HAVE_PCRE_STUDY */
139
/* No pcre_study() in this build: route the builtin to bin_notavail */
# define bin_pcre_study bin_notavail
141
142/**/
143#endif /* !HAVE_PCRE_STUDY */
144
145/**/
146static int
147zpcre_get_substrings(char *arg, int *ovec, int ret, char *matchvar,
148		     char *substravar, int want_offset_pair, int matchedinarr,
149		     int want_begin_end)
150{
151    char **captures, *match_all, **matches;
152    char offset_all[50];
153    int capture_start = 1;
154
155    if (matchedinarr)
156	capture_start = 0;
157    if (matchvar == NULL)
158	matchvar = "MATCH";
159    if (substravar == NULL)
160	substravar = "match";
161
162    /* captures[0] will be entire matched string, [1] first substring */
163    if (!pcre_get_substring_list(arg, ovec, ret, (const char ***)&captures)) {
164	int nelem = arrlen(captures)-1;
165	/* Set to the offsets of the complete match */
166	if (want_offset_pair) {
167	    sprintf(offset_all, "%d %d", ovec[0], ovec[1]);
168	    setsparam("ZPCRE_OP", ztrdup(offset_all));
169	}
170	match_all = metafy(captures[0], -1, META_DUP);
171	setsparam(matchvar, match_all);
172	/*
173	 * If we're setting match, mbegin, mend we only do
174	 * so if there were parenthesised matches, for consistency
175	 * (c.f. regex.c).
176	 */
177	if (!want_begin_end || nelem) {
178	    char **x, **y;
179	    y = &captures[capture_start];
180	    matches = x = (char **) zalloc(sizeof(char *) * (arrlen(y) + 1));
181	    do {
182		if (*y)
183		    *x++ = metafy(*y, -1, META_DUP);
184		else
185		    *x++ = NULL;
186	    } while (*y++);
187	    setaparam(substravar, matches);
188	}
189
190	if (want_begin_end) {
191	    char *ptr = arg;
192	    zlong offs = 0;
193
194	    /* Count the characters before the match */
195	    MB_METACHARINIT();
196	    while (ptr < arg + ovec[0]) {
197		offs++;
198		ptr += MB_METACHARLEN(ptr);
199	    }
200	    setiparam("MBEGIN", offs + !isset(KSHARRAYS));
201	    /* Add on the characters in the match */
202	    while (ptr < arg + ovec[1]) {
203		offs++;
204		ptr += MB_METACHARLEN(ptr);
205	    }
206	    setiparam("MEND", offs + !isset(KSHARRAYS) - 1);
207	    if (nelem) {
208		char **mbegin, **mend, **bptr, **eptr;
209		int i, *ipair;
210
211		bptr = mbegin = zalloc(sizeof(char*)*(nelem+1));
212		eptr = mend = zalloc(sizeof(char*)*(nelem+1));
213
214		for (ipair = ovec + 2, i = 0;
215		     i < nelem;
216		     ipair += 2, i++, bptr++, eptr++)
217		{
218		    char buf[DIGBUFSIZE];
219		    ptr = arg;
220		    offs = 0;
221		    /* Find the start offset */
222		    MB_METACHARINIT();
223		    while (ptr < arg + ipair[0]) {
224			offs++;
225			ptr += MB_METACHARLEN(ptr);
226		    }
227		    convbase(buf, offs + !isset(KSHARRAYS), 10);
228		    *bptr = ztrdup(buf);
229		    /* Continue to the end offset */
230		    while (ptr < arg + ipair[1]) {
231			offs++;
232			ptr += MB_METACHARLEN(ptr);
233		    }
234		    convbase(buf, offs + !isset(KSHARRAYS) - 1, 10);
235		    *eptr = ztrdup(buf);
236		}
237		*bptr = *eptr = NULL;
238
239		setaparam("mbegin", mbegin);
240		setaparam("mend", mend);
241	    }
242	}
243
244	pcre_free_substring_list((const char **)captures);
245    }
246
247    return 0;
248}
249
/*
 * Parse instr as a non-negative decimal integer.
 *
 * Returns the value, or -1 after warning under the name nam if there
 * is trailing text after the number or the result is negative.
 *
 * NOTE(review): the zstrtol() result is narrowed to int before the
 * range check, so values that do not fit in an int are not reliably
 * rejected -- confirm whether callers need that.
 */

/**/
static int
getposint(char *instr, char *nam)
{
    char *rest;
    int value = (int)zstrtol(instr, &rest, 10);

    if (value >= 0 && !*rest)
	return value;

    zwarnnam(nam, "integer expected: %s", instr);
    return -1;
}
265
266/**/
267static int
268bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func))
269{
270    int ret, capcount, *ovec, ovecsize, c;
271    char *matched_portion = NULL;
272    char *plaintext = NULL;
273    char *receptacle = NULL;
274    int return_value = 1;
275    /* The subject length and offset start are both int values in pcre_exec */
276    int subject_len;
277    int offset_start = 0;
278    int want_offset_pair = 0;
279
280    if (pcre_pattern == NULL) {
281	zwarnnam(nam, "no pattern has been compiled");
282	return 1;
283    }
284
285    if(OPT_HASARG(ops,c='a')) {
286	receptacle = OPT_ARG(ops,c);
287    }
288    if(OPT_HASARG(ops,c='v')) {
289	matched_portion = OPT_ARG(ops,c);
290    }
291    if(OPT_HASARG(ops,c='n')) { /* The offset position to start the search, in bytes. */
292	offset_start = getposint(OPT_ARG(ops,c), nam);
293    }
294    /* For the entire match, 'Return' the offset byte positions instead of the matched string */
295    if(OPT_ISSET(ops,'b')) want_offset_pair = 1;
296
297    if(!*args) {
298	zwarnnam(nam, "not enough arguments");
299    }
300
301    if ((ret = pcre_fullinfo(pcre_pattern, pcre_hints, PCRE_INFO_CAPTURECOUNT, &capcount)))
302    {
303	zwarnnam(nam, "error %d in fullinfo", ret);
304	return 1;
305    }
306
307    ovecsize = (capcount+1)*3;
308    ovec = zalloc(ovecsize*sizeof(int));
309
310    plaintext = ztrdup(*args);
311    unmetafy(plaintext, NULL);
312    subject_len = (int)strlen(plaintext);
313
314    if (offset_start < 0 || offset_start >= subject_len)
315	ret = PCRE_ERROR_NOMATCH;
316    else
317	ret = pcre_exec(pcre_pattern, pcre_hints, plaintext, subject_len, offset_start, 0, ovec, ovecsize);
318
319    if (ret==0) return_value = 0;
320    else if (ret==PCRE_ERROR_NOMATCH) /* no match */;
321    else if (ret>0) {
322	zpcre_get_substrings(plaintext, ovec, ret, matched_portion, receptacle,
323			     want_offset_pair, 0, 0);
324	return_value = 0;
325    }
326    else {
327	zwarnnam(nam, "error in pcre_exec [%d]", ret);
328    }
329
330    if (ovec)
331	zfree(ovec, ovecsize*sizeof(int));
332
333    return return_value;
334}
335
336/**/
337static int
338cond_pcre_match(char **a, int id)
339{
340    pcre *pcre_pat;
341    const char *pcre_err;
342    char *lhstr, *rhre, *lhstr_plain, *rhre_plain, *avar=NULL;
343    int r = 0, pcre_opts = 0, pcre_errptr, capcnt, *ov, ovsize;
344    int return_value = 0;
345
346    if (zpcre_utf8_enabled())
347	pcre_opts |= PCRE_UTF8;
348
349    lhstr = cond_str(a,0,0);
350    rhre = cond_str(a,1,0);
351    lhstr_plain = ztrdup(lhstr);
352    rhre_plain = ztrdup(rhre);
353    unmetafy(lhstr_plain, NULL);
354    unmetafy(rhre_plain, NULL);
355    pcre_pat = NULL;
356    ov = NULL;
357    ovsize = 0;
358
359    if (isset(BASHREMATCH))
360	avar="BASH_REMATCH";
361
362    switch(id) {
363	 case CPCRE_PLAIN:
364		pcre_pat = pcre_compile(rhre_plain, pcre_opts, &pcre_err, &pcre_errptr, NULL);
365		if (pcre_pat == NULL) {
366		    zwarn("failed to compile regexp /%s/: %s", rhre, pcre_err);
367		    break;
368		}
369                pcre_fullinfo(pcre_pat, NULL, PCRE_INFO_CAPTURECOUNT, &capcnt);
370    		ovsize = (capcnt+1)*3;
371		ov = zalloc(ovsize*sizeof(int));
372    		r = pcre_exec(pcre_pat, NULL, lhstr_plain, strlen(lhstr_plain), 0, 0, ov, ovsize);
373		/* r < 0 => error; r==0 match but not enough size in ov
374		 * r > 0 => (r-1) substrings found; r==1 => no substrings
375		 */
376    		if (r==0) {
377		    zwarn("reportable zsh problem: pcre_exec() returned 0");
378		    return_value = 1;
379		    break;
380		}
381	        else if (r==PCRE_ERROR_NOMATCH) {
382		    return_value = 0; /* no match */
383		    break;
384		}
385		else if (r<0) {
386		    zwarn("pcre_exec() error [%d]", r);
387		    break;
388		}
389                else if (r>0) {
390		    zpcre_get_substrings(lhstr_plain, ov, r, NULL, avar, 0,
391					 isset(BASHREMATCH),
392					 !isset(BASHREMATCH));
393		    return_value = 1;
394		    break;
395		}
396		break;
397    }
398
399    if (lhstr_plain)
400	free(lhstr_plain);
401    if(rhre_plain)
402	free(rhre_plain);
403    if (pcre_pat)
404	pcre_free(pcre_pat);
405    if (ov)
406	zfree(ov, ovsize*sizeof(int));
407
408    return return_value;
409}
410
/*
 * Conditions provided by this module: the infix operator -pcre-match,
 * handled by cond_pcre_match() with id CPCRE_PLAIN.
 */
static struct conddef cotab[] = {
    CONDDEF("pcre-match", CONDF_INFIX, cond_pcre_match, 0, 0, CPCRE_PLAIN)
    /* CONDDEF can register =~ but it won't be found */
};
415
416/**/
417#else /* !(HAVE_PCRE_COMPILE && HAVE_PCRE_EXEC) */
418
/* PCRE is not usable in this build: route every builtin to bin_notavail */
# define bin_pcre_compile bin_notavail
# define bin_pcre_study bin_notavail
# define bin_pcre_match bin_notavail
422
423/**/
424#endif /* !(HAVE_PCRE_COMPILE && HAVE_PCRE_EXEC) */
425
/*
 * Builtins provided by this module.  The option strings list the
 * flags each handler tests: "aimxs" for pcre_compile, and for
 * pcre_match the argument-taking -a, -v, -n plus the plain flag -b.
 * NOTE(review): the numeric fields are presumably flags and
 * min/max argument counts -- confirm against the BUILTIN macro.
 */
static struct builtin bintab[] = {
    BUILTIN("pcre_compile", 0, bin_pcre_compile, 1, 1, 0, "aimxs",  NULL),
    BUILTIN("pcre_match",   0, bin_pcre_match,   1, 1, 0, "a:v:n:b",    NULL),
    BUILTIN("pcre_study",   0, bin_pcre_study,   0, 0, 0, NULL,    NULL)
};
431
432
/*
 * Feature table handed to featuresarray(), handlefeatures() and
 * setfeatureenables(): the builtin table always, the condition table
 * only when PCRE is actually available.
 * NOTE(review): the remaining NULL/0 pairs are presumably the unused
 * feature kinds of struct features -- confirm against its definition.
 */
static struct features module_features = {
    bintab, sizeof(bintab)/sizeof(*bintab),
#if defined(HAVE_PCRE_COMPILE) && defined(HAVE_PCRE_EXEC)
    cotab, sizeof(cotab)/sizeof(*cotab),
#else /* !(HAVE_PCRE_COMPILE && HAVE_PCRE_EXEC) */
    NULL, 0,
#endif /* !(HAVE_PCRE_COMPILE && HAVE_PCRE_EXEC) */
    NULL, 0,
    NULL, 0,
    0
};
444
445
/* Module hook: this module needs no setup, so just report success (0). */

/**/
int
setup_(UNUSED(Module m))
{
    return 0;
}
452
/*
 * Module hook: store in *features the array that featuresarray()
 * builds from module_features.  Always returns 0.
 */

/**/
int
features_(Module m, char ***features)
{
    *features = featuresarray(m, &module_features);
    return 0;
}
460
/*
 * Module hook: pass the enables array to handlefeatures() together
 * with module_features and return its result.
 */

/**/
int
enables_(Module m, int **enables)
{
    return handlefeatures(m, &module_features, enables);
}
467
468/**/
469int
470boot_(Module m)
471{
472    return 0;
473}
474
/*
 * Module hook: call setfeatureenables() with a NULL enables array for
 * module_features and return its result.
 */

/**/
int
cleanup_(Module m)
{
    return setfeatureenables(m, &module_features, NULL);
}
481
/* Module hook: nothing to tear down here, so just report success (0). */

/**/
int
finish_(UNUSED(Module m))
{
    return 0;
}
488