1/* BEGIN LICENSE BLOCK
2 * Version: CMPL 1.1
3 *
4 * The contents of this file are subject to the Cisco-style Mozilla Public
5 * License Version 1.1 (the "License"); you may not use this file except
6 * in compliance with the License.  You may obtain a copy of the License
7 * at www.eclipse-clp.org/license.
8 *
9 * Software distributed under the License is distributed on an "AS IS"
10 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.  See
11 * the License for the specific language governing rights and limitations
12 * under the License.
13 *
14 * The Original Code is  The ECLiPSe Constraint Logic Programming System.
15 * The Initial Developer of the Original Code is  Cisco Systems, Inc.
16 * Portions created by the Initial Developer are
17 * Copyright (C) 2006 Cisco Systems, Inc.  All Rights Reserved.
18 *
19 * Contributor(s): J. Chamois
20 *
21 * END LICENSE BLOCK */
22/*----------------------------------------------------------------------
23 * System:	ECLiPSe Constraint Logic Programming System
24 * Author:	J Chamois
25 * Licence:	This code is in the public domain
26 * Version:	$Id: eregex.c,v 1.1 2006/09/23 01:53:30 snovello Exp $
27 *----------------------------------------------------------------------*/
28
29#include <sys/types.h>
30#include <stdlib.h>	/* for malloc() */
31#include <string.h>
32#include <pcreposix.h>
33#include "eclipse.h"
34
35
/*
 * Output routines defined outside this file.  They are used below to
 * print diagnostic text followed by a newline on the stream returned by
 * ec_stream_id(2).
 */
Extern stream_id Winapi	ec_stream_id(int);
Extern int ec_outf(stream_id, const char*, int);
Extern int ec_newline(stream_id);

/* Size in bytes of the local buffer that receives regerror() text. */
#define MAXMSGSIZE 512
/* Status code returned by the predicates after an error message was printed. */
#define EC_EXTERNAL_ERROR -213
42
43
44static void
45_regfree(t_ext_ptr preg)
46{
47    regfree((regex_t*) preg);
48    free(preg);
49}
50
51
/*
 * Handle type descriptor for compiled regular expressions: given to
 * ec_handle() when a regex_t is wrapped and to ec_get_handle() when one
 * is unwrapped.  Only the first slot (_regfree) is filled in.
 */
static t_ext_type ec_xt_regex = {_regfree,0,0,0,0,0,0,0,0};
53
54
55static int
56_reg_error(int err, regex_t *preg)
57{
58    char buf[MAXMSGSIZE];
59    (void) regerror(err, preg, buf, MAXMSGSIZE);
60    (void) ec_outf(ec_stream_id(2), buf, strlen(buf));
61    (void) ec_newline(ec_stream_id(2));
62    return EC_EXTERNAL_ERROR;
63}
64
65
66static int
67_get_flags(pword list, int *pcflags, int *peflags)
68{
69    int err;
70    pword car, cdr;
71    *pcflags = REG_EXTENDED;
72    *peflags = 0;
73    for ( ; (err = ec_get_list(list,&car,&cdr)) == PSUCCEED; list = cdr)
74    {
75	char *string;
76        err = ec_get_string(car, &string);
77        if (err != PSUCCEED) return err;
78        if (!strcmp(string, "extended"))	*pcflags |= REG_EXTENDED;
79        else if (!strcmp(string, "basic"))	*pcflags &= ~REG_EXTENDED;
80        else if (!strcmp(string, "icase"))	*pcflags |= REG_ICASE;
81        else if (!strcmp(string, "newline"))	*pcflags |= REG_NEWLINE;
82        else if (!strcmp(string, "nosub"))	*pcflags |= REG_NOSUB;
83        else if (!strcmp(string, "notbol"))	*peflags |= REG_NOTBOL;
84        else if (!strcmp(string, "noteol"))	*peflags |= REG_NOTEOL;
85	else return RANGE_ERROR;
86    }
87    return err == PFAIL ? PSUCCEED : err;
88}
89
90
91static int
92_get_compiled_pattern(pword arg, int cflags, regex_t *pcompiled_reg, regex_t **ppreg)
93{
94    int err = ec_get_handle(arg, &ec_xt_regex, (t_ext_ptr*) ppreg);
95    if (err != PSUCCEED)
96    {
97	char *pattern;
98	err = ec_get_string(arg, &pattern);
99	if (err != PSUCCEED) return err;
100
101	err = regcomp(pcompiled_reg, pattern, cflags);
102	if (err) return _reg_error(err, pcompiled_reg);
103	*ppreg = pcompiled_reg;
104    }
105    return PSUCCEED;
106}
107
108
109int
110ec_regcomp()		/* (+Pattern,+Flags,-CompiledPattern) */
111{
112    int err, cflags, eflags;
113    char *pattern;
114    regex_t *preg;
115
116    err = ec_get_string(ec_arg(1), &pattern);
117    if (err != PSUCCEED) return err;
118    err = _get_flags(ec_arg(2), &cflags, &eflags);
119    if (err != PSUCCEED) return err;
120
121    preg = (regex_t *) malloc(sizeof(regex_t));
122    err = regcomp(preg, pattern, cflags);
123    if (err) return _reg_error(err, preg);
124
125    return ec_unify(ec_arg(3), ec_handle(&ec_xt_regex, preg));
126}
127
128
129int
130ec_regmatch()		/* (+Pattern,+String,+Flags) */
131{
132    int err, cflags, eflags;
133    regex_t compiled_reg, *preg;
134    char *string;
135
136    err = ec_get_string(ec_arg(2), &string);
137    if (err != PSUCCEED) return err;
138    err = _get_flags(ec_arg(3), &cflags, &eflags);
139    if (err != PSUCCEED) return err;
140    err = _get_compiled_pattern(ec_arg(1), cflags|REG_NOSUB, &compiled_reg, &preg);
141    if (err != PSUCCEED) return err;
142
143    err = regexec(preg, string, 0, 0, eflags);
144    if (preg == &compiled_reg)
145	regfree(preg);
146
147    return err == 0 ? PSUCCEED
148    	: err == REG_NOMATCH ? PFAIL
149	: _reg_error(err, preg);
150}
151
152
153int
154ec_regmatch4()		/* (+Pattern,+String,+Flags,-Match) */
155{
156    int err, cflags, eflags;
157    regex_t compiled_reg, *preg;
158    regmatch_t match;
159    char *string;
160
161    err = ec_get_string(ec_arg(2), &string);
162    if (err != PSUCCEED) return err;
163    err = _get_flags(ec_arg(3), &cflags, &eflags);
164    if (err != PSUCCEED) return err;
165    if (cflags & REG_NOSUB) return RANGE_ERROR;
166    err = _get_compiled_pattern(ec_arg(1), cflags, &compiled_reg, &preg);
167    if (err != PSUCCEED) return err;
168
169    err = regexec(preg, string, 1, &match, eflags);
170    if (preg == &compiled_reg)
171	regfree(preg);
172    if (err)
173	return err == REG_NOMATCH ? PFAIL : _reg_error(err, preg);
174    return ec_unify(ec_arg(4),
175	ec_length_string(match.rm_eo - match.rm_so, string + match.rm_so));
176}
177
178
179int
180ec_regmatchsub()	/* (+Pattern,+String,+Flags,-ListOfSubMatches) */
181{
182    int err, cflags, eflags;
183    regex_t compiled_reg, *preg;
184    size_t nmatch;
185    char *string;
186    pword list;
187
188    err = ec_get_string(ec_arg(2), &string);
189    if (err != PSUCCEED) return err;
190    err = _get_flags(ec_arg(3), &cflags, &eflags);
191    if (err != PSUCCEED) return err;
192    if (cflags & REG_NOSUB) return RANGE_ERROR;
193    err = _get_compiled_pattern(ec_arg(1), cflags, &compiled_reg, &preg);
194    if (err != PSUCCEED) return err;
195
196    nmatch = preg->re_nsub + 1;
197    {
198	int res;
199#ifdef __GNUC__
200	regmatch_t pmatch[nmatch];	/* not standard C! */
201#else
202	regmatch_t *pmatch = (regmatch_t *) malloc(nmatch*sizeof(regmatch_t));
203#endif
204
205	err = regexec(preg, string, nmatch, pmatch, eflags);
206	if (preg == &compiled_reg)
207	    regfree(preg);
208	if (err)
209	{
210	    res = (err == REG_NOMATCH ? PFAIL : _reg_error(err, preg));
211	}
212	else
213	{
214	    list = ec_nil();    /* build the list backwards */
215	    while(--nmatch)
216	    {
217		list = ec_list(
218		    ec_length_string(pmatch[nmatch].rm_eo - pmatch[nmatch].rm_so,
219			string + pmatch[nmatch].rm_so),
220		    list);
221	    }
222	    res = ec_unify(ec_arg(4), list);
223	}
224#ifndef __GNUC__
225	free(pmatch);
226#endif
227	return res;
228    }
229}
230
231
232int
233ec_regmatchall()	/* (+Pattern,+String,+Flags,-ListOfFullMatches) */
234{
235    int err, cflags, eflags;
236    regex_t compiled_reg, *preg;
237    regmatch_t match;
238    char *string;
239    pword list, tail, newtail;
240    long lstring;
241
242    err = ec_get_string_length(ec_arg(2), &string, &lstring);
243    if (err != PSUCCEED) return err;
244    err = _get_flags(ec_arg(3), &cflags, &eflags);
245    if (err != PSUCCEED) return err;
246    if (cflags & REG_NOSUB) return RANGE_ERROR;
247    err = _get_compiled_pattern(ec_arg(1), cflags, &compiled_reg, &preg);
248    if (err != PSUCCEED) return err;
249
250    list = tail = ec_newvar();	/* build list forward */
251    for (;;)
252    {
253	err = regexec(preg, string, 1, &match, eflags);
254	if (err == REG_NOMATCH)
255	    break;
256	if (err)
257	{
258	    if (preg == &compiled_reg)
259		regfree(preg);
260	    return _reg_error(err, preg);
261	}
262	if (match.rm_eo == match.rm_so)
263	{
264	    char msg[] = "infinitely many empty strings match";
265	    (void) ec_outf(ec_stream_id(2), msg, strlen(msg));
266	    (void) ec_newline(ec_stream_id(2));
267	    return EC_EXTERNAL_ERROR;
268	}
269
270	newtail = ec_newvar();	/* append list element */
271	(void) ec_unify(tail, ec_list(
272		ec_length_string(match.rm_eo - match.rm_so,
273		    string + match.rm_so),
274		newtail));
275	tail = newtail;
276
277	if (match.rm_eo > lstring)
278	    break;
279	lstring -= match.rm_eo;
280	string += match.rm_eo;
281    }
282    (void) ec_unify(tail, ec_nil());
283    if (preg == &compiled_reg)
284	regfree(preg);
285    return ec_unify(ec_arg(4), list);
286}
287
288
289int
290ec_regsplit()	/* (+Pattern,+String,+Flags,-SplitString) */
291{
292    int err, cflags, eflags;
293    regex_t compiled_reg, *preg;
294    regmatch_t match;
295    char *string;
296    pword list, tail, newtail;
297    long lstring;
298
299    err = ec_get_string_length(ec_arg(2), &string, &lstring);
300    if (err != PSUCCEED) return err;
301    err = _get_flags(ec_arg(3), &cflags, &eflags);
302    if (err != PSUCCEED) return err;
303    if (cflags & REG_NOSUB) return RANGE_ERROR;
304    err = _get_compiled_pattern(ec_arg(1), cflags, &compiled_reg, &preg);
305    if (err != PSUCCEED) return err;
306
307    list = tail = ec_newvar();	/* build list forward */
308    for (;;)
309    {
310	err = regexec(preg, string, 1, &match, eflags);
311	if (err == REG_NOMATCH)
312	    break;
313	if (err)
314	{
315	    if (preg == &compiled_reg)
316		regfree(preg);
317	    return _reg_error(err, preg);
318	}
319	if (match.rm_eo == match.rm_so)
320	{
321	    char msg[] = "infinitely many empty strings match";
322	    (void) ec_outf(ec_stream_id(2), msg, strlen(msg));
323	    (void) ec_newline(ec_stream_id(2));
324	    return EC_EXTERNAL_ERROR;
325	}
326
327	newtail = ec_newvar();	/* append list element */
328	(void) ec_unify(tail, ec_list(
329		ec_length_string(match.rm_so,
330		    string), ec_list(
331		ec_length_string(match.rm_eo - match.rm_so,
332		    string + match.rm_so),
333		newtail)));
334	tail = newtail;
335
336	if (match.rm_eo > lstring)
337	    break;
338	lstring -= match.rm_eo;
339	string += match.rm_eo;
340    }
341    (void) ec_unify(tail, ec_list(
342	    ec_length_string(lstring, string), ec_nil()));
343    if (preg == &compiled_reg)
344	regfree(preg);
345    return ec_unify(ec_arg(4), list);
346}
347
348