1/* BEGIN LICENSE BLOCK 2 * Version: CMPL 1.1 3 * 4 * The contents of this file are subject to the Cisco-style Mozilla Public 5 * License Version 1.1 (the "License"); you may not use this file except 6 * in compliance with the License. You may obtain a copy of the License 7 * at www.eclipse-clp.org/license. 8 * 9 * Software distributed under the License is distributed on an "AS IS" 10 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 11 * the License for the specific language governing rights and limitations 12 * under the License. 13 * 14 * The Original Code is The ECLiPSe Constraint Logic Programming System. 15 * The Initial Developer of the Original Code is Cisco Systems, Inc. 16 * Portions created by the Initial Developer are 17 * Copyright (C) 2006 Cisco Systems, Inc. All Rights Reserved. 18 * 19 * Contributor(s): J. Chamois 20 * 21 * END LICENSE BLOCK */ 22/*---------------------------------------------------------------------- 23 * System: ECLiPSe Constraint Logic Programming System 24 * Author: J Chamois 25 * Licence: This code is in the public domain 26 * Version: $Id: eregex.c,v 1.1 2006/09/23 01:53:30 snovello Exp $ 27 *----------------------------------------------------------------------*/ 28 29#include <sys/types.h> 30#include <stdlib.h> /* for malloc() */ 31#include <string.h> 32#include <pcreposix.h> 33#include "eclipse.h" 34 35 36Extern stream_id Winapi ec_stream_id(int); 37Extern int ec_outf(stream_id, const char*, int); 38Extern int ec_newline(stream_id); 39 40#define MAXMSGSIZE 512 41#define EC_EXTERNAL_ERROR -213 42 43 44static void 45_regfree(t_ext_ptr preg) 46{ 47 regfree((regex_t*) preg); 48 free(preg); 49} 50 51 52static t_ext_type ec_xt_regex = {_regfree,0,0,0,0,0,0,0,0}; 53 54 55static int 56_reg_error(int err, regex_t *preg) 57{ 58 char buf[MAXMSGSIZE]; 59 (void) regerror(err, preg, buf, MAXMSGSIZE); 60 (void) ec_outf(ec_stream_id(2), buf, strlen(buf)); 61 (void) ec_newline(ec_stream_id(2)); 62 return EC_EXTERNAL_ERROR; 63} 64 65 66static int 67_get_flags(pword list, int *pcflags, int *peflags) 68{ 69 int err; 70 pword car, cdr; 71 *pcflags = REG_EXTENDED; 72 *peflags = 0; 73 for ( ; (err = ec_get_list(list,&car,&cdr)) == PSUCCEED; list = cdr) 74 { 75 char *string; 76 err = ec_get_string(car, &string); 77 if (err != PSUCCEED) return err; 78 if (!strcmp(string, "extended")) *pcflags |= REG_EXTENDED; 79 else if (!strcmp(string, "basic")) *pcflags &= ~REG_EXTENDED; 80 else if (!strcmp(string, "icase")) *pcflags |= REG_ICASE; 81 else if (!strcmp(string, "newline")) *pcflags |= REG_NEWLINE; 82 else if (!strcmp(string, "nosub")) *pcflags |= REG_NOSUB; 83 else if (!strcmp(string, "notbol")) *peflags |= REG_NOTBOL; 84 else if (!strcmp(string, "noteol")) *peflags |= REG_NOTEOL; 85 else return RANGE_ERROR; 86 } 87 return err == PFAIL ? PSUCCEED : err; 88} 89 90 91static int 92_get_compiled_pattern(pword arg, int cflags, regex_t *pcompiled_reg, regex_t **ppreg) 93{ 94 int err = ec_get_handle(arg, &ec_xt_regex, (t_ext_ptr*) ppreg); 95 if (err != PSUCCEED) 96 { 97 char *pattern; 98 err = ec_get_string(arg, &pattern); 99 if (err != PSUCCEED) return err; 100 101 err = regcomp(pcompiled_reg, pattern, cflags); 102 if (err) return _reg_error(err, pcompiled_reg); 103 *ppreg = pcompiled_reg; 104 } 105 return PSUCCEED; 106} 107 108 109int 110ec_regcomp() /* (+Pattern,+Flags,-CompiledPattern) */ 111{ 112 int err, cflags, eflags; 113 char *pattern; 114 regex_t *preg; 115 116 err = ec_get_string(ec_arg(1), &pattern); 117 if (err != PSUCCEED) return err; 118 err = _get_flags(ec_arg(2), &cflags, &eflags); 119 if (err != PSUCCEED) return err; 120 121 preg = (regex_t *) malloc(sizeof(regex_t)); 122 err = regcomp(preg, pattern, cflags); 123 if (err) return _reg_error(err, preg); 124 125 return ec_unify(ec_arg(3), ec_handle(&ec_xt_regex, preg)); 126} 127 128 129int 130ec_regmatch() /* (+Pattern,+String,+Flags) */ 131{ 132 int err, cflags, eflags; 133 regex_t compiled_reg, *preg; 134 char *string; 135 136 err = ec_get_string(ec_arg(2), &string); 137 if (err != PSUCCEED) return err; 138 err = _get_flags(ec_arg(3), &cflags, &eflags); 139 if (err != PSUCCEED) return err; 140 err = _get_compiled_pattern(ec_arg(1), cflags|REG_NOSUB, &compiled_reg, &preg); 141 if (err != PSUCCEED) return err; 142 143 err = regexec(preg, string, 0, 0, eflags); 144 if (preg == &compiled_reg) 145 regfree(preg); 146 147 return err == 0 ? PSUCCEED 148 : err == REG_NOMATCH ? PFAIL 149 : _reg_error(err, preg); 150} 151 152 153int 154ec_regmatch4() /* (+Pattern,+String,+Flags,-Match) */ 155{ 156 int err, cflags, eflags; 157 regex_t compiled_reg, *preg; 158 regmatch_t match; 159 char *string; 160 161 err = ec_get_string(ec_arg(2), &string); 162 if (err != PSUCCEED) return err; 163 err = _get_flags(ec_arg(3), &cflags, &eflags); 164 if (err != PSUCCEED) return err; 165 if (cflags & REG_NOSUB) return RANGE_ERROR; 166 err = _get_compiled_pattern(ec_arg(1), cflags, &compiled_reg, &preg); 167 if (err != PSUCCEED) return err; 168 169 err = regexec(preg, string, 1, &match, eflags); 170 if (preg == &compiled_reg) 171 regfree(preg); 172 if (err) 173 return err == REG_NOMATCH ? PFAIL : _reg_error(err, preg); 174 return ec_unify(ec_arg(4), 175 ec_length_string(match.rm_eo - match.rm_so, string + match.rm_so)); 176} 177 178 179int 180ec_regmatchsub() /* (+Pattern,+String,+Flags,-ListOfSubMatches) */ 181{ 182 int err, cflags, eflags; 183 regex_t compiled_reg, *preg; 184 size_t nmatch; 185 char *string; 186 pword list; 187 188 err = ec_get_string(ec_arg(2), &string); 189 if (err != PSUCCEED) return err; 190 err = _get_flags(ec_arg(3), &cflags, &eflags); 191 if (err != PSUCCEED) return err; 192 if (cflags & REG_NOSUB) return RANGE_ERROR; 193 err = _get_compiled_pattern(ec_arg(1), cflags, &compiled_reg, &preg); 194 if (err != PSUCCEED) return err; 195 196 nmatch = preg->re_nsub + 1; 197 { 198 int res; 199#ifdef __GNUC__ 200 regmatch_t pmatch[nmatch]; /* not standard C! */ 201#else 202 regmatch_t *pmatch = (regmatch_t *) malloc(nmatch*sizeof(regmatch_t)); 203#endif 204 205 err = regexec(preg, string, nmatch, pmatch, eflags); 206 if (preg == &compiled_reg) 207 regfree(preg); 208 if (err) 209 { 210 res = (err == REG_NOMATCH ? PFAIL : _reg_error(err, preg)); 211 } 212 else 213 { 214 list = ec_nil(); /* build the list backwards */ 215 while(--nmatch) 216 { 217 list = ec_list( 218 ec_length_string(pmatch[nmatch].rm_eo - pmatch[nmatch].rm_so, 219 string + pmatch[nmatch].rm_so), 220 list); 221 } 222 res = ec_unify(ec_arg(4), list); 223 } 224#ifndef __GNUC__ 225 free(pmatch); 226#endif 227 return res; 228 } 229} 230 231 232int 233ec_regmatchall() /* (+Pattern,+String,+Flags,-ListOfFullMatches) */ 234{ 235 int err, cflags, eflags; 236 regex_t compiled_reg, *preg; 237 regmatch_t match; 238 char *string; 239 pword list, tail, newtail; 240 long lstring; 241 242 err = ec_get_string_length(ec_arg(2), &string, &lstring); 243 if (err != PSUCCEED) return err; 244 err = _get_flags(ec_arg(3), &cflags, &eflags); 245 if (err != PSUCCEED) return err; 246 if (cflags & REG_NOSUB) return RANGE_ERROR; 247 err = _get_compiled_pattern(ec_arg(1), cflags, &compiled_reg, &preg); 248 if (err != PSUCCEED) return err; 249 250 list = tail = ec_newvar(); /* build list forward */ 251 for (;;) 252 { 253 err = regexec(preg, string, 1, &match, eflags); 254 if (err == REG_NOMATCH) 255 break; 256 if (err) 257 { 258 if (preg == &compiled_reg) 259 regfree(preg); 260 return _reg_error(err, preg); 261 } 262 if (match.rm_eo == match.rm_so) 263 { 264 char msg[] = "infinitely many empty strings match"; 265 (void) ec_outf(ec_stream_id(2), msg, strlen(msg)); 266 (void) ec_newline(ec_stream_id(2)); 267 return EC_EXTERNAL_ERROR; 268 } 269 270 newtail = ec_newvar(); /* append list element */ 271 (void) ec_unify(tail, ec_list( 272 ec_length_string(match.rm_eo - match.rm_so, 273 string + match.rm_so), 274 newtail)); 275 tail = newtail; 276 277 if (match.rm_eo > lstring) 278 break; 279 lstring -= match.rm_eo; 280 string += match.rm_eo; 281 } 282 (void) ec_unify(tail, ec_nil()); 283 if (preg == &compiled_reg) 284 regfree(preg); 285 return ec_unify(ec_arg(4), list); 286} 287 288 289int 290ec_regsplit() /* (+Pattern,+String,+Flags,-SplitString) */ 291{ 292 int err, cflags, eflags; 293 regex_t compiled_reg, *preg; 294 regmatch_t match; 295 char *string; 296 pword list, tail, newtail; 297 long lstring; 298 299 err = ec_get_string_length(ec_arg(2), &string, &lstring); 300 if (err != PSUCCEED) return err; 301 err = _get_flags(ec_arg(3), &cflags, &eflags); 302 if (err != PSUCCEED) return err; 303 if (cflags & REG_NOSUB) return RANGE_ERROR; 304 err = _get_compiled_pattern(ec_arg(1), cflags, &compiled_reg, &preg); 305 if (err != PSUCCEED) return err; 306 307 list = tail = ec_newvar(); /* build list forward */ 308 for (;;) 309 { 310 err = regexec(preg, string, 1, &match, eflags); 311 if (err == REG_NOMATCH) 312 break; 313 if (err) 314 { 315 if (preg == &compiled_reg) 316 regfree(preg); 317 return _reg_error(err, preg); 318 } 319 if (match.rm_eo == match.rm_so) 320 { 321 char msg[] = "infinitely many empty strings match"; 322 (void) ec_outf(ec_stream_id(2), msg, strlen(msg)); 323 (void) ec_newline(ec_stream_id(2)); 324 return EC_EXTERNAL_ERROR; 325 } 326 327 newtail = ec_newvar(); /* append list element */ 328 (void) ec_unify(tail, ec_list( 329 ec_length_string(match.rm_so, 330 string), ec_list( 331 ec_length_string(match.rm_eo - match.rm_so, 332 string + match.rm_so), 333 newtail))); 334 tail = newtail; 335 336 if (match.rm_eo > lstring) 337 break; 338 lstring -= match.rm_eo; 339 string += match.rm_eo; 340 } 341 (void) ec_unify(tail, ec_list( 342 ec_length_string(lstring, string), ec_nil())); 343 if (preg == &compiled_reg) 344 regfree(preg); 345 return ec_unify(ec_arg(4), list); 346} 347 348