1/* 2 tre-python.c - TRE Python language bindings 3 4 This sotfware is released under a BSD-style license. 5 See the file LICENSE for details and copyright. 6 7 The original version of this code was contributed by 8 Nikolai Saoukh <nms+python@otdel1.org>. 9 10*/ 11 12 13#include "Python.h" 14#include "structmember.h" 15 16#include <tre/tre.h> 17 18#define TRE_MODULE "tre" 19 20typedef struct { 21 PyObject_HEAD 22 regex_t rgx; 23 int flags; 24} TrePatternObject; 25 26typedef struct { 27 PyObject_HEAD 28 regaparams_t ap; 29} TreFuzzynessObject; 30 31typedef struct { 32 PyObject_HEAD 33 regamatch_t am; 34 PyObject *targ; /* string we matched against */ 35 TreFuzzynessObject *fz; /* fuzzyness used during match */ 36} TreMatchObject; 37 38 39static PyObject *ErrorObject; 40 41static void 42_set_tre_err(int rc, regex_t *rgx) 43{ 44 PyObject *errval; 45 char emsg[256]; 46 size_t elen; 47 48 elen = tre_regerror(rc, rgx, emsg, sizeof(emsg)); 49 if (emsg[elen] == '\0') 50 elen--; 51 errval = Py_BuildValue("s#", emsg, elen); 52 PyErr_SetObject(ErrorObject, errval); 53 Py_XDECREF(errval); 54} 55 56static PyObject * 57TreFuzzyness_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 58{ 59 static char *kwlist[] = { 60 "delcost", "inscost", "maxcost", "subcost", 61 "maxdel", "maxerr", "maxins", "maxsub", 62 NULL 63 }; 64 65 TreFuzzynessObject *self; 66 67 self = (TreFuzzynessObject*)type->tp_alloc(type, 0); 68 if (self == NULL) 69 return NULL; 70 tre_regaparams_default(&self->ap); 71 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|iiiiiiii", kwlist, 72 &self->ap.cost_del, &self->ap.cost_ins, 73 &self->ap.max_cost, &self->ap.cost_subst, 74 &self->ap.max_del, &self->ap.max_err, 75 &self->ap.max_ins, &self->ap.max_subst)) 76 { 77 Py_DECREF(self); 78 return NULL; 79 } 80 return (PyObject*)self; 81} 82 83static PyObject * 84TreFuzzyness_repr(PyObject *obj) 85{ 86 TreFuzzynessObject *self = (TreFuzzynessObject*)obj; 87 PyObject *o; 88 89 o = PyString_FromFormat("%s(delcost=%d,inscost=%d,maxcost=%d,subcost=%d," 90 "maxdel=%d,maxerr=%d,maxins=%d,maxsub=%d)", 91 self->ob_type->tp_name, self->ap.cost_del, 92 self->ap.cost_ins, self->ap.max_cost, 93 self->ap.cost_subst, self->ap.max_del, 94 self->ap.max_err, self->ap.max_ins, 95 self->ap.max_subst); 96 return o; 97} 98 99static PyMemberDef TreFuzzyness_members[] = { 100 { "delcost", T_INT, offsetof(TreFuzzynessObject, ap.cost_del), 0, 101 "The cost of a deleted character" }, 102 { "inscost", T_INT, offsetof(TreFuzzynessObject, ap.cost_ins), 0, 103 "The cost of an inserted character" }, 104 { "maxcost", T_INT, offsetof(TreFuzzynessObject, ap.max_cost), 0, 105 "The maximum allowed cost of a match. If this is set to zero, an exact " 106 "match is searched for" }, 107 { "subcost", T_INT, offsetof(TreFuzzynessObject, ap.cost_subst), 0, 108 "The cost of a substituted character" }, 109 { "maxdel", T_INT, offsetof(TreFuzzynessObject, ap.max_del), 0, 110 "Maximum allowed number of deleted characters" }, 111 { "maxerr", T_INT, offsetof(TreFuzzynessObject, ap.max_err), 0, 112 "Maximum allowed number of errors (inserts + deletes + substitutes)" }, 113 { "maxins", T_INT, offsetof(TreFuzzynessObject, ap.max_ins), 0, 114 "Maximum allowed number of inserted characters" }, 115 { "maxsub", T_INT, offsetof(TreFuzzynessObject, ap.max_subst), 0, 116 "Maximum allowed number of substituted characters" }, 117 { NULL } 118}; 119 120static PyTypeObject TreFuzzynessType = { 121 PyObject_HEAD_INIT(NULL) 122 0, /* ob_size */ 123 TRE_MODULE ".Fuzzyness", /* tp_name */ 124 sizeof(TreFuzzynessObject), /* tp_basicsize */ 125 0, /* tp_itemsize */ 126 /* methods */ 127 0, /* tp_dealloc */ 128 0, /* tp_print */ 129 0, /* tp_getattr */ 130 0, /* tp_setattr */ 131 0, /* tp_compare */ 132 TreFuzzyness_repr, /* tp_repr */ 133 0, /* tp_as_number */ 134 0, /* tp_as_sequence */ 135 0, /* tp_as_mapping */ 136 0, /* tp_hash */ 137 0, /* tp_call */ 138 0, /* tp_str */ 139 0, /* tp_getattro */ 140 0, /* tp_setattro */ 141 0, /* tp_as_buffer */ 142 Py_TPFLAGS_DEFAULT, /* tp_flags */ 143 /* tp_doc */ 144 TRE_MODULE ".fuzzyness object holds approximation parameters for match", 145 0, /* tp_traverse */ 146 0, /* tp_clear */ 147 0, /* tp_richcompare */ 148 0, /* tp_weaklistoffset */ 149 0, /* tp_iter */ 150 0, /* tp_iternext */ 151 0, /* tp_methods */ 152 TreFuzzyness_members, /* tp_members */ 153 0, /* tp_getset */ 154 0, /* tp_base */ 155 0, /* tp_dict */ 156 0, /* tp_descr_get */ 157 0, /* tp_descr_set */ 158 0, /* tp_dictoffset */ 159 0, /* tp_init */ 160 0, /* tp_alloc */ 161 TreFuzzyness_new /* tp_new */ 162}; 163 164static PyObject * 165PyTreMatch_groups(TreMatchObject *self, PyObject *dummy) 166{ 167 PyObject *result; 168 size_t i; 169 170 if (self->am.nmatch < 1) 171 { 172 Py_INCREF(Py_None); 173 return Py_None; 174 } 175 result = PyTuple_New(self->am.nmatch); 176 for (i = 0; i < self->am.nmatch; i++) 177 { 178 PyObject *range; 179 regmatch_t *rm = &self->am.pmatch[i]; 180 181 if (rm->rm_so == (-1) && rm->rm_eo == (-1)) 182 { 183 Py_INCREF(Py_None); 184 range = Py_None; 185 } 186 else 187 { 188 range = Py_BuildValue("(ii)", rm->rm_so, rm->rm_eo); 189 } 190 PyTuple_SetItem(result, i, range); 191 } 192 return (PyObject*)result; 193} 194 195static PyObject * 196PyTreMatch_groupi(PyObject *obj, int gn) 197{ 198 TreMatchObject *self = (TreMatchObject*)obj; 199 PyObject *result; 200 regmatch_t *rm; 201 202 if (gn < 0 || (size_t)gn > self->am.nmatch - 1) 203 { 204 PyErr_SetString(PyExc_ValueError, "out of bounds"); 205 return NULL; 206 } 207 rm = &self->am.pmatch[gn]; 208 if (rm->rm_so == (-1) && rm->rm_eo == (-1)) 209 { 210 Py_INCREF(Py_None); 211 return Py_None; 212 } 213 result = PySequence_GetSlice(self->targ, rm->rm_so, rm->rm_eo); 214 return result; 215} 216 217static PyObject * 218PyTreMatch_group(TreMatchObject *self, PyObject *grpno) 219{ 220 PyObject *result; 221 long gn; 222 223 gn = PyInt_AsLong(grpno); 224 225 if (PyErr_Occurred()) 226 return NULL; 227 228 result = PyTreMatch_groupi((PyObject*)self, gn); 229 return result; 230} 231 232static PyMethodDef TreMatch_methods[] = { 233 {"group", (PyCFunction)PyTreMatch_group, METH_O, 234 "return submatched string or None if a parenthesized subexpression did " 235 "not participate in a match"}, 236 {"groups", (PyCFunction)PyTreMatch_groups, METH_NOARGS, 237 "return the tuple of slice tuples for all parenthesized subexpressions " 238 "(None for not participated)"}, 239 {NULL, NULL} 240}; 241 242static PyMemberDef TreMatch_members[] = { 243 { "cost", T_INT, offsetof(TreMatchObject, am.cost), READONLY, 244 "Cost of the match" }, 245 { "numdel", T_INT, offsetof(TreMatchObject, am.num_del), READONLY, 246 "Number of deletes in the match" }, 247 { "numins", T_INT, offsetof(TreMatchObject, am.num_ins), READONLY, 248 "Number of inserts in the match" }, 249 { "numsub", T_INT, offsetof(TreMatchObject, am.num_subst), READONLY, 250 "Number of substitutes in the match" }, 251 { "fuzzyness", T_OBJECT, offsetof(TreMatchObject, fz), READONLY, 252 "Fuzzyness used during match" }, 253 { NULL } 254}; 255 256static void 257PyTreMatch_dealloc(TreMatchObject *self) 258{ 259 Py_XDECREF(self->targ); 260 Py_XDECREF(self->fz); 261 if (self->am.pmatch != NULL) 262 PyMem_Del(self->am.pmatch); 263 PyObject_Del(self); 264} 265 266static PySequenceMethods TreMatch_as_sequence_methods = { 267 0, /* sq_length */ 268 0, /* sq_concat */ 269 0, /* sq_repeat */ 270 PyTreMatch_groupi, /* sq_item */ 271 0, /* sq_slice */ 272 0, /* sq_ass_item */ 273 0, /* sq_ass_slice */ 274 0, /* sq_contains */ 275 0, /* sq_inplace_concat */ 276 0 /* sq_inplace_repeat */ 277}; 278 279static PyTypeObject TreMatchType = { 280 PyObject_HEAD_INIT(NULL) 281 0, /* ob_size */ 282 TRE_MODULE ".Match", /* tp_name */ 283 sizeof(TreMatchObject), /* tp_basicsize */ 284 0, /* tp_itemsize */ 285 /* methods */ 286 (destructor)PyTreMatch_dealloc, /* tp_dealloc */ 287 0, /* tp_print */ 288 0, /* tp_getattr */ 289 0, /* tp_setattr */ 290 0, /* tp_compare */ 291 0, /* tp_repr */ 292 0, /* tp_as_number */ 293 &TreMatch_as_sequence_methods, /* tp_as_sequence */ 294 0, /* tp_as_mapping */ 295 0, /* tp_hash */ 296 0, /* tp_call */ 297 0, /* tp_str */ 298 0, /* tp_getattro */ 299 0, /* tp_setattro */ 300 0, /* tp_as_buffer */ 301 Py_TPFLAGS_DEFAULT, /* tp_flags */ 302 TRE_MODULE ".match object holds result of successful match", /* tp_doc */ 303 0, /* tp_traverse */ 304 0, /* tp_clear */ 305 0, /* tp_richcompare */ 306 0, /* tp_weaklistoffset */ 307 0, /* tp_iter */ 308 0, /* tp_iternext */ 309 TreMatch_methods, /* tp_methods */ 310 TreMatch_members /* tp_members */ 311}; 312 313static TreMatchObject * 314newTreMatchObject(void) 315{ 316 TreMatchObject *self; 317 318 self = PyObject_New(TreMatchObject, &TreMatchType); 319 if (self == NULL) 320 return NULL; 321 memset(&self->am, '\0', sizeof(self->am)); 322 self->targ = NULL; 323 self->fz = NULL; 324 return self; 325} 326 327static PyObject * 328PyTrePattern_search(TrePatternObject *self, PyObject *args) 329{ 330 PyObject *pstring; 331 int eflags = 0; 332 TreMatchObject *mo; 333 TreFuzzynessObject *fz; 334 size_t nsub; 335 int rc; 336 regmatch_t *pm; 337 char *targ; 338 size_t tlen; 339 340 if (!PyArg_ParseTuple(args, "SO!|i:match", &pstring, &TreFuzzynessType, 341 &fz, &eflags)) 342 return NULL; 343 344 mo = newTreMatchObject(); 345 if (mo == NULL) 346 return NULL; 347 348 nsub = self->rgx.re_nsub + 1; 349 pm = PyMem_New(regmatch_t, nsub); 350 if (pm != NULL) 351 { 352 mo->am.nmatch = nsub; 353 mo->am.pmatch = pm; 354 } 355 else 356 { 357 /* XXX */ 358 Py_DECREF(mo); 359 return NULL; 360 } 361 362 targ = PyString_AsString(pstring); 363 tlen = PyString_Size(pstring); 364 365 rc = tre_reganexec(&self->rgx, targ, tlen, &mo->am, fz->ap, eflags); 366 367 if (PyErr_Occurred()) 368 { 369 Py_DECREF(mo); 370 return NULL; 371 } 372 373 if (rc == REG_OK) 374 { 375 Py_INCREF(pstring); 376 mo->targ = pstring; 377 Py_INCREF(fz); 378 mo->fz = fz; 379 return (PyObject*)mo; 380 } 381 382 if (rc == REG_NOMATCH) 383 { 384 Py_DECREF(mo); 385 Py_INCREF(Py_None); 386 return Py_None; 387 } 388 _set_tre_err(rc, &self->rgx); 389 Py_DECREF(mo); 390 return NULL; 391} 392 393static PyMethodDef TrePattern_methods[] = { 394 { "search", (PyCFunction)PyTrePattern_search, METH_VARARGS, 395 "try to match against given string, returning " TRE_MODULE ".match object " 396 "or None on failure" }, 397 {NULL, NULL} 398}; 399 400static PyMemberDef TrePattern_members[] = { 401 { "nsub", T_INT, offsetof(TrePatternObject, rgx.re_nsub), READONLY, 402 "Number of parenthesized subexpressions in regex" }, 403 { NULL } 404}; 405 406static void 407PyTrePattern_dealloc(TrePatternObject *self) 408{ 409 tre_regfree(&self->rgx); 410 PyObject_Del(self); 411} 412 413static PyTypeObject TrePatternType = { 414 PyObject_HEAD_INIT(NULL) 415 0, /* ob_size */ 416 TRE_MODULE ".Pattern", /* tp_name */ 417 sizeof(TrePatternObject), /* tp_basicsize */ 418 0, /* tp_itemsize */ 419 /* methods */ 420 (destructor)PyTrePattern_dealloc, /*tp_dealloc*/ 421 0, /* tp_print */ 422 0, /* tp_getattr */ 423 0, /* tp_setattr */ 424 0, /* tp_compare */ 425 0, /* tp_repr */ 426 0, /* tp_as_number */ 427 0, /* tp_as_sequence */ 428 0, /* tp_as_mapping */ 429 0, /* tp_hash */ 430 0, /* tp_call */ 431 0, /* tp_str */ 432 0, /* tp_getattro */ 433 0, /* tp_setattro */ 434 0, /* tp_as_buffer */ 435 Py_TPFLAGS_DEFAULT, /* tp_flags */ 436 TRE_MODULE ".pattern object holds compiled tre regex", /* tp_doc */ 437 0, /* tp_traverse */ 438 0, /* tp_clear */ 439 0, /* tp_richcompare */ 440 0, /* tp_weaklistoffset */ 441 0, /* tp_iter */ 442 0, /* tp_iternext */ 443 TrePattern_methods, /* tp_methods */ 444 TrePattern_members /* tp_members */ 445}; 446 447static TrePatternObject * 448newTrePatternObject(PyObject *args) 449{ 450 TrePatternObject *self; 451 452 self = PyObject_New(TrePatternObject, &TrePatternType); 453 if (self == NULL) 454 return NULL; 455 self->flags = 0; 456 return self; 457} 458 459static PyObject * 460PyTre_ncompile(PyObject *self, PyObject *args) 461{ 462 TrePatternObject *rv; 463 char *pattern; 464 int pattlen; 465 int cflags = 0; 466 int rc; 467 468 if (!PyArg_ParseTuple(args, "s#|i:compile", &pattern, &pattlen, &cflags)) 469 return NULL; 470 471 rv = newTrePatternObject(args); 472 if (rv == NULL) 473 return NULL; 474 475 rc = tre_regncomp(&rv->rgx, (char*)pattern, pattlen, cflags); 476 if (rc != REG_OK) 477 { 478 if (!PyErr_Occurred()) 479 _set_tre_err(rc, &rv->rgx); 480 Py_DECREF(rv); 481 return NULL; 482 } 483 rv->flags = cflags; 484 return (PyObject*)rv; 485} 486 487static PyMethodDef tre_methods[] = { 488 { "compile", PyTre_ncompile, METH_VARARGS, 489 "Compile a regular expression pattern, returning a " 490 TRE_MODULE ".pattern object" }, 491 { NULL, NULL } 492}; 493 494static char *tre_doc = 495"Python module for TRE library\n\nModule exports " 496"the only function: compile"; 497 498static struct _tre_flags { 499 char *name; 500 int val; 501} tre_flags[] = { 502 { "EXTENDED", REG_EXTENDED }, 503 { "ICASE", REG_ICASE }, 504 { "NEWLINE", REG_NEWLINE }, 505 { "NOSUB", REG_NOSUB }, 506 { "LITERAL", REG_LITERAL }, 507 508 { "NOTBOL", REG_NOTBOL }, 509 { "NOTEOL", REG_NOTEOL }, 510 { NULL, 0 } 511}; 512 513PyMODINIT_FUNC 514inittre(void) 515{ 516 PyObject *m; 517 struct _tre_flags *fp; 518 519 if (PyType_Ready(&TreFuzzynessType) < 0) 520 return; 521 if (PyType_Ready(&TreMatchType) < 0) 522 return; 523 if (PyType_Ready(&TrePatternType) < 0) 524 return; 525 526 /* Create the module and add the functions */ 527 m = Py_InitModule3(TRE_MODULE, tre_methods, tre_doc); 528 if (m == NULL) 529 return; 530 531 Py_INCREF(&TreFuzzynessType); 532 if (PyModule_AddObject(m, "Fuzzyness", (PyObject*)&TreFuzzynessType) < 0) 533 return; 534 Py_INCREF(&TreMatchType); 535 if (PyModule_AddObject(m, "Match", (PyObject*)&TreMatchType) < 0) 536 return; 537 Py_INCREF(&TrePatternType); 538 if (PyModule_AddObject(m, "Pattern", (PyObject*)&TrePatternType) < 0) 539 return; 540 ErrorObject = PyErr_NewException(TRE_MODULE ".Error", NULL, NULL); 541 Py_INCREF(ErrorObject); 542 if (PyModule_AddObject(m, "Error", ErrorObject) < 0) 543 return; 544 545 /* Insert the flags */ 546 for (fp = tre_flags; fp->name != NULL; fp++) 547 if (PyModule_AddIntConstant(m, fp->name, fp->val) < 0) 548 return; 549} 550