1/* 2 tre-python.c - TRE Python language bindings 3 4 This sotfware is released under a BSD-style license. 5 See the file LICENSE for details and copyright. 6 7 The original version of this code was contributed by 8 Nikolai Saoukh <nms+python@otdel1.org>. 9 10*/ 11 12 13#include "Python.h" 14#include "structmember.h" 15 16#include <tre/tre.h> 17 18#define TRE_MODULE "tre" 19 20typedef struct { 21 PyObject_HEAD 22 regex_t rgx; 23 int flags; 24} TrePatternObject; 25 26typedef struct { 27 PyObject_HEAD 28 regaparams_t ap; 29} TreFuzzynessObject; 30 31typedef struct { 32 PyObject_HEAD 33 regamatch_t am; 34 PyObject *targ; /* string we matched against */ 35 TreFuzzynessObject *fz; /* fuzzyness used during match */ 36} TreMatchObject; 37 38 39static PyObject *ErrorObject; 40 41static void 42_set_tre_err(int rc, regex_t *rgx) 43{ 44 PyObject *errval; 45 char emsg[256]; 46 size_t elen; 47 48 elen = tre_regerror(rc, rgx, emsg, sizeof(emsg)); 49 if (emsg[elen] == '\0') 50 elen--; 51 errval = Py_BuildValue("s#", emsg, elen); 52 PyErr_SetObject(ErrorObject, errval); 53 Py_XDECREF(errval); 54} 55 56static PyObject * 57TreFuzzyness_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 58{ 59 static char *kwlist[] = { 60 "delcost", "inscost", "maxcost", "subcost", 61 "maxdel", "maxerr", "maxins", "maxsub", 62 NULL 63 }; 64 65 TreFuzzynessObject *self; 66 67 self = (TreFuzzynessObject*)type->tp_alloc(type, 0); 68 if (self == NULL) 69 return NULL; 70 tre_regaparams_default(&self->ap); 71 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|iiiiiiii", kwlist, 72 &self->ap.cost_del, &self->ap.cost_ins, 73 &self->ap.max_cost, &self->ap.cost_subst, 74 &self->ap.max_del, &self->ap.max_err, 75 &self->ap.max_ins, &self->ap.max_subst)) 76 { 77 Py_DECREF(self); 78 return NULL; 79 } 80 return (PyObject*)self; 81} 82 83static PyObject * 84TreFuzzyness_repr(PyObject *obj) 85{ 86 TreFuzzynessObject *self = (TreFuzzynessObject*)obj; 87 PyObject *o; 88 89 o = PyString_FromFormat("%s(delcost=%d,inscost=%d,maxcost=%d,subcost=%d," 90 "maxdel=%d,maxerr=%d,maxins=%d,maxsub=%d)", 91 self->ob_type->tp_name, self->ap.cost_del, 92 self->ap.cost_ins, self->ap.max_cost, 93 self->ap.cost_subst, self->ap.max_del, 94 self->ap.max_err, self->ap.max_ins, 95 self->ap.max_subst); 96 return o; 97} 98 99static PyMemberDef TreFuzzyness_members[] = { 100 { "delcost", T_INT, offsetof(TreFuzzynessObject, ap.cost_del), 0, 101 "The cost of a deleted character" }, 102 { "inscost", T_INT, offsetof(TreFuzzynessObject, ap.cost_ins), 0, 103 "The cost of an inserted character" }, 104 { "maxcost", T_INT, offsetof(TreFuzzynessObject, ap.max_cost), 0, 105 "The maximum allowed cost of a match. If this is set to zero, an exact " 106 "match is searched for" }, 107 { "subcost", T_INT, offsetof(TreFuzzynessObject, ap.cost_subst), 0, 108 "The cost of a substituted character" }, 109 { "maxdel", T_INT, offsetof(TreFuzzynessObject, ap.max_del), 0, 110 "Maximum allowed number of deleted characters" }, 111 { "maxerr", T_INT, offsetof(TreFuzzynessObject, ap.max_err), 0, 112 "Maximum allowed number of errors (inserts + deletes + substitutes)" }, 113 { "maxins", T_INT, offsetof(TreFuzzynessObject, ap.max_ins), 0, 114 "Maximum allowed number of inserted characters" }, 115 { "maxsub", T_INT, offsetof(TreFuzzynessObject, ap.max_subst), 0, 116 "Maximum allowed number of substituted characters" }, 117 { NULL } 118}; 119 120static PyTypeObject TreFuzzynessType = { 121 PyObject_HEAD_INIT(NULL) 122 0, /* ob_size */ 123 TRE_MODULE ".Fuzzyness", /* tp_name */ 124 sizeof(TreFuzzynessObject), /* tp_basicsize */ 125 0, /* tp_itemsize */ 126 /* methods */ 127 0, /* tp_dealloc */ 128 0, /* tp_print */ 129 0, /* tp_getattr */ 130 0, /* tp_setattr */ 131 0, /* tp_compare */ 132 TreFuzzyness_repr, /* tp_repr */ 133 0, /* tp_as_number */ 134 0, /* tp_as_sequence */ 135 0, /* tp_as_mapping */ 136 0, /* tp_hash */ 137 0, /* tp_call */ 138 0, /* tp_str */ 139 0, /* tp_getattro */ 140 0, /* tp_setattro */ 141 0, /* tp_as_buffer */ 142 Py_TPFLAGS_DEFAULT, /* tp_flags */ 143 /* tp_doc */ 144 TRE_MODULE ".fuzzyness object holds approximation parameters for match", 145 0, /* tp_traverse */ 146 0, /* tp_clear */ 147 0, /* tp_richcompare */ 148 0, /* tp_weaklistoffset */ 149 0, /* tp_iter */ 150 0, /* tp_iternext */ 151 0, /* tp_methods */ 152 TreFuzzyness_members, /* tp_members */ 153 0, /* tp_getset */ 154 0, /* tp_base */ 155 0, /* tp_dict */ 156 0, /* tp_descr_get */ 157 0, /* tp_descr_set */ 158 0, /* tp_dictoffset */ 159 0, /* tp_init */ 160 0, /* tp_alloc */ 161 TreFuzzyness_new /* tp_new */ 162}; 163 164static PyObject * 165PyTreMatch_groups(TreMatchObject *self, PyObject *dummy) 166{ 167 PyObject *result; 168 size_t i; 169 170 if (self->am.nmatch < 1) 171 { 172 Py_INCREF(Py_None); 173 return Py_None; 174 } 175 result = PyTuple_New(self->am.nmatch); 176 for (i = 0; i < self->am.nmatch; i++) 177 { 178 PyObject *range; 179 regmatch_t *rm = &self->am.pmatch[i]; 180 181 if (rm->rm_so == (-1) && rm->rm_eo == (-1)) 182 { 183 Py_INCREF(Py_None); 184 range = Py_None; 185 } 186 else 187 { 188 range = Py_BuildValue("(ii)", rm->rm_so, rm->rm_eo); 189 } 190 PyTuple_SetItem(result, i, range); 191 } 192 return (PyObject*)result; 193} 194 195static PyObject * 196PyTreMatch_groupi(PyObject *obj, int gn) 197{ 198 TreMatchObject *self = (TreMatchObject*)obj; 199 PyObject *result; 200 regmatch_t *rm; 201 202 if (gn < 0 || (size_t)gn > self->am.nmatch - 1) 203 { 204 PyErr_SetString(PyExc_ValueError, "out of bounds"); 205 return NULL; 206 } 207 rm = &self->am.pmatch[gn]; 208 if (rm->rm_so == (-1) && rm->rm_eo == (-1)) 209 { 210 Py_INCREF(Py_None); 211 return Py_None; 212 } 213 result = PySequence_GetSlice(self->targ, rm->rm_so, rm->rm_eo); 214 return result; 215} 216 217static PyObject * 218PyTreMatch_group(TreMatchObject *self, PyObject *grpno) 219{ 220 PyObject *result; 221 long gn; 222 223 gn = PyInt_AsLong(grpno); 224 225 if (PyErr_Occurred()) 226 return NULL; 227 228 result = PyTreMatch_groupi((PyObject*)self, gn); 229 return result; 230} 231 232static PyMethodDef TreMatch_methods[] = { 233 {"group", (PyCFunction)PyTreMatch_group, METH_O, 234 "return submatched string or None if a parenthesized subexpression did " 235 "not participate in a match"}, 236 {"groups", (PyCFunction)PyTreMatch_groups, METH_NOARGS, 237 "return the tuple of slice tuples for all parenthesized subexpressions " 238 "(None for not participated)"}, 239 {NULL, NULL} 240}; 241 242static PyMemberDef TreMatch_members[] = { 243 { "cost", T_INT, offsetof(TreMatchObject, am.cost), READONLY, 244 "Cost of the match" }, 245 { "numdel", T_INT, offsetof(TreMatchObject, am.num_del), READONLY, 246 "Number of deletes in the match" }, 247 { "numins", T_INT, offsetof(TreMatchObject, am.num_ins), READONLY, 248 "Number of inserts in the match" }, 249 { "numsub", T_INT, offsetof(TreMatchObject, am.num_subst), READONLY, 250 "Number of substitutes in the match" }, 251 { "fuzzyness", T_OBJECT, offsetof(TreMatchObject, fz), READONLY, 252 "Fuzzyness used during match" }, 253 { NULL } 254}; 255 256static void 257PyTreMatch_dealloc(TreMatchObject *self) 258{ 259 Py_XDECREF(self->targ); 260 Py_XDECREF(self->fz); 261 if (self->am.pmatch != NULL) 262 PyMem_Del(self->am.pmatch); 263 PyObject_Del(self); 264} 265 266static PySequenceMethods TreMatch_as_sequence_methods = { 267 0, /* sq_length */ 268 0, /* sq_concat */ 269 0, /* sq_repeat */ 270 PyTreMatch_groupi, /* sq_item */ 271 0, /* sq_slice */ 272 0, /* sq_ass_item */ 273 0, /* sq_ass_slice */ 274 0, /* sq_contains */ 275 0, /* sq_inplace_concat */ 276 0 /* sq_inplace_repeat */ 277}; 278 279static PyTypeObject TreMatchType = { 280 PyObject_HEAD_INIT(NULL) 281 0, /* ob_size */ 282 TRE_MODULE ".Match", /* tp_name */ 283 sizeof(TreMatchObject), /* tp_basicsize */ 284 0, /* tp_itemsize */ 285 /* methods */ 286 (destructor)PyTreMatch_dealloc, /* tp_dealloc */ 287 0, /* tp_print */ 288 0, /* tp_getattr */ 289 0, /* tp_setattr */ 290 0, /* tp_compare */ 291 0, /* tp_repr */ 292 0, /* tp_as_number */ 293 &TreMatch_as_sequence_methods, /* tp_as_sequence */ 294 0, /* tp_as_mapping */ 295 0, /* tp_hash */ 296 0, /* tp_call */ 297 0, /* tp_str */ 298 0, /* tp_getattro */ 299 0, /* tp_setattro */ 300 0, /* tp_as_buffer */ 301 Py_TPFLAGS_DEFAULT, /* tp_flags */ 302 TRE_MODULE ".match object holds result of successful match", /* tp_doc */ 303 0, /* tp_traverse */ 304 0, /* tp_clear */ 305 0, /* tp_richcompare */ 306 0, /* tp_weaklistoffset */ 307 0, /* tp_iter */ 308 0, /* tp_iternext */ 309 TreMatch_methods, /* tp_methods */ 310 TreMatch_members /* tp_members */ 311}; 312 313static TreMatchObject * 314newTreMatchObject(void) 315{ 316 TreMatchObject *self; 317 318 self = PyObject_New(TreMatchObject, &TreMatchType); 319 if (self == NULL) 320 return NULL; 321 memset(&self->am, '\0', sizeof(self->am)); 322 self->targ = NULL; 323 self->fz = NULL; 324 return self; 325} 326 327static PyObject * 328PyTrePattern_search(TrePatternObject *self, PyObject *args) 329{ 330 PyObject *pstring; 331 int eflags = 0; 332 TreMatchObject *mo; 333 TreFuzzynessObject *fz; 334 size_t nsub; 335 int rc; 336 regmatch_t *pm; 337 char *targ; 338 size_t tlen; 339 340 if (PyTuple_Size(args) > 0 && PyUnicode_Check(PyTuple_GetItem(args, 0))) 341 { 342 if (!PyArg_ParseTuple(args, "UO!|i:search", &pstring, &TreFuzzynessType, 343 &fz, &eflags)) 344 return NULL; 345 } 346 else 347 { 348 if (!PyArg_ParseTuple(args, "SO!|i:search", &pstring, &TreFuzzynessType, 349 &fz, &eflags)) 350 return NULL; 351 } 352 353 mo = newTreMatchObject(); 354 if (mo == NULL) 355 return NULL; 356 357 nsub = self->rgx.re_nsub + 1; 358 pm = PyMem_New(regmatch_t, nsub); 359 if (!pm) 360 { 361 Py_DECREF(mo); 362 return PyErr_NoMemory(); 363 } 364 365 mo->am.nmatch = nsub; 366 mo->am.pmatch = pm; 367 368 if (PyUnicode_Check(pstring)) 369 { 370 Py_ssize_t len = PyUnicode_GetSize(pstring); 371 wchar_t *buf = calloc(sizeof(wchar_t), len); 372 if(!buf) 373 { 374 Py_DECREF(mo); 375 return PyErr_NoMemory(); 376 } 377 PyUnicode_AsWideChar(pstring, buf, len); 378 rc = tre_regawnexec(&self->rgx, buf, len, &mo->am, fz->ap, eflags); 379 free(buf); 380 } 381 else 382 { 383 targ = PyString_AsString(pstring); 384 tlen = PyString_Size(pstring); 385 386 rc = tre_reganexec(&self->rgx, targ, tlen, &mo->am, fz->ap, eflags); 387 } 388 389 if (PyErr_Occurred()) 390 { 391 Py_DECREF(mo); 392 return NULL; 393 } 394 395 if (rc == REG_OK) 396 { 397 Py_INCREF(pstring); 398 mo->targ = pstring; 399 Py_INCREF(fz); 400 mo->fz = fz; 401 return (PyObject*)mo; 402 } 403 404 if (rc == REG_NOMATCH) 405 { 406 Py_DECREF(mo); 407 Py_INCREF(Py_None); 408 return Py_None; 409 } 410 _set_tre_err(rc, &self->rgx); 411 Py_DECREF(mo); 412 return NULL; 413} 414 415static PyMethodDef TrePattern_methods[] = { 416 { "search", (PyCFunction)PyTrePattern_search, METH_VARARGS, 417 "try to search in the given string, returning " TRE_MODULE ".match object " 418 "or None on failure" }, 419 {NULL, NULL} 420}; 421 422static PyMemberDef TrePattern_members[] = { 423 { "nsub", T_INT, offsetof(TrePatternObject, rgx.re_nsub), READONLY, 424 "Number of parenthesized subexpressions in regex" }, 425 { NULL } 426}; 427 428static void 429PyTrePattern_dealloc(TrePatternObject *self) 430{ 431 tre_regfree(&self->rgx); 432 PyObject_Del(self); 433} 434 435static PyTypeObject TrePatternType = { 436 PyObject_HEAD_INIT(NULL) 437 0, /* ob_size */ 438 TRE_MODULE ".Pattern", /* tp_name */ 439 sizeof(TrePatternObject), /* tp_basicsize */ 440 0, /* tp_itemsize */ 441 /* methods */ 442 (destructor)PyTrePattern_dealloc, /*tp_dealloc*/ 443 0, /* tp_print */ 444 0, /* tp_getattr */ 445 0, /* tp_setattr */ 446 0, /* tp_compare */ 447 0, /* tp_repr */ 448 0, /* tp_as_number */ 449 0, /* tp_as_sequence */ 450 0, /* tp_as_mapping */ 451 0, /* tp_hash */ 452 0, /* tp_call */ 453 0, /* tp_str */ 454 0, /* tp_getattro */ 455 0, /* tp_setattro */ 456 0, /* tp_as_buffer */ 457 Py_TPFLAGS_DEFAULT, /* tp_flags */ 458 TRE_MODULE ".pattern object holds compiled tre regex", /* tp_doc */ 459 0, /* tp_traverse */ 460 0, /* tp_clear */ 461 0, /* tp_richcompare */ 462 0, /* tp_weaklistoffset */ 463 0, /* tp_iter */ 464 0, /* tp_iternext */ 465 TrePattern_methods, /* tp_methods */ 466 TrePattern_members /* tp_members */ 467}; 468 469static TrePatternObject * 470newTrePatternObject() 471{ 472 TrePatternObject *self; 473 474 self = PyObject_New(TrePatternObject, &TrePatternType); 475 if (self == NULL) 476 return NULL; 477 self->flags = 0; 478 return self; 479} 480 481static PyObject * 482PyTre_ncompile(PyObject *self, PyObject *args) 483{ 484 TrePatternObject *rv; 485 PyUnicodeObject *upattern = NULL; 486 char *pattern = NULL; 487 int pattlen; 488 int cflags = 0; 489 int rc; 490 491 if (PyTuple_Size(args) > 0 && PyUnicode_Check(PyTuple_GetItem(args, 0))) 492 { 493 if (!PyArg_ParseTuple(args, "U|i:compile", &upattern, &cflags)) 494 return NULL; 495 } 496 else 497 { 498 if (!PyArg_ParseTuple(args, "s#|i:compile", &pattern, &pattlen, &cflags)) 499 return NULL; 500 } 501 502 rv = newTrePatternObject(); 503 if (rv == NULL) 504 return NULL; 505 506 if (upattern != NULL) 507 { 508 Py_ssize_t len = PyUnicode_GetSize(upattern); 509 wchar_t *buf = calloc(sizeof(wchar_t), len); 510 if(!buf) 511 { 512 Py_DECREF(rv); 513 return PyErr_NoMemory(); 514 } 515 PyUnicode_AsWideChar(upattern, buf, len); 516 rc = tre_regwncomp(&rv->rgx, buf, len, cflags); 517 free(buf); 518 } 519 else 520 rc = tre_regncomp(&rv->rgx, (char*)pattern, pattlen, cflags); 521 522 if (rc != REG_OK) 523 { 524 if (!PyErr_Occurred()) 525 _set_tre_err(rc, &rv->rgx); 526 Py_DECREF(rv); 527 return NULL; 528 } 529 rv->flags = cflags; 530 return (PyObject*)rv; 531} 532 533static PyMethodDef tre_methods[] = { 534 { "compile", PyTre_ncompile, METH_VARARGS, 535 "Compile a regular expression pattern, returning a " 536 TRE_MODULE ".pattern object" }, 537 { NULL, NULL } 538}; 539 540static char *tre_doc = 541"Python module for TRE library\n\nModule exports " 542"the only function: compile"; 543 544static struct _tre_flags { 545 char *name; 546 int val; 547} tre_flags[] = { 548 { "EXTENDED", REG_EXTENDED }, 549 { "ICASE", REG_ICASE }, 550 { "NEWLINE", REG_NEWLINE }, 551 { "NOSUB", REG_NOSUB }, 552 { "LITERAL", REG_LITERAL }, 553 554 { "NOTBOL", REG_NOTBOL }, 555 { "NOTEOL", REG_NOTEOL }, 556 { NULL, 0 } 557}; 558 559PyMODINIT_FUNC 560inittre(void) 561{ 562 PyObject *m; 563 struct _tre_flags *fp; 564 565 if (PyType_Ready(&TreFuzzynessType) < 0) 566 return; 567 if (PyType_Ready(&TreMatchType) < 0) 568 return; 569 if (PyType_Ready(&TrePatternType) < 0) 570 return; 571 572 /* Create the module and add the functions */ 573 m = Py_InitModule3(TRE_MODULE, tre_methods, tre_doc); 574 if (m == NULL) 575 return; 576 577 Py_INCREF(&TreFuzzynessType); 578 if (PyModule_AddObject(m, "Fuzzyness", (PyObject*)&TreFuzzynessType) < 0) 579 return; 580 Py_INCREF(&TreMatchType); 581 if (PyModule_AddObject(m, "Match", (PyObject*)&TreMatchType) < 0) 582 return; 583 Py_INCREF(&TrePatternType); 584 if (PyModule_AddObject(m, "Pattern", (PyObject*)&TrePatternType) < 0) 585 return; 586 ErrorObject = PyErr_NewException(TRE_MODULE ".Error", NULL, NULL); 587 Py_INCREF(ErrorObject); 588 if (PyModule_AddObject(m, "Error", ErrorObject) < 0) 589 return; 590 591 /* Insert the flags */ 592 for (fp = tre_flags; fp->name != NULL; fp++) 593 if (PyModule_AddIntConstant(m, fp->name, fp->val) < 0) 594 return; 595} 596