1/*
2  tre-python.c - TRE Python language bindings
3
  This software is released under a BSD-style license.
5  See the file LICENSE for details and copyright.
6
7  The original version of this code was contributed by
8  Nikolai Saoukh <nms+python@otdel1.org>.
9
10*/
11
12
13#include "Python.h"
14#include "structmember.h"
15
16#include <tre/tre.h>
17
/* Name of the extension module; also the prefix used in the type names
   and documentation strings below. */
#define TRE_MODULE "tre"
19
20typedef struct {
21  PyObject_HEAD
22  regex_t rgx;
23  int flags;
24} TrePatternObject;
25
26typedef struct {
27  PyObject_HEAD
28  regaparams_t ap;
29} TreFuzzynessObject;
30
31typedef struct {
32  PyObject_HEAD
33  regamatch_t am;
34  PyObject *targ;	  /* string we matched against */
35  TreFuzzynessObject *fz; /* fuzzyness used during match */
36} TreMatchObject;
37
38
39static PyObject *ErrorObject;
40
41static void
42_set_tre_err(int rc, regex_t *rgx)
43{
44  PyObject *errval;
45  char emsg[256];
46  size_t elen;
47
48  elen = tre_regerror(rc, rgx, emsg, sizeof(emsg));
49  if (emsg[elen] == '\0')
50    elen--;
51  errval = Py_BuildValue("s#", emsg, elen);
52  PyErr_SetObject(ErrorObject, errval);
53  Py_XDECREF(errval);
54}
55
56static PyObject *
57TreFuzzyness_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
58{
59  static char *kwlist[] = {
60    "delcost", "inscost", "maxcost", "subcost",
61    "maxdel", "maxerr", "maxins", "maxsub",
62    NULL
63  };
64
65  TreFuzzynessObject *self;
66
67  self = (TreFuzzynessObject*)type->tp_alloc(type, 0);
68  if (self == NULL)
69    return NULL;
70  tre_regaparams_default(&self->ap);
71  if (!PyArg_ParseTupleAndKeywords(args, kwds, "|iiiiiiii", kwlist,
72				   &self->ap.cost_del, &self->ap.cost_ins,
73				   &self->ap.max_cost, &self->ap.cost_subst,
74				   &self->ap.max_del, &self->ap.max_err,
75				   &self->ap.max_ins, &self->ap.max_subst))
76    {
77      Py_DECREF(self);
78      return NULL;
79    }
80  return (PyObject*)self;
81}
82
83static PyObject *
84TreFuzzyness_repr(PyObject *obj)
85{
86  TreFuzzynessObject *self = (TreFuzzynessObject*)obj;
87  PyObject *o;
88
89  o = PyString_FromFormat("%s(delcost=%d,inscost=%d,maxcost=%d,subcost=%d,"
90			  "maxdel=%d,maxerr=%d,maxins=%d,maxsub=%d)",
91			  self->ob_type->tp_name, self->ap.cost_del,
92			  self->ap.cost_ins, self->ap.max_cost,
93			  self->ap.cost_subst, self->ap.max_del,
94			  self->ap.max_err, self->ap.max_ins,
95			  self->ap.max_subst);
96  return o;
97}
98
99static PyMemberDef TreFuzzyness_members[] = {
100  { "delcost", T_INT, offsetof(TreFuzzynessObject, ap.cost_del), 0,
101    "The cost of a deleted character" },
102  { "inscost", T_INT, offsetof(TreFuzzynessObject, ap.cost_ins), 0,
103    "The cost of an inserted character" },
104  { "maxcost", T_INT, offsetof(TreFuzzynessObject, ap.max_cost), 0,
105    "The maximum allowed cost of a match. If this is set to zero, an exact "
106    "match is searched for" },
107  { "subcost", T_INT, offsetof(TreFuzzynessObject, ap.cost_subst), 0,
108    "The cost of a substituted character" },
109  { "maxdel", T_INT, offsetof(TreFuzzynessObject, ap.max_del), 0,
110    "Maximum allowed number of deleted characters" },
111  { "maxerr", T_INT, offsetof(TreFuzzynessObject, ap.max_err), 0,
112    "Maximum allowed number of errors (inserts + deletes + substitutes)" },
113  { "maxins", T_INT, offsetof(TreFuzzynessObject, ap.max_ins), 0,
114    "Maximum allowed number of inserted characters" },
115  { "maxsub", T_INT, offsetof(TreFuzzynessObject, ap.max_subst), 0,
116    "Maximum allowed number of substituted characters" },
117  { NULL }
118};
119
120static PyTypeObject TreFuzzynessType = {
121  PyObject_HEAD_INIT(NULL)
122  0,			        /* ob_size */
123  TRE_MODULE ".Fuzzyness",	/* tp_name */
124  sizeof(TreFuzzynessObject),	/* tp_basicsize */
125  0,			        /* tp_itemsize */
126  /* methods */
127  0,				/* tp_dealloc */
128  0,				/* tp_print */
129  0,				/* tp_getattr */
130  0,				/* tp_setattr */
131  0,				/* tp_compare */
132  TreFuzzyness_repr,		/* tp_repr */
133  0,				/* tp_as_number */
134  0,				/* tp_as_sequence */
135  0,				/* tp_as_mapping */
136  0,				/* tp_hash */
137  0,				/* tp_call */
138  0,				/* tp_str */
139  0,				/* tp_getattro */
140  0,				/* tp_setattro */
141  0,				/* tp_as_buffer */
142  Py_TPFLAGS_DEFAULT,		/* tp_flags */
143  /* tp_doc */
144  TRE_MODULE ".fuzzyness object holds approximation parameters for match",
145  0,				/* tp_traverse */
146  0,				/* tp_clear */
147  0,				/* tp_richcompare */
148  0,				/* tp_weaklistoffset */
149  0,				/* tp_iter */
150  0,				/* tp_iternext */
151  0,				/* tp_methods */
152  TreFuzzyness_members,		/* tp_members */
153  0,				/* tp_getset */
154  0,				/* tp_base */
155  0,				/* tp_dict */
156  0,				/* tp_descr_get */
157  0,				/* tp_descr_set */
158  0,				/* tp_dictoffset */
159  0,				/* tp_init */
160  0,				/* tp_alloc */
161  TreFuzzyness_new		/* tp_new */
162};
163
164static PyObject *
165PyTreMatch_groups(TreMatchObject *self, PyObject *dummy)
166{
167  PyObject *result;
168  size_t i;
169
170  if (self->am.nmatch < 1)
171    {
172      Py_INCREF(Py_None);
173      return Py_None;
174    }
175  result = PyTuple_New(self->am.nmatch);
176  for (i = 0; i < self->am.nmatch; i++)
177    {
178      PyObject *range;
179      regmatch_t *rm = &self->am.pmatch[i];
180
181      if (rm->rm_so == (-1) && rm->rm_eo == (-1))
182	{
183	  Py_INCREF(Py_None);
184	  range = Py_None;
185	}
186      else
187	{
188	  range = Py_BuildValue("(ii)", rm->rm_so, rm->rm_eo);
189	}
190      PyTuple_SetItem(result, i, range);
191    }
192  return (PyObject*)result;
193}
194
195static PyObject *
196PyTreMatch_groupi(PyObject *obj, int gn)
197{
198  TreMatchObject *self = (TreMatchObject*)obj;
199  PyObject *result;
200  regmatch_t *rm;
201
202  if (gn < 0 || (size_t)gn > self->am.nmatch - 1)
203    {
204      PyErr_SetString(PyExc_ValueError, "out of bounds");
205      return NULL;
206    }
207  rm = &self->am.pmatch[gn];
208  if (rm->rm_so == (-1) && rm->rm_eo == (-1))
209    {
210      Py_INCREF(Py_None);
211      return Py_None;
212    }
213  result = PySequence_GetSlice(self->targ, rm->rm_so, rm->rm_eo);
214  return result;
215}
216
217static PyObject *
218PyTreMatch_group(TreMatchObject *self, PyObject *grpno)
219{
220  PyObject *result;
221  long gn;
222
223  gn = PyInt_AsLong(grpno);
224
225  if (PyErr_Occurred())
226    return NULL;
227
228  result = PyTreMatch_groupi((PyObject*)self, gn);
229  return result;
230}
231
232static PyMethodDef TreMatch_methods[] = {
233  {"group", (PyCFunction)PyTreMatch_group, METH_O,
234   "return submatched string or None if a parenthesized subexpression did "
235   "not participate in a match"},
236  {"groups", (PyCFunction)PyTreMatch_groups, METH_NOARGS,
237   "return the tuple of slice tuples for all parenthesized subexpressions "
238   "(None for not participated)"},
239  {NULL, NULL}
240};
241
242static PyMemberDef TreMatch_members[] = {
243  { "cost", T_INT, offsetof(TreMatchObject, am.cost), READONLY,
244    "Cost of the match" },
245  { "numdel", T_INT, offsetof(TreMatchObject, am.num_del), READONLY,
246    "Number of deletes in the match" },
247  { "numins", T_INT, offsetof(TreMatchObject, am.num_ins), READONLY,
248    "Number of inserts in the match" },
249  { "numsub", T_INT, offsetof(TreMatchObject, am.num_subst), READONLY,
250    "Number of substitutes in the match" },
251  { "fuzzyness", T_OBJECT, offsetof(TreMatchObject, fz), READONLY,
252    "Fuzzyness used during match" },
253  { NULL }
254};
255
256static void
257PyTreMatch_dealloc(TreMatchObject *self)
258{
259  Py_XDECREF(self->targ);
260  Py_XDECREF(self->fz);
261  if (self->am.pmatch != NULL)
262    PyMem_Del(self->am.pmatch);
263  PyObject_Del(self);
264}
265
266static PySequenceMethods TreMatch_as_sequence_methods = {
267  0, /* sq_length */
268  0, /* sq_concat */
269  0, /* sq_repeat */
270  PyTreMatch_groupi, /* sq_item */
271  0, /* sq_slice */
272  0, /* sq_ass_item */
273  0, /* sq_ass_slice */
274  0, /* sq_contains */
275  0, /* sq_inplace_concat */
276  0 /* sq_inplace_repeat */
277};
278
279static PyTypeObject TreMatchType = {
280  PyObject_HEAD_INIT(NULL)
281  0,			        /* ob_size */
282  TRE_MODULE ".Match",		/* tp_name */
283  sizeof(TreMatchObject),	/* tp_basicsize */
284  0,			        /* tp_itemsize */
285  /* methods */
286  (destructor)PyTreMatch_dealloc, /* tp_dealloc */
287  0,			        /* tp_print */
288  0,				/* tp_getattr */
289  0,				/* tp_setattr */
290  0,				/* tp_compare */
291  0,				/* tp_repr */
292  0,				/* tp_as_number */
293  &TreMatch_as_sequence_methods,	/* tp_as_sequence */
294  0,				/* tp_as_mapping */
295  0,				/* tp_hash */
296  0,				/* tp_call */
297  0,				/* tp_str */
298  0,				/* tp_getattro */
299  0,				/* tp_setattro */
300  0,				/* tp_as_buffer */
301  Py_TPFLAGS_DEFAULT,		/* tp_flags */
302  TRE_MODULE ".match object holds result of successful match",	/* tp_doc */
303  0,				/* tp_traverse */
304  0,				/* tp_clear */
305  0,				/* tp_richcompare */
306  0,				/* tp_weaklistoffset */
307  0,				/* tp_iter */
308  0,				/* tp_iternext */
309  TreMatch_methods,		/* tp_methods */
310  TreMatch_members		/* tp_members */
311};
312
313static TreMatchObject *
314newTreMatchObject(void)
315{
316  TreMatchObject *self;
317
318  self = PyObject_New(TreMatchObject, &TreMatchType);
319  if (self == NULL)
320    return NULL;
321  memset(&self->am, '\0', sizeof(self->am));
322  self->targ = NULL;
323  self->fz = NULL;
324  return self;
325}
326
327static PyObject *
328PyTrePattern_search(TrePatternObject *self, PyObject *args)
329{
330  PyObject *pstring;
331  int eflags = 0;
332  TreMatchObject *mo;
333  TreFuzzynessObject *fz;
334  size_t nsub;
335  int rc;
336  regmatch_t *pm;
337  char *targ;
338  size_t tlen;
339
340  if (!PyArg_ParseTuple(args, "SO!|i:match", &pstring, &TreFuzzynessType,
341			&fz, &eflags))
342    return NULL;
343
344  mo = newTreMatchObject();
345  if (mo == NULL)
346    return NULL;
347
348  nsub = self->rgx.re_nsub + 1;
349  pm = PyMem_New(regmatch_t, nsub);
350  if (pm != NULL)
351    {
352      mo->am.nmatch = nsub;
353      mo->am.pmatch = pm;
354    }
355  else
356    {
357      /* XXX */
358      Py_DECREF(mo);
359      return NULL;
360    }
361
362  targ = PyString_AsString(pstring);
363  tlen = PyString_Size(pstring);
364
365  rc = tre_reganexec(&self->rgx, targ, tlen, &mo->am, fz->ap, eflags);
366
367  if (PyErr_Occurred())
368    {
369      Py_DECREF(mo);
370      return NULL;
371    }
372
373  if (rc == REG_OK)
374    {
375      Py_INCREF(pstring);
376      mo->targ = pstring;
377      Py_INCREF(fz);
378      mo->fz = fz;
379      return (PyObject*)mo;
380    }
381
382  if (rc == REG_NOMATCH)
383    {
384      Py_DECREF(mo);
385      Py_INCREF(Py_None);
386      return Py_None;
387    }
388  _set_tre_err(rc, &self->rgx);
389  Py_DECREF(mo);
390  return NULL;
391}
392
393static PyMethodDef TrePattern_methods[] = {
394  { "search", (PyCFunction)PyTrePattern_search, METH_VARARGS,
395    "try to match against given string, returning " TRE_MODULE ".match object "
396    "or None on failure" },
397  {NULL, NULL}
398};
399
400static PyMemberDef TrePattern_members[] = {
401  { "nsub", T_INT, offsetof(TrePatternObject, rgx.re_nsub), READONLY,
402    "Number of parenthesized subexpressions in regex" },
403  { NULL }
404};
405
406static void
407PyTrePattern_dealloc(TrePatternObject *self)
408{
409  tre_regfree(&self->rgx);
410  PyObject_Del(self);
411}
412
413static PyTypeObject TrePatternType = {
414  PyObject_HEAD_INIT(NULL)
415  0,			        /* ob_size */
416  TRE_MODULE ".Pattern",	/* tp_name */
417  sizeof(TrePatternObject),	/* tp_basicsize */
418  0,			        /* tp_itemsize */
419  /* methods */
420  (destructor)PyTrePattern_dealloc, /*tp_dealloc*/
421  0,				/* tp_print */
422  0,				/* tp_getattr */
423  0,				/* tp_setattr */
424  0,				/* tp_compare */
425  0,				/* tp_repr */
426  0,				/* tp_as_number */
427  0,				/* tp_as_sequence */
428  0,				/* tp_as_mapping */
429  0,				/* tp_hash */
430  0,				/* tp_call */
431  0,				/* tp_str */
432  0,				/* tp_getattro */
433  0,				/* tp_setattro */
434  0,				/* tp_as_buffer */
435  Py_TPFLAGS_DEFAULT,		/* tp_flags */
436  TRE_MODULE ".pattern object holds compiled tre regex",	/* tp_doc */
437  0,				/* tp_traverse */
438  0,				/* tp_clear */
439  0,				/* tp_richcompare */
440  0,				/* tp_weaklistoffset */
441  0,				/* tp_iter */
442  0,				/* tp_iternext */
443  TrePattern_methods,		/* tp_methods */
444  TrePattern_members		/* tp_members */
445};
446
447static TrePatternObject *
448newTrePatternObject(PyObject *args)
449{
450  TrePatternObject *self;
451
452  self = PyObject_New(TrePatternObject, &TrePatternType);
453  if (self == NULL)
454    return NULL;
455  self->flags = 0;
456  return self;
457}
458
459static PyObject *
460PyTre_ncompile(PyObject *self, PyObject *args)
461{
462  TrePatternObject *rv;
463  char *pattern;
464  int pattlen;
465  int cflags = 0;
466  int rc;
467
468  if (!PyArg_ParseTuple(args, "s#|i:compile", &pattern, &pattlen, &cflags))
469    return NULL;
470
471  rv = newTrePatternObject(args);
472  if (rv == NULL)
473    return NULL;
474
475  rc = tre_regncomp(&rv->rgx, (char*)pattern, pattlen, cflags);
476  if (rc != REG_OK)
477    {
478      if (!PyErr_Occurred())
479	_set_tre_err(rc, &rv->rgx);
480      Py_DECREF(rv);
481      return NULL;
482    }
483  rv->flags = cflags;
484  return (PyObject*)rv;
485}
486
487static PyMethodDef tre_methods[] = {
488  { "compile", PyTre_ncompile, METH_VARARGS,
489    "Compile a regular expression pattern, returning a "
490    TRE_MODULE ".pattern object" },
491  { NULL, NULL }
492};
493
/* Module docstring. */
static char *tre_doc =
  "Python module for TRE library\n"
  "\n"
  "Module exports the only function: compile";
497
498static struct _tre_flags {
499  char *name;
500  int val;
501} tre_flags[] = {
502  { "EXTENDED", REG_EXTENDED },
503  { "ICASE", REG_ICASE },
504  { "NEWLINE", REG_NEWLINE },
505  { "NOSUB", REG_NOSUB },
506  { "LITERAL", REG_LITERAL },
507
508  { "NOTBOL", REG_NOTBOL },
509  { "NOTEOL", REG_NOTEOL },
510  { NULL, 0 }
511};
512
513PyMODINIT_FUNC
514inittre(void)
515{
516  PyObject *m;
517  struct _tre_flags *fp;
518
519  if (PyType_Ready(&TreFuzzynessType) < 0)
520    return;
521  if (PyType_Ready(&TreMatchType) < 0)
522    return;
523  if (PyType_Ready(&TrePatternType) < 0)
524    return;
525
526  /* Create the module and add the functions */
527  m = Py_InitModule3(TRE_MODULE, tre_methods, tre_doc);
528  if (m == NULL)
529    return;
530
531  Py_INCREF(&TreFuzzynessType);
532  if (PyModule_AddObject(m, "Fuzzyness", (PyObject*)&TreFuzzynessType) < 0)
533    return;
534  Py_INCREF(&TreMatchType);
535  if (PyModule_AddObject(m, "Match", (PyObject*)&TreMatchType) < 0)
536    return;
537  Py_INCREF(&TrePatternType);
538  if (PyModule_AddObject(m, "Pattern", (PyObject*)&TrePatternType) < 0)
539    return;
540  ErrorObject = PyErr_NewException(TRE_MODULE ".Error", NULL, NULL);
541  Py_INCREF(ErrorObject);
542  if (PyModule_AddObject(m, "Error", ErrorObject) < 0)
543    return;
544
545  /* Insert the flags */
546  for (fp = tre_flags; fp->name != NULL; fp++)
547    if (PyModule_AddIntConstant(m, fp->name, fp->val) < 0)
548      return;
549}
550