kwset.c revision 56918
168349Sobrien/* kwset.c - search for any of a set of keywords.
268349Sobrien   Copyright (C) 1989, 1998 Free Software Foundation, Inc.
3354939Sdelphij
468349Sobrien   This program is free software; you can redistribute it and/or modify
568349Sobrien   it under the terms of the GNU General Public License as published by
668349Sobrien   the Free Software Foundation; either version 2, or (at your option)
768349Sobrien   any later version.
8354939Sdelphij
9354939Sdelphij   This program is distributed in the hope that it will be useful,
10267843Sdelphij   but WITHOUT ANY WARRANTY; without even the implied warranty of
1168349Sobrien   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12267843Sdelphij   GNU General Public License for more details.
13328874Seadler
14328874Seadler   You should have received a copy of the GNU General Public License
15328874Seadler   along with this program; if not, write to the Free Software
16328874Seadler   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
17186690Sobrien   02111-1307, USA.  */
18328874Seadler
19328874Seadler/* Written August 1989 by Mike Haertel.
20186690Sobrien   The author may be reached (Email) at the address mike@ai.mit.edu,
21328874Seadler   or (US mail) as Mike Haertel c/o Free Software Foundation. */
22328874Seadler
23267843Sdelphij/* The algorithm implemented by these routines bears a startling resemblence
24328874Seadler   to one discovered by Beate Commentz-Walter, although it is not identical.
25328874Seadler   See "A String Matching Algorithm Fast on the Average," Technical Report,
26328874Seadler   IBM-Germany, Scientific Center Heidelberg, Tiergartenstrasse 15, D-6900
27328874Seadler   Heidelberg, Germany.  See also Aho, A.V., and M. Corasick, "Efficient
28328874Seadler   String Matching:  An Aid to Bibliographic Search," CACM June 1975,
29328874Seadler   Vol. 18, No. 6, which describes the failure function used below. */
30328874Seadler
31328874Seadler#ifdef HAVE_CONFIG_H
32328874Seadler# include <config.h>
33328874Seadler#endif
34328874Seadler#include <sys/types.h>
35328874Seadler#include "system.h"
36267843Sdelphij#include "kwset.h"
37328874Seadler#include "obstack.h"
38328874Seadler
39328874Seadler#ifdef GREP
40328874Seadlerextern char *xmalloc();
41267843Sdelphij# undef malloc
42328874Seadler# define malloc xmalloc
43328874Seadler#endif
44328874Seadler
45328874Seadler#define NCHAR (UCHAR_MAX + 1)
46267843Sdelphij#define obstack_chunk_alloc malloc
47328874Seadler#define obstack_chunk_free free
48328874Seadler
49328874Seadler/* Balanced tree of edges and labels leaving a given trie node. */
50328874Seadlerstruct tree
51328874Seadler{
52328874Seadler  struct tree *llink;		/* Left link; MUST be first field. */
53159764Sobrien  struct tree *rlink;		/* Right link (to larger labels). */
54267843Sdelphij  struct trie *trie;		/* Trie node pointed to by this edge. */
55186690Sobrien  unsigned char label;		/* Label on this edge. */
56159764Sobrien  char balance;			/* Difference in depths of subtrees. */
57110949Sobrien};
58226048Sobrien
59328874Seadler/* Node of a trie representing a set of reversed keywords. */
60328874Seadlerstruct trie
61110949Sobrien{
62328874Seadler  unsigned int accepting;	/* Word index of accepted word, or zero. */
63  struct tree *links;		/* Tree of edges leaving this node. */
64  struct trie *parent;		/* Parent of this node. */
65  struct trie *next;		/* List of all trie nodes in level order. */
66  struct trie *fail;		/* Aho-Corasick failure function. */
67  int depth;			/* Depth of this node from the root. */
68  int shift;			/* Shift function for search failures. */
69  int maxshift;			/* Max shift of self and descendents. */
70};
71
72/* Structure returned opaquely to the caller, containing everything. */
73struct kwset
74{
75  struct obstack obstack;	/* Obstack for node allocation. */
76  int words;			/* Number of words in the trie. */
77  struct trie *trie;		/* The trie itself. */
78  int mind;			/* Minimum depth of an accepting node. */
79  int maxd;			/* Maximum depth of any node. */
80  unsigned char delta[NCHAR];	/* Delta table for rapid search. */
81  struct trie *next[NCHAR];	/* Table of children of the root. */
82  char *target;			/* Target string if there's only one. */
83  int mind2;			/* Used in Boyer-Moore search for one string. */
84  char *trans;			/* Character translation table. */
85};
86
87/* prototypes */
88static void enqueue PARAMS((struct tree *, struct trie **));
89static void treefails PARAMS((register struct tree *, struct trie *, struct trie *));
90static void treedelta PARAMS((register struct tree *,register unsigned int, unsigned char *));
91static int  hasevery PARAMS((register struct tree *, register struct tree *));
92static void treenext PARAMS((struct tree *, struct trie **));
93static char * bmexec PARAMS((kwset_t, char *, size_t));
94static char * cwexec PARAMS((kwset_t, char *, size_t, struct kwsmatch *));
95
96/* Allocate and initialize a keyword set object, returning an opaque
97   pointer to it.  Return NULL if memory is not available. */
98kwset_t
99kwsalloc (char *trans)
100{
101  struct kwset *kwset;
102
103  kwset = (struct kwset *) malloc(sizeof (struct kwset));
104  if (!kwset)
105    return 0;
106
107  obstack_init(&kwset->obstack);
108  kwset->words = 0;
109  kwset->trie
110    = (struct trie *) obstack_alloc(&kwset->obstack, sizeof (struct trie));
111  if (!kwset->trie)
112    {
113      kwsfree((kwset_t) kwset);
114      return 0;
115    }
116  kwset->trie->accepting = 0;
117  kwset->trie->links = 0;
118  kwset->trie->parent = 0;
119  kwset->trie->next = 0;
120  kwset->trie->fail = 0;
121  kwset->trie->depth = 0;
122  kwset->trie->shift = 0;
123  kwset->mind = INT_MAX;
124  kwset->maxd = -1;
125  kwset->target = 0;
126  kwset->trans = trans;
127
128  return (kwset_t) kwset;
129}
130
131/* Add the given string to the contents of the keyword set.  Return NULL
132   for success, an error message otherwise. */
133char *
134kwsincr (kwset_t kws, char *text, size_t len)
135{
136  struct kwset *kwset;
137  register struct trie *trie;
138  register unsigned char label;
139  register struct tree *link;
140  register int depth;
141  struct tree *links[12];
142  enum { L, R } dirs[12];
143  struct tree *t, *r, *l, *rl, *lr;
144
145  kwset = (struct kwset *) kws;
146  trie = kwset->trie;
147  text += len;
148
149  /* Descend the trie (built of reversed keywords) character-by-character,
150     installing new nodes when necessary. */
151  while (len--)
152    {
153      label = kwset->trans ? kwset->trans[(unsigned char) *--text] : *--text;
154
155      /* Descend the tree of outgoing links for this trie node,
156	 looking for the current character and keeping track
157	 of the path followed. */
158      link = trie->links;
159      links[0] = (struct tree *) &trie->links;
160      dirs[0] = L;
161      depth = 1;
162
163      while (link && label != link->label)
164	{
165	  links[depth] = link;
166	  if (label < link->label)
167	    dirs[depth++] = L, link = link->llink;
168	  else
169	    dirs[depth++] = R, link = link->rlink;
170	}
171
172      /* The current character doesn't have an outgoing link at
173	 this trie node, so build a new trie node and install
174	 a link in the current trie node's tree. */
175      if (!link)
176	{
177	  link = (struct tree *) obstack_alloc(&kwset->obstack,
178					       sizeof (struct tree));
179	  if (!link)
180	    return _("memory exhausted");
181	  link->llink = 0;
182	  link->rlink = 0;
183	  link->trie = (struct trie *) obstack_alloc(&kwset->obstack,
184						     sizeof (struct trie));
185	  if (!link->trie)
186	    return _("memory exhausted");
187	  link->trie->accepting = 0;
188	  link->trie->links = 0;
189	  link->trie->parent = trie;
190	  link->trie->next = 0;
191	  link->trie->fail = 0;
192	  link->trie->depth = trie->depth + 1;
193	  link->trie->shift = 0;
194	  link->label = label;
195	  link->balance = 0;
196
197	  /* Install the new tree node in its parent. */
198	  if (dirs[--depth] == L)
199	    links[depth]->llink = link;
200	  else
201	    links[depth]->rlink = link;
202
203	  /* Back up the tree fixing the balance flags. */
204	  while (depth && !links[depth]->balance)
205	    {
206	      if (dirs[depth] == L)
207		--links[depth]->balance;
208	      else
209		++links[depth]->balance;
210	      --depth;
211	    }
212
213	  /* Rebalance the tree by pointer rotations if necessary. */
214	  if (depth && ((dirs[depth] == L && --links[depth]->balance)
215			|| (dirs[depth] == R && ++links[depth]->balance)))
216	    {
217	      switch (links[depth]->balance)
218		{
219		case (char) -2:
220		  switch (dirs[depth + 1])
221		    {
222		    case L:
223		      r = links[depth], t = r->llink, rl = t->rlink;
224		      t->rlink = r, r->llink = rl;
225		      t->balance = r->balance = 0;
226		      break;
227		    case R:
228		      r = links[depth], l = r->llink, t = l->rlink;
229		      rl = t->rlink, lr = t->llink;
230		      t->llink = l, l->rlink = lr, t->rlink = r, r->llink = rl;
231		      l->balance = t->balance != 1 ? 0 : -1;
232		      r->balance = t->balance != (char) -1 ? 0 : 1;
233		      t->balance = 0;
234		      break;
235		    default:
236		      abort ();
237		    }
238		  break;
239		case 2:
240		  switch (dirs[depth + 1])
241		    {
242		    case R:
243		      l = links[depth], t = l->rlink, lr = t->llink;
244		      t->llink = l, l->rlink = lr;
245		      t->balance = l->balance = 0;
246		      break;
247		    case L:
248		      l = links[depth], r = l->rlink, t = r->llink;
249		      lr = t->llink, rl = t->rlink;
250		      t->llink = l, l->rlink = lr, t->rlink = r, r->llink = rl;
251		      l->balance = t->balance != 1 ? 0 : -1;
252		      r->balance = t->balance != (char) -1 ? 0 : 1;
253		      t->balance = 0;
254		      break;
255		    default:
256		      abort ();
257		    }
258		  break;
259		default:
260		  abort ();
261		}
262
263	      if (dirs[depth - 1] == L)
264		links[depth - 1]->llink = t;
265	      else
266		links[depth - 1]->rlink = t;
267	    }
268	}
269
270      trie = link->trie;
271    }
272
273  /* Mark the node we finally reached as accepting, encoding the
274     index number of this word in the keyword set so far. */
275  if (!trie->accepting)
276    trie->accepting = 1 + 2 * kwset->words;
277  ++kwset->words;
278
279  /* Keep track of the longest and shortest string of the keyword set. */
280  if (trie->depth < kwset->mind)
281    kwset->mind = trie->depth;
282  if (trie->depth > kwset->maxd)
283    kwset->maxd = trie->depth;
284
285  return 0;
286}
287
288/* Enqueue the trie nodes referenced from the given tree in the
289   given queue. */
290static void
291enqueue (struct tree *tree, struct trie **last)
292{
293  if (!tree)
294    return;
295  enqueue(tree->llink, last);
296  enqueue(tree->rlink, last);
297  (*last) = (*last)->next = tree->trie;
298}
299
300/* Compute the Aho-Corasick failure function for the trie nodes referenced
301   from the given tree, given the failure function for their parent as
302   well as a last resort failure node. */
303static void
304treefails (register struct tree *tree, struct trie *fail, struct trie *recourse)
305{
306  register struct tree *link;
307
308  if (!tree)
309    return;
310
311  treefails(tree->llink, fail, recourse);
312  treefails(tree->rlink, fail, recourse);
313
314  /* Find, in the chain of fails going back to the root, the first
315     node that has a descendent on the current label. */
316  while (fail)
317    {
318      link = fail->links;
319      while (link && tree->label != link->label)
320	if (tree->label < link->label)
321	  link = link->llink;
322	else
323	  link = link->rlink;
324      if (link)
325	{
326	  tree->trie->fail = link->trie;
327	  return;
328	}
329      fail = fail->fail;
330    }
331
332  tree->trie->fail = recourse;
333}
334
335/* Set delta entries for the links of the given tree such that
336   the preexisting delta value is larger than the current depth. */
337static void
338treedelta (register struct tree *tree,
339	   register unsigned int depth,
340	   unsigned char delta[])
341{
342  if (!tree)
343    return;
344  treedelta(tree->llink, depth, delta);
345  treedelta(tree->rlink, depth, delta);
346  if (depth < delta[tree->label])
347    delta[tree->label] = depth;
348}
349
350/* Return true if A has every label in B. */
351static int
352hasevery (register struct tree *a, register struct tree *b)
353{
354  if (!b)
355    return 1;
356  if (!hasevery(a, b->llink))
357    return 0;
358  if (!hasevery(a, b->rlink))
359    return 0;
360  while (a && b->label != a->label)
361    if (b->label < a->label)
362      a = a->llink;
363    else
364      a = a->rlink;
365  return !!a;
366}
367
368/* Compute a vector, indexed by character code, of the trie nodes
369   referenced from the given tree. */
370static void
371treenext (struct tree *tree, struct trie *next[])
372{
373  if (!tree)
374    return;
375  treenext(tree->llink, next);
376  treenext(tree->rlink, next);
377  next[tree->label] = tree->trie;
378}
379
380/* Compute the shift for each trie node, as well as the delta
381   table and next cache for the given keyword set. */
382char *
383kwsprep (kwset_t kws)
384{
385  register struct kwset *kwset;
386  register int i;
387  register struct trie *curr, *fail;
388  register char *trans;
389  unsigned char delta[NCHAR];
390  struct trie *last, *next[NCHAR];
391
392  kwset = (struct kwset *) kws;
393
394  /* Initial values for the delta table; will be changed later.  The
395     delta entry for a given character is the smallest depth of any
396     node at which an outgoing edge is labeled by that character. */
397  if (kwset->mind < 256)
398    for (i = 0; i < NCHAR; ++i)
399      delta[i] = kwset->mind;
400  else
401    for (i = 0; i < NCHAR; ++i)
402      delta[i] = 255;
403
404  /* Check if we can use the simple boyer-moore algorithm, instead
405     of the hairy commentz-walter algorithm. */
406  if (kwset->words == 1 && kwset->trans == 0)
407    {
408      /* Looking for just one string.  Extract it from the trie. */
409      kwset->target = obstack_alloc(&kwset->obstack, kwset->mind);
410      for (i = kwset->mind - 1, curr = kwset->trie; i >= 0; --i)
411	{
412	  kwset->target[i] = curr->links->label;
413	  curr = curr->links->trie;
414	}
415      /* Build the Boyer Moore delta.  Boy that's easy compared to CW. */
416      for (i = 0; i < kwset->mind; ++i)
417	delta[(unsigned char) kwset->target[i]] = kwset->mind - (i + 1);
418      kwset->mind2 = kwset->mind;
419      /* Find the minimal delta2 shift that we might make after
420	 a backwards match has failed. */
421      for (i = 0; i < kwset->mind - 1; ++i)
422	if (kwset->target[i] == kwset->target[kwset->mind - 1])
423	  kwset->mind2 = kwset->mind - (i + 1);
424    }
425  else
426    {
427      /* Traverse the nodes of the trie in level order, simultaneously
428	 computing the delta table, failure function, and shift function. */
429      for (curr = last = kwset->trie; curr; curr = curr->next)
430	{
431	  /* Enqueue the immediate descendents in the level order queue. */
432	  enqueue(curr->links, &last);
433
434	  curr->shift = kwset->mind;
435	  curr->maxshift = kwset->mind;
436
437	  /* Update the delta table for the descendents of this node. */
438	  treedelta(curr->links, curr->depth, delta);
439
440	  /* Compute the failure function for the decendents of this node. */
441	  treefails(curr->links, curr->fail, kwset->trie);
442
443	  /* Update the shifts at each node in the current node's chain
444	     of fails back to the root. */
445	  for (fail = curr->fail; fail; fail = fail->fail)
446	    {
447	      /* If the current node has some outgoing edge that the fail
448		 doesn't, then the shift at the fail should be no larger
449		 than the difference of their depths. */
450	      if (!hasevery(fail->links, curr->links))
451		if (curr->depth - fail->depth < fail->shift)
452		  fail->shift = curr->depth - fail->depth;
453
454	      /* If the current node is accepting then the shift at the
455		 fail and its descendents should be no larger than the
456		 difference of their depths. */
457	      if (curr->accepting && fail->maxshift > curr->depth - fail->depth)
458		fail->maxshift = curr->depth - fail->depth;
459	    }
460	}
461
462      /* Traverse the trie in level order again, fixing up all nodes whose
463	 shift exceeds their inherited maxshift. */
464      for (curr = kwset->trie->next; curr; curr = curr->next)
465	{
466	  if (curr->maxshift > curr->parent->maxshift)
467	    curr->maxshift = curr->parent->maxshift;
468	  if (curr->shift > curr->maxshift)
469	    curr->shift = curr->maxshift;
470	}
471
472      /* Create a vector, indexed by character code, of the outgoing links
473	 from the root node. */
474      for (i = 0; i < NCHAR; ++i)
475	next[i] = 0;
476      treenext(kwset->trie->links, next);
477
478      if ((trans = kwset->trans) != 0)
479	for (i = 0; i < NCHAR; ++i)
480	  kwset->next[i] = next[(unsigned char) trans[i]];
481      else
482	for (i = 0; i < NCHAR; ++i)
483	  kwset->next[i] = next[i];
484    }
485
486  /* Fix things up for any translation table. */
487  if ((trans = kwset->trans) != 0)
488    for (i = 0; i < NCHAR; ++i)
489      kwset->delta[i] = delta[(unsigned char) trans[i]];
490  else
491    for (i = 0; i < NCHAR; ++i)
492      kwset->delta[i] = delta[i];
493
494  return 0;
495}
496
497#define U(C) ((unsigned char) (C))
498
499/* Fast boyer-moore search. */
500static char *
501bmexec (kwset_t kws, char *text, size_t size)
502{
503  struct kwset *kwset;
504  register unsigned char *d1;
505  register char *ep, *sp, *tp;
506  register int d, gc, i, len, md2;
507
508  kwset = (struct kwset *) kws;
509  len = kwset->mind;
510
511  if (len == 0)
512    return text;
513  if (len > size)
514    return 0;
515  if (len == 1)
516    return memchr(text, kwset->target[0], size);
517
518  d1 = kwset->delta;
519  sp = kwset->target + len;
520  gc = U(sp[-2]);
521  md2 = kwset->mind2;
522  tp = text + len;
523
524  /* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */
525  if (size > 12 * len)
526    /* 11 is not a bug, the initial offset happens only once. */
527    for (ep = text + size - 11 * len;;)
528      {
529	while (tp <= ep)
530	  {
531	    d = d1[U(tp[-1])], tp += d;
532	    d = d1[U(tp[-1])], tp += d;
533	    if (d == 0)
534	      goto found;
535	    d = d1[U(tp[-1])], tp += d;
536	    d = d1[U(tp[-1])], tp += d;
537	    d = d1[U(tp[-1])], tp += d;
538	    if (d == 0)
539	      goto found;
540	    d = d1[U(tp[-1])], tp += d;
541	    d = d1[U(tp[-1])], tp += d;
542	    d = d1[U(tp[-1])], tp += d;
543	    if (d == 0)
544	      goto found;
545	    d = d1[U(tp[-1])], tp += d;
546	    d = d1[U(tp[-1])], tp += d;
547	  }
548	break;
549      found:
550	if (U(tp[-2]) == gc)
551	  {
552	    for (i = 3; i <= len && U(tp[-i]) == U(sp[-i]); ++i)
553	      ;
554	    if (i > len)
555	      return tp - len;
556	  }
557	tp += md2;
558      }
559
560  /* Now we have only a few characters left to search.  We
561     carefully avoid ever producing an out-of-bounds pointer. */
562  ep = text + size;
563  d = d1[U(tp[-1])];
564  while (d <= ep - tp)
565    {
566      d = d1[U((tp += d)[-1])];
567      if (d != 0)
568	continue;
569      if (U(tp[-2]) == gc)
570	{
571	  for (i = 3; i <= len && U(tp[-i]) == U(sp[-i]); ++i)
572	    ;
573	  if (i > len)
574	    return tp - len;
575	}
576      d = md2;
577    }
578
579  return 0;
580}
581
582/* Hairy multiple string search. */
583static char *
584cwexec (kwset_t kws, char *text, size_t len, struct kwsmatch *kwsmatch)
585{
586  struct kwset *kwset;
587  struct trie **next, *trie, *accept;
588  char *beg, *lim, *mch, *lmch;
589  register unsigned char c, *delta;
590  register int d;
591  register char *end, *qlim;
592  register struct tree *tree;
593  register char *trans;
594
595#ifdef lint
596  accept = NULL;
597#endif
598
599  /* Initialize register copies and look for easy ways out. */
600  kwset = (struct kwset *) kws;
601  if (len < kwset->mind)
602    return 0;
603  next = kwset->next;
604  delta = kwset->delta;
605  trans = kwset->trans;
606  lim = text + len;
607  end = text;
608  if ((d = kwset->mind) != 0)
609    mch = 0;
610  else
611    {
612      mch = text, accept = kwset->trie;
613      goto match;
614    }
615
616  if (len >= 4 * kwset->mind)
617    qlim = lim - 4 * kwset->mind;
618  else
619    qlim = 0;
620
621  while (lim - end >= d)
622    {
623      if (qlim && end <= qlim)
624	{
625	  end += d - 1;
626	  while ((d = delta[c = *end]) && end < qlim)
627	    {
628	      end += d;
629	      end += delta[(unsigned char) *end];
630	      end += delta[(unsigned char) *end];
631	    }
632	  ++end;
633	}
634      else
635	d = delta[c = (end += d)[-1]];
636      if (d)
637	continue;
638      beg = end - 1;
639      trie = next[c];
640      if (trie->accepting)
641	{
642	  mch = beg;
643	  accept = trie;
644	}
645      d = trie->shift;
646      while (beg > text)
647	{
648	  c = trans ? trans[(unsigned char) *--beg] : *--beg;
649	  tree = trie->links;
650	  while (tree && c != tree->label)
651	    if (c < tree->label)
652	      tree = tree->llink;
653	    else
654	      tree = tree->rlink;
655	  if (tree)
656	    {
657	      trie = tree->trie;
658	      if (trie->accepting)
659		{
660		  mch = beg;
661		  accept = trie;
662		}
663	    }
664	  else
665	    break;
666	  d = trie->shift;
667	}
668      if (mch)
669	goto match;
670    }
671  return 0;
672
673 match:
674  /* Given a known match, find the longest possible match anchored
675     at or before its starting point.  This is nearly a verbatim
676     copy of the preceding main search loops. */
677  if (lim - mch > kwset->maxd)
678    lim = mch + kwset->maxd;
679  lmch = 0;
680  d = 1;
681  while (lim - end >= d)
682    {
683      if ((d = delta[c = (end += d)[-1]]) != 0)
684	continue;
685      beg = end - 1;
686      if (!(trie = next[c]))
687	{
688	  d = 1;
689	  continue;
690	}
691      if (trie->accepting && beg <= mch)
692	{
693	  lmch = beg;
694	  accept = trie;
695	}
696      d = trie->shift;
697      while (beg > text)
698	{
699	  c = trans ? trans[(unsigned char) *--beg] : *--beg;
700	  tree = trie->links;
701	  while (tree && c != tree->label)
702	    if (c < tree->label)
703	      tree = tree->llink;
704	    else
705	      tree = tree->rlink;
706	  if (tree)
707	    {
708	      trie = tree->trie;
709	      if (trie->accepting && beg <= mch)
710		{
711		  lmch = beg;
712		  accept = trie;
713		}
714	    }
715	  else
716	    break;
717	  d = trie->shift;
718	}
719      if (lmch)
720	{
721	  mch = lmch;
722	  goto match;
723	}
724      if (!d)
725	d = 1;
726    }
727
728  if (kwsmatch)
729    {
730      kwsmatch->index = accept->accepting / 2;
731      kwsmatch->beg[0] = mch;
732      kwsmatch->size[0] = accept->depth;
733    }
734  return mch;
735}
736
737/* Search through the given text for a match of any member of the
738   given keyword set.  Return a pointer to the first character of
739   the matching substring, or NULL if no match is found.  If FOUNDLEN
740   is non-NULL store in the referenced location the length of the
741   matching substring.  Similarly, if FOUNDIDX is non-NULL, store
742   in the referenced location the index number of the particular
743   keyword matched. */
744char *
745kwsexec (kwset_t kws, char *text, size_t size, struct kwsmatch *kwsmatch)
746{
747  struct kwset *kwset;
748  char *ret;
749
750  kwset = (struct kwset *) kws;
751  if (kwset->words == 1 && kwset->trans == 0)
752    {
753      ret = bmexec(kws, text, size);
754      if (kwsmatch != 0 && ret != 0)
755	{
756	  kwsmatch->index = 0;
757	  kwsmatch->beg[0] = ret;
758	  kwsmatch->size[0] = kwset->mind;
759	}
760      return ret;
761    }
762  else
763    return cwexec(kws, text, size, kwsmatch);
764}
765
766/* Free the components of the given keyword set. */
767void
768kwsfree (kwset_t kws)
769{
770  struct kwset *kwset;
771
772  kwset = (struct kwset *) kws;
773  obstack_free(&kwset->obstack, 0);
774  free(kws);
775}
776