1226271Sgabor/* $FreeBSD$ */
2226035Sgabor
3226035Sgabor/*-
4226035Sgabor * Copyright (c) 1999 James Howard and Dag-Erling Co��dan Sm��rgrav
5226035Sgabor * Copyright (C) 2008-2011 Gabor Kovesdan <gabor@FreeBSD.org>
6226035Sgabor * All rights reserved.
7226035Sgabor *
8226035Sgabor * Redistribution and use in source and binary forms, with or without
9226035Sgabor * modification, are permitted provided that the following conditions
10226035Sgabor * are met:
11226035Sgabor * 1. Redistributions of source code must retain the above copyright
12226035Sgabor *    notice, this list of conditions and the following disclaimer.
13226035Sgabor * 2. Redistributions in binary form must reproduce the above copyright
14226035Sgabor *    notice, this list of conditions and the following disclaimer in the
15226035Sgabor *    documentation and/or other materials provided with the distribution.
16226035Sgabor *
17226035Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18226035Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19226035Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20226035Sgabor * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21226035Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22226035Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23226035Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24226035Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25226035Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26226035Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27226035Sgabor * SUCH DAMAGE.
28226035Sgabor */
29226035Sgabor
30226035Sgabor#include "glue.h"
31226035Sgabor
32226035Sgabor#include <ctype.h>
33226035Sgabor#include <limits.h>
34226035Sgabor#include <regex.h>
35226035Sgabor#include <stdbool.h>
36226035Sgabor#include <stdlib.h>
37226035Sgabor#include <string.h>
38226035Sgabor#ifdef TRE_WCHAR
39226035Sgabor#include <wchar.h>
40226035Sgabor#include <wctype.h>
41226035Sgabor#endif
42226035Sgabor
43226035Sgabor#include "hashtable.h"
44226035Sgabor#include "tre-fastmatch.h"
45226035Sgabor#include "xmalloc.h"
46226035Sgabor
47226035Sgaborstatic int	fastcmp(const fastmatch_t *fg, const void *data,
48226035Sgabor			tre_str_type_t type);
49226035Sgabor
50226035Sgabor/*
51226035Sgabor * Clean up if pattern compilation fails.
52226035Sgabor */
53226035Sgabor#define FAIL_COMP(errcode)						\
54226035Sgabor  {									\
55226035Sgabor    if (fg->pattern)							\
56226035Sgabor      xfree(fg->pattern);						\
57226035Sgabor    if (fg->wpattern)							\
58226035Sgabor      xfree(fg->wpattern);						\
59226035Sgabor    if (fg->qsBc_table)							\
60226035Sgabor      hashtable_free(fg->qsBc_table);					\
61226035Sgabor    fg = NULL;								\
62226035Sgabor    return errcode;							\
63226035Sgabor  }
64226035Sgabor
65226035Sgabor/*
66226035Sgabor * Skips n characters in the input string and assigns the start
67226035Sgabor * address to startptr. Note: as per IEEE Std 1003.1-2008
68226035Sgabor * matching is based on bit pattern not character representations
69226035Sgabor * so we can handle MB strings as byte sequences just like
70226035Sgabor * SB strings.
71226035Sgabor */
72226035Sgabor#define SKIP_CHARS(n)							\
73226035Sgabor  switch (type)								\
74226035Sgabor    {									\
75226035Sgabor      case STR_WIDE:							\
76226035Sgabor	startptr = str_wide + n;					\
77226035Sgabor	break;								\
78226035Sgabor      default:								\
79226035Sgabor	startptr = str_byte + n;					\
80226035Sgabor    }
81226035Sgabor
82226035Sgabor/*
83226035Sgabor * Converts the wide string pattern to SB/MB string and stores
84226035Sgabor * it in fg->pattern. Sets fg->len to the byte length of the
85226035Sgabor * converted string.
86226035Sgabor */
87226035Sgabor#define STORE_MBS_PAT							\
88226035Sgabor  {									\
89226035Sgabor    size_t siz;								\
90226035Sgabor									\
91226035Sgabor    siz = wcstombs(NULL, fg->wpattern, 0);				\
92226035Sgabor    if (siz == (size_t)-1)						\
93226035Sgabor      return REG_BADPAT;						\
94226035Sgabor    fg->len = siz;							\
95226035Sgabor    fg->pattern = xmalloc(siz + 1);					\
96226035Sgabor    if (fg->pattern == NULL)						\
97226035Sgabor      return REG_ESPACE;						\
98226035Sgabor    wcstombs(fg->pattern, fg->wpattern, siz);				\
99226035Sgabor    fg->pattern[siz] = '\0';						\
100226035Sgabor  }									\
101226035Sgabor
102226035Sgabor#define IS_OUT_OF_BOUNDS						\
103226035Sgabor  ((!fg->reversed							\
104226035Sgabor    ? ((type == STR_WIDE) ? ((j + fg->wlen) > len)			\
105226035Sgabor			  : ((j + fg->len) > len))			\
106248214Smarkj    : (j < 0)))
107226035Sgabor
108226035Sgabor/*
109226035Sgabor * Checks whether the new position after shifting in the input string
110226035Sgabor * is out of the bounds and break out from the loop if so.
111226035Sgabor */
112226035Sgabor#define CHECKBOUNDS							\
113226035Sgabor  if (IS_OUT_OF_BOUNDS)							\
114226035Sgabor    break;								\
115226035Sgabor
116226035Sgabor/*
117226035Sgabor * Shifts in the input string after a mismatch. The position of the
118226035Sgabor * mismatch is stored in the mismatch variable.
119226035Sgabor */
120226035Sgabor#define SHIFT								\
121226035Sgabor  CHECKBOUNDS;								\
122226035Sgabor									\
123226035Sgabor  {									\
124226035Sgabor    int r = -1;								\
125226035Sgabor    unsigned int bc = 0, gs = 0, ts;					\
126226035Sgabor									\
127226035Sgabor    switch (type)							\
128226035Sgabor      {									\
129226035Sgabor	case STR_WIDE:							\
130226035Sgabor	  if (!fg->hasdot)						\
131226035Sgabor	    {								\
132226035Sgabor	      if (u != 0 && (unsigned)mismatch == fg->wlen - 1 - shift)	\
133226035Sgabor		mismatch -= u;						\
134226035Sgabor	      v = fg->wlen - 1 - mismatch;				\
135226035Sgabor	      r = hashtable_get(fg->qsBc_table,				\
136226035Sgabor		&str_wide[!fg->reversed ? (size_t)j + fg->wlen		\
137226035Sgabor			  : (size_t)j - 1], &bc);			\
138226035Sgabor	      gs = fg->bmGs[mismatch];					\
139226035Sgabor	    }								\
140226035Sgabor	    bc = (r == HASH_OK) ? bc : fg->defBc;			\
141226035Sgabor	    DPRINT(("tre_fast_match: mismatch on character" CHF ", "	\
142226035Sgabor		    "BC %d, GS %d\n",					\
143226035Sgabor		    ((const tre_char_t *)startptr)[mismatch + 1],	\
144226035Sgabor		    bc, gs));						\
145226035Sgabor            break;							\
146226035Sgabor	default:							\
147226035Sgabor	  if (!fg->hasdot)						\
148226035Sgabor	    {								\
149226035Sgabor	      if (u != 0 && (unsigned)mismatch == fg->len - 1 - shift)	\
150226035Sgabor		mismatch -= u;						\
151226035Sgabor	      v = fg->len - 1 - mismatch;				\
152226035Sgabor	      gs = fg->sbmGs[mismatch];					\
153226035Sgabor	    }								\
154226035Sgabor	  bc = fg->qsBc[((const unsigned char *)str_byte)		\
155226035Sgabor			[!fg->reversed ? (size_t)j + fg->len		\
156226035Sgabor			 : (size_t)j - 1]];				\
157226035Sgabor	  DPRINT(("tre_fast_match: mismatch on character %c, "		\
158226035Sgabor		 "BC %d, GS %d\n",					\
159226035Sgabor		 ((const unsigned char *)startptr)[mismatch + 1],	\
160226035Sgabor		 bc, gs));						\
161226035Sgabor      }									\
162226035Sgabor    if (fg->hasdot)							\
163226035Sgabor      shift = bc;							\
164226035Sgabor    else								\
165226035Sgabor      {									\
166226047Sdelphij	ts = (u >= v) ? (u - v) : 0;					\
167226035Sgabor	shift = MAX(ts, bc);						\
168226035Sgabor	shift = MAX(shift, gs);						\
169226035Sgabor	if (shift == gs)						\
170226035Sgabor	  u = MIN((type == STR_WIDE ? fg->wlen : fg->len) - shift, v);	\
171226035Sgabor	else								\
172226035Sgabor	  {								\
173226035Sgabor	    if (ts < bc)						\
174226035Sgabor	      shift = MAX(shift, u + 1);				\
175226035Sgabor	    u = 0;							\
176226035Sgabor	  }								\
177226035Sgabor      }									\
178226035Sgabor      DPRINT(("tre_fast_match: shifting %u characters\n", shift));	\
179226035Sgabor      j = !fg->reversed ? j + shift : j - shift;			\
180226035Sgabor  }
181226035Sgabor
182226035Sgabor/*
183226035Sgabor * Normal Quick Search would require a shift based on the position the
184226035Sgabor * next character after the comparison is within the pattern.  With
185226035Sgabor * wildcards, the position of the last dot effects the maximum shift
186226035Sgabor * distance.
187226035Sgabor * The closer to the end the wild card is the slower the search.
188226035Sgabor *
189226035Sgabor * Examples:
190226035Sgabor * Pattern    Max shift
191226035Sgabor * -------    ---------
192226035Sgabor * this               5
193226035Sgabor * .his               4
194226035Sgabor * t.is               3
195226035Sgabor * th.s               2
196226035Sgabor * thi.               1
197226035Sgabor */
198226035Sgabor
199226035Sgabor/*
200226035Sgabor * Fills in the bad character shift array for SB/MB strings.
201226035Sgabor */
202226035Sgabor#define FILL_QSBC							\
203226035Sgabor  if (fg->reversed)							\
204226035Sgabor    {									\
205226035Sgabor      _FILL_QSBC_REVERSED						\
206226035Sgabor    }									\
207226035Sgabor  else									\
208226035Sgabor    {									\
209226035Sgabor      _FILL_QSBC							\
210226035Sgabor    }
211226035Sgabor
212226035Sgabor#define _FILL_QSBC							\
213226035Sgabor  for (unsigned int i = 0; i <= UCHAR_MAX; i++)				\
214226035Sgabor    fg->qsBc[i] = fg->len - hasdot;					\
215226035Sgabor  for (unsigned int i = hasdot + 1; i < fg->len; i++)			\
216226035Sgabor    {									\
217226035Sgabor      fg->qsBc[(unsigned char)fg->pattern[i]] = fg->len - i;		\
218226035Sgabor      DPRINT(("BC shift for char %c is %zu\n", fg->pattern[i],		\
219226035Sgabor	     fg->len - i));						\
220226035Sgabor      if (fg->icase)							\
221226035Sgabor	{								\
222226035Sgabor	  char c = islower((unsigned char)fg->pattern[i]) ?		\
223226035Sgabor		   toupper((unsigned char)fg->pattern[i]) :		\
224226035Sgabor		   tolower((unsigned char)fg->pattern[i]);		\
225226035Sgabor	  fg->qsBc[(unsigned char)c] = fg->len - i;			\
226226035Sgabor	  DPRINT(("BC shift for char %c is %zu\n", c, fg->len - i));	\
227226035Sgabor	}								\
228226035Sgabor    }
229226035Sgabor
230226035Sgabor#define _FILL_QSBC_REVERSED						\
231226035Sgabor  for (unsigned int i = 0; i <= UCHAR_MAX; i++)				\
232226035Sgabor    fg->qsBc[i] = firstdot + 1;						\
233226035Sgabor  for (int i = firstdot - 1; i >= 0; i--)				\
234226035Sgabor    {									\
235226035Sgabor      fg->qsBc[(unsigned char)fg->pattern[i]] = i + 1;			\
236226035Sgabor      DPRINT(("Reverse BC shift for char %c is %d\n", fg->pattern[i],	\
237226035Sgabor	     i + 1));							\
238226035Sgabor      if (fg->icase)							\
239226035Sgabor        {								\
240226035Sgabor          char c = islower((unsigned char)fg->pattern[i]) ?		\
241226035Sgabor		   toupper((unsigned char)fg->pattern[i]) :		\
242226035Sgabor		   tolower((unsigned char)fg->pattern[i]);		\
243226035Sgabor          fg->qsBc[(unsigned char)c] = i + 1;				\
244226035Sgabor	  DPRINT(("Reverse BC shift for char %c is %d\n", c, i + 1));	\
245226035Sgabor        }								\
246226035Sgabor    }
247226035Sgabor
248226035Sgabor/*
249226035Sgabor * Fills in the bad character shifts into a hastable for wide strings.
250226035Sgabor * With wide characters it is not possible any more to use a normal
251226035Sgabor * array because there are too many characters and we could not
252226035Sgabor * provide enough memory. Fortunately, we only have to store distinct
253226035Sgabor * values for so many characters as the number of distinct characters
254226035Sgabor * in the pattern, so we can store them in a hashtable and store a
255226035Sgabor * default shift value for the rest.
256226035Sgabor */
257226035Sgabor#define FILL_QSBC_WIDE							\
258226035Sgabor  if (fg->reversed)							\
259226035Sgabor    {									\
260226035Sgabor      _FILL_QSBC_WIDE_REVERSED						\
261226035Sgabor    }									\
262226035Sgabor  else									\
263226035Sgabor    {									\
264226035Sgabor      _FILL_QSBC_WIDE							\
265226035Sgabor    }
266226035Sgabor
267226035Sgabor#define _FILL_QSBC_WIDE							\
268226035Sgabor  /* Adjust the shift based on location of the last dot ('.'). */	\
269226035Sgabor  fg->defBc = fg->wlen - whasdot;					\
270226035Sgabor									\
271226035Sgabor  /* Preprocess pattern. */						\
272226035Sgabor  fg->qsBc_table = hashtable_init(fg->wlen * (fg->icase ? 8 : 4),	\
273226035Sgabor				  sizeof(tre_char_t), sizeof(int));	\
274226035Sgabor  if (!fg->qsBc_table)							\
275226035Sgabor    FAIL_COMP(REG_ESPACE);						\
276226035Sgabor  for (unsigned int i = whasdot + 1; i < fg->wlen; i++)			\
277226035Sgabor    {									\
278226035Sgabor      int k = fg->wlen - i;						\
279226035Sgabor      int r;								\
280226035Sgabor									\
281226035Sgabor      r = hashtable_put(fg->qsBc_table, &fg->wpattern[i], &k);		\
282226035Sgabor      if ((r == HASH_FAIL) || (r == HASH_FULL))				\
283226035Sgabor	FAIL_COMP(REG_ESPACE);						\
284226035Sgabor      DPRINT(("BC shift for wide char " CHF " is %d\n", fg->wpattern[i],\
285226035Sgabor	     k));							\
286226035Sgabor      if (fg->icase)							\
287226035Sgabor	{								\
288226035Sgabor	  tre_char_t wc = iswlower(fg->wpattern[i]) ?			\
289226035Sgabor	    towupper(fg->wpattern[i]) : towlower(fg->wpattern[i]);	\
290226035Sgabor	  r = hashtable_put(fg->qsBc_table, &wc, &k);			\
291226035Sgabor	  if ((r == HASH_FAIL) || (r == HASH_FULL))			\
292226035Sgabor	    FAIL_COMP(REG_ESPACE);					\
293226035Sgabor	  DPRINT(("BC shift for wide char " CHF " is %d\n", wc, k));	\
294226035Sgabor	}								\
295226035Sgabor    }
296226035Sgabor
297226035Sgabor#define _FILL_QSBC_WIDE_REVERSED					\
298226035Sgabor  /* Adjust the shift based on location of the last dot ('.'). */	\
299226035Sgabor  fg->defBc = (size_t)wfirstdot;					\
300226035Sgabor									\
301226035Sgabor  /* Preprocess pattern. */						\
302226035Sgabor  fg->qsBc_table = hashtable_init(fg->wlen * (fg->icase ? 8 : 4),	\
303226035Sgabor				  sizeof(tre_char_t), sizeof(int));	\
304226035Sgabor  if (!fg->qsBc_table)							\
305226035Sgabor    FAIL_COMP(REG_ESPACE);						\
306226035Sgabor  for (int i = wfirstdot - 1; i >= 0; i--)				\
307226035Sgabor    {									\
308226035Sgabor      int k = i + 1;							\
309226035Sgabor      int r;								\
310226035Sgabor									\
311226035Sgabor      r = hashtable_put(fg->qsBc_table, &fg->wpattern[i], &k);		\
312226035Sgabor      if ((r == HASH_FAIL) || (r == HASH_FULL))				\
313226035Sgabor	FAIL_COMP(REG_ESPACE);						\
314226035Sgabor      DPRINT(("Reverse BC shift for wide char " CHF " is %d\n",		\
315226035Sgabor	     fg->wpattern[i], k));					\
316226035Sgabor      if (fg->icase)							\
317226035Sgabor	{								\
318226035Sgabor	  tre_char_t wc = iswlower(fg->wpattern[i]) ?			\
319226035Sgabor	    towupper(fg->wpattern[i]) : towlower(fg->wpattern[i]);	\
320226035Sgabor	  r = hashtable_put(fg->qsBc_table, &wc, &k);			\
321226035Sgabor	  if ((r == HASH_FAIL) || (r == HASH_FULL))			\
322226035Sgabor	    FAIL_COMP(REG_ESPACE);					\
323226035Sgabor	  DPRINT(("Reverse BC shift for wide char " CHF " is %d\n", wc,	\
324226035Sgabor		 k));							\
325226035Sgabor	}								\
326226035Sgabor    }
327226035Sgabor
328226035Sgabor#ifdef _GREP_DEBUG
329226035Sgabor#define DPRINT_BMGS(len, fmt_str, sh)					\
330226035Sgabor  for (unsigned int i = 0; i < len; i++)				\
331226035Sgabor    DPRINT((fmt_str, i, sh[i]));
332226035Sgabor#else
333226035Sgabor#define DPRINT_BMGS(len, fmt_str, sh)					\
334226035Sgabor  do { } while(/*CONSTCOND*/0)
335226035Sgabor#endif
336226035Sgabor
337226035Sgabor/*
338226035Sgabor * Fills in the good suffix table for SB/MB strings.
339226035Sgabor */
340226035Sgabor#define FILL_BMGS							\
341226035Sgabor  if (!fg->hasdot)							\
342226035Sgabor    {									\
343226035Sgabor      fg->sbmGs = xmalloc(fg->len * sizeof(int));			\
344226035Sgabor      if (!fg->sbmGs)							\
345226035Sgabor	return REG_ESPACE;						\
346226035Sgabor      if (fg->len == 1)							\
347226035Sgabor	fg->sbmGs[0] = 1;						\
348226035Sgabor      else								\
349226035Sgabor	_FILL_BMGS(fg->sbmGs, fg->pattern, fg->len, false);		\
350226035Sgabor      DPRINT_BMGS(fg->len, "GS shift for pos %d is %d\n", fg->sbmGs);	\
351226035Sgabor    }
352226035Sgabor
353226035Sgabor/*
354226035Sgabor * Fills in the good suffix table for wide strings.
355226035Sgabor */
356226035Sgabor#define FILL_BMGS_WIDE							\
357226035Sgabor  if (!fg->hasdot)							\
358226035Sgabor    {									\
359226035Sgabor      fg->bmGs = xmalloc(fg->wlen * sizeof(int));			\
360226035Sgabor      if (!fg->bmGs)							\
361226035Sgabor	return REG_ESPACE;						\
362226035Sgabor      if (fg->wlen == 1)						\
363226035Sgabor	fg->bmGs[0] = 1;						\
364226035Sgabor      else								\
365226035Sgabor	_FILL_BMGS(fg->bmGs, fg->wpattern, fg->wlen, true);		\
366226035Sgabor      DPRINT_BMGS(fg->wlen, "GS shift (wide) for pos %d is %d\n",	\
367226035Sgabor		  fg->bmGs);						\
368226035Sgabor    }
369226035Sgabor
370226035Sgabor#define _FILL_BMGS(arr, pat, plen, wide)				\
371226035Sgabor  {									\
372226035Sgabor    char *p;								\
373226035Sgabor    tre_char_t *wp;							\
374226035Sgabor									\
375226035Sgabor    if (wide)								\
376226035Sgabor      {									\
377226035Sgabor	if (fg->icase)							\
378226035Sgabor	  {								\
379226035Sgabor	    wp = xmalloc(plen * sizeof(tre_char_t));			\
380226035Sgabor	    if (wp == NULL)						\
381226035Sgabor	      return REG_ESPACE;					\
382226035Sgabor	    for (unsigned int i = 0; i < plen; i++)			\
383226035Sgabor	      wp[i] = towlower(pat[i]);					\
384226035Sgabor	    _CALC_BMGS(arr, wp, plen);					\
385226035Sgabor	    xfree(wp);							\
386226035Sgabor	  }								\
387226035Sgabor	else								\
388226035Sgabor	  _CALC_BMGS(arr, pat, plen);					\
389226035Sgabor      }									\
390226035Sgabor    else								\
391226035Sgabor      {									\
392226035Sgabor	if (fg->icase)							\
393226035Sgabor	  {								\
394226035Sgabor	    p = xmalloc(plen);						\
395226035Sgabor	    if (p == NULL)						\
396226035Sgabor	      return REG_ESPACE;					\
397226035Sgabor	    for (unsigned int i = 0; i < plen; i++)			\
398253810Sache	      p[i] = tolower((unsigned char)pat[i]);                    \
399226035Sgabor	    _CALC_BMGS(arr, p, plen);					\
400226035Sgabor	    xfree(p);							\
401226035Sgabor	  }								\
402226035Sgabor	else								\
403226035Sgabor	  _CALC_BMGS(arr, pat, plen);					\
404226035Sgabor      }									\
405226035Sgabor  }
406226035Sgabor
407226035Sgabor#define _CALC_BMGS(arr, pat, plen)					\
408226035Sgabor  {									\
409226035Sgabor    int f = 0, g;							\
410226035Sgabor									\
411226035Sgabor    int *suff = xmalloc(plen * sizeof(int));				\
412226035Sgabor    if (suff == NULL)							\
413226035Sgabor      return REG_ESPACE;						\
414226035Sgabor									\
415226035Sgabor    suff[plen - 1] = plen;						\
416226035Sgabor    g = plen - 1;							\
417226035Sgabor    for (int i = plen - 2; i >= 0; i--)					\
418226035Sgabor      {									\
419226035Sgabor	if (i > g && suff[i + plen - 1 - f] < i - g)			\
420226035Sgabor	  suff[i] = suff[i + plen - 1 - f];				\
421226035Sgabor	else								\
422226035Sgabor	  {								\
423226035Sgabor	    if (i < g)							\
424226035Sgabor	      g = i;							\
425226035Sgabor	    f = i;							\
426226035Sgabor	    while (g >= 0 && pat[g] == pat[g + plen - 1 - f])		\
427226035Sgabor	      g--;							\
428226035Sgabor	    suff[i] = f - g;						\
429226035Sgabor	  }								\
430226035Sgabor      }									\
431226035Sgabor									\
432226035Sgabor    for (unsigned int i = 0; i < plen; i++)				\
433226035Sgabor      arr[i] = plen;							\
434226035Sgabor    g = 0;								\
435226035Sgabor    for (int i = plen - 1; i >= 0; i--)					\
436226035Sgabor      if (suff[i] == i + 1)						\
437226035Sgabor	for(; (unsigned long)g < plen - 1 - i; g++)			\
438226035Sgabor	  if (arr[g] == plen)						\
439226035Sgabor	    arr[g] = plen - 1 - i;					\
440226035Sgabor    for (unsigned int i = 0; i <= plen - 2; i++)			\
441226035Sgabor      arr[plen - 1 - suff[i]] = plen - 1 - i;				\
442226035Sgabor									\
443226035Sgabor    xfree(suff);							\
444226035Sgabor  }
445226035Sgabor
446226035Sgabor/*
447265160Spfg * Copies the pattern pat having length n to p and stores
448226035Sgabor * the size in l.
449226035Sgabor */
450226035Sgabor#define SAVE_PATTERN(src, srclen, dst, dstlen)				\
451226035Sgabor  dstlen = srclen;							\
452226035Sgabor  dst = xmalloc((dstlen + 1) * sizeof(tre_char_t));			\
453226035Sgabor  if (dst == NULL)							\
454226035Sgabor    return REG_ESPACE;							\
455226035Sgabor  if (dstlen > 0)							\
456226035Sgabor    memcpy(dst, src, dstlen * sizeof(tre_char_t));			\
457226035Sgabor  dst[dstlen] = TRE_CHAR('\0');
458226035Sgabor
459226035Sgabor/*
460226035Sgabor * Initializes pattern compiling.
461226035Sgabor */
462226035Sgabor#define INIT_COMP							\
463226035Sgabor  /* Initialize. */							\
464226035Sgabor  memset(fg, 0, sizeof(*fg));						\
465226035Sgabor  fg->icase = (cflags & REG_ICASE);					\
466226035Sgabor  fg->word = (cflags & REG_WORD);					\
467226035Sgabor  fg->newline = (cflags & REG_NEWLINE);					\
468226035Sgabor  fg->nosub = (cflags & REG_NOSUB);					\
469226035Sgabor									\
470226035Sgabor  /* Cannot handle REG_ICASE with MB string */				\
471245075Smarkj  if (fg->icase && (TRE_MB_CUR_MAX > 1) && n > 0)			\
472226035Sgabor    {									\
473226035Sgabor      DPRINT(("Cannot use fast matcher for MBS with REG_ICASE\n"));	\
474226035Sgabor      return REG_BADPAT;						\
475226035Sgabor    }
476226035Sgabor
477226035Sgabor/*
478226035Sgabor * Checks whether we have a 0-length pattern that will match
479226035Sgabor * anything. If literal is set to false, the EOL anchor is also
480226035Sgabor * taken into account.
481226035Sgabor */
482226035Sgabor#define CHECK_MATCHALL(literal)						\
483226035Sgabor  if (!literal && n == 1 && pat[0] == TRE_CHAR('$'))			\
484226035Sgabor    {									\
485226035Sgabor      n--;								\
486226035Sgabor      fg->eol = true;							\
487226035Sgabor    }									\
488226035Sgabor									\
489226035Sgabor  if (n == 0)								\
490226035Sgabor    {									\
491226035Sgabor      fg->matchall = true;						\
492226035Sgabor      fg->pattern = xmalloc(sizeof(char));				\
493226035Sgabor      if (!fg->pattern)							\
494226035Sgabor	FAIL_COMP(REG_ESPACE);						\
495226035Sgabor      fg->pattern[0] = '\0';						\
496226035Sgabor      fg->wpattern = xmalloc(sizeof(tre_char_t));			\
497226035Sgabor      if (!fg->wpattern)						\
498226035Sgabor	FAIL_COMP(REG_ESPACE);						\
499226035Sgabor      fg->wpattern[0] = TRE_CHAR('\0');					\
500226035Sgabor      DPRINT(("Matching every input\n"));				\
501226035Sgabor      return REG_OK;							\
502226035Sgabor    }
503226035Sgabor
504226035Sgabor/*
505226035Sgabor * Returns: REG_OK on success, error code otherwise
506226035Sgabor */
507226035Sgaborint
508226035Sgabortre_compile_literal(fastmatch_t *fg, const tre_char_t *pat, size_t n,
509226035Sgabor		    int cflags)
510226035Sgabor{
511226035Sgabor  size_t hasdot = 0, whasdot = 0;
512226035Sgabor  ssize_t firstdot = -1, wfirstdot = -1;
513226035Sgabor
514226035Sgabor  INIT_COMP;
515226035Sgabor
516226035Sgabor  CHECK_MATCHALL(true);
517226035Sgabor
518226035Sgabor  /* Cannot handle word boundaries with MB string */
519226035Sgabor  if (fg->word && (TRE_MB_CUR_MAX > 1))
520226035Sgabor    return REG_BADPAT;
521226035Sgabor
522226035Sgabor#ifdef TRE_WCHAR
523226035Sgabor  SAVE_PATTERN(pat, n, fg->wpattern, fg->wlen);
524226035Sgabor  STORE_MBS_PAT;
525226035Sgabor#else
526226035Sgabor  SAVE_PATTERN(pat, n, fg->pattern, fg->len);
527226035Sgabor#endif
528226035Sgabor
529226035Sgabor  DPRINT(("tre_compile_literal: pattern: %s, len %zu, icase: %c, word: %c, "
530226035Sgabor	 "newline %c\n", fg->pattern, fg->len, fg->icase ? 'y' : 'n',
531226035Sgabor	 fg->word ? 'y' : 'n', fg->newline ? 'y' : 'n'));
532226035Sgabor
533226035Sgabor  FILL_QSBC;
534226035Sgabor  FILL_BMGS;
535226035Sgabor#ifdef TRE_WCHAR
536226035Sgabor  FILL_QSBC_WIDE;
537226035Sgabor  FILL_BMGS_WIDE;
538226035Sgabor#endif
539226035Sgabor
540226035Sgabor  return REG_OK;
541226035Sgabor}
542226035Sgabor
543226035Sgabor/*
544226035Sgabor * Returns: REG_OK on success, error code otherwise
545226035Sgabor */
546226035Sgaborint
547226035Sgabortre_compile_fast(fastmatch_t *fg, const tre_char_t *pat, size_t n,
548226035Sgabor		 int cflags)
549226035Sgabor{
550226035Sgabor  tre_char_t *tmp;
551226271Sgabor  size_t pos = 0, hasdot = 0, whasdot = 0;
552226035Sgabor  ssize_t firstdot = -1, wfirstdot = -1;
553226035Sgabor  bool escaped = false;
554226035Sgabor  bool *_escmap = NULL;
555226035Sgabor
556226035Sgabor  INIT_COMP;
557226035Sgabor
558226035Sgabor  /* Remove beginning-of-line character ('^'). */
559226035Sgabor  if (pat[0] == TRE_CHAR('^'))
560226035Sgabor    {
561226035Sgabor      fg->bol = true;
562226035Sgabor      n--;
563226035Sgabor      pat++;
564226035Sgabor    }
565226035Sgabor
566226035Sgabor  CHECK_MATCHALL(false);
567226035Sgabor
568226035Sgabor  /* Handle word-boundary matching when GNU extensions are enabled */
569226035Sgabor  if ((cflags & REG_GNU) && (n >= 14) &&
570226035Sgabor      (memcmp(pat, TRE_CHAR("[[:<:]]"), 7 * sizeof(tre_char_t)) == 0) &&
571226035Sgabor      (memcmp(pat + n - 7, TRE_CHAR("[[:>:]]"),
572226035Sgabor	      7 * sizeof(tre_char_t)) == 0))
573226035Sgabor    {
574226035Sgabor      n -= 14;
575226035Sgabor      pat += 7;
576226035Sgabor      fg->word = true;
577226035Sgabor    }
578226035Sgabor
579226035Sgabor  /* Cannot handle word boundaries with MB string */
580226035Sgabor  if (fg->word && (TRE_MB_CUR_MAX > 1))
581226035Sgabor    return REG_BADPAT;
582226035Sgabor
583226035Sgabor  tmp = xmalloc((n + 1) * sizeof(tre_char_t));
584226035Sgabor  if (tmp == NULL)
585226035Sgabor    return REG_ESPACE;
586226035Sgabor
587226035Sgabor/* Copies the char into the stored pattern and skips to the next char. */
588226035Sgabor#define STORE_CHAR							\
589226035Sgabor  do									\
590226035Sgabor    {									\
591226035Sgabor      tmp[pos++] = pat[i];						\
592226035Sgabor      escaped = false;							\
593226035Sgabor      continue;								\
594226035Sgabor    } while (0)
595226035Sgabor
596226035Sgabor  /* Traverse the input pattern for processing */
597226035Sgabor  for (unsigned int i = 0; i < n; i++)
598226035Sgabor    {
599226035Sgabor      switch (pat[i])
600226035Sgabor	{
601226035Sgabor	  case TRE_CHAR('\\'):
602226035Sgabor	    if (escaped)
603226035Sgabor	      STORE_CHAR;
604226035Sgabor	    else if (i == n - 1)
605226035Sgabor	      goto badpat;
606226035Sgabor	    else
607226035Sgabor	      escaped = true;
608226035Sgabor	    continue;
609226035Sgabor	  case TRE_CHAR('['):
610226035Sgabor	    if (escaped)
611226035Sgabor	      STORE_CHAR;
612226035Sgabor	    else
613226035Sgabor	      goto badpat;
614226035Sgabor	    continue;
615226035Sgabor	  case TRE_CHAR('*'):
616226035Sgabor	    if (escaped || (!(cflags & REG_EXTENDED) && (i == 0)))
617226035Sgabor	      STORE_CHAR;
618226035Sgabor	    else
619226035Sgabor	      goto badpat;
620226035Sgabor	    continue;
621226035Sgabor	  case TRE_CHAR('+'):
622226035Sgabor	  case TRE_CHAR('?'):
623226035Sgabor	    if ((cflags & REG_EXTENDED) && (i == 0))
624303882Sdim	      goto badpat;
625226035Sgabor	    else if ((cflags & REG_EXTENDED) ^ !escaped)
626226035Sgabor	      STORE_CHAR;
627226035Sgabor	    else
628226035Sgabor	      goto badpat;
629226035Sgabor	    continue;
630226035Sgabor	  case TRE_CHAR('.'):
631226035Sgabor	    if (escaped)
632226035Sgabor	      {
633226035Sgabor		if (!_escmap)
634226035Sgabor		  _escmap = xmalloc(n * sizeof(bool));
635226035Sgabor		if (!_escmap)
636226035Sgabor		  {
637226035Sgabor		    xfree(tmp);
638226035Sgabor		    return REG_ESPACE;
639226035Sgabor		  }
640226035Sgabor		_escmap[i] = true;
641226035Sgabor		STORE_CHAR;
642226035Sgabor	      }
643226035Sgabor	    else
644226035Sgabor	      {
645226035Sgabor		whasdot = i;
646226035Sgabor		if (wfirstdot == -1)
647226035Sgabor			wfirstdot = i;
648226035Sgabor		STORE_CHAR;
649226035Sgabor	      }
650226035Sgabor	    continue;
651226035Sgabor	  case TRE_CHAR('^'):
652226035Sgabor	    STORE_CHAR;
653226035Sgabor	    continue;
654226035Sgabor	  case TRE_CHAR('$'):
655226035Sgabor	    if (!escaped && (i == n - 1))
656226035Sgabor	      fg->eol = true;
657226035Sgabor	    else
658226035Sgabor	      STORE_CHAR;
659226035Sgabor	    continue;
660226035Sgabor	  case TRE_CHAR('('):
661226035Sgabor	    if ((cflags & REG_EXTENDED) ^ escaped)
662226035Sgabor	      goto badpat;
663226035Sgabor	    else
664226035Sgabor	      STORE_CHAR;
665226035Sgabor	    continue;
666226035Sgabor	  case TRE_CHAR('{'):
667226035Sgabor	    if (!(cflags & REG_EXTENDED) ^ escaped)
668226035Sgabor	      STORE_CHAR;
669226035Sgabor	    else if (!(cflags & REG_EXTENDED) && (i == 0))
670226035Sgabor	      STORE_CHAR;
671226035Sgabor	    else if ((cflags & REG_EXTENDED) && (i == 0))
672226035Sgabor	      continue;
673226035Sgabor	    else
674226035Sgabor	      goto badpat;
675226035Sgabor	    continue;
676226035Sgabor	  case TRE_CHAR('|'):
677226035Sgabor	    if ((cflags & REG_EXTENDED) ^ escaped)
678226035Sgabor	      goto badpat;
679226035Sgabor	    else
680226035Sgabor	      STORE_CHAR;
681226035Sgabor	    continue;
682226035Sgabor	  default:
683226035Sgabor	    if (escaped)
684226035Sgabor	      goto badpat;
685226035Sgabor	    else
686226035Sgabor	      STORE_CHAR;
687226035Sgabor	    continue;
688226035Sgabor	}
689226035Sgabor      continue;
690226035Sgaborbadpat:
691226035Sgabor      xfree(tmp);
692226035Sgabor      DPRINT(("tre_compile_fast: compilation of pattern failed, falling"
693226035Sgabor	      "back to NFA\n"));
694226035Sgabor      return REG_BADPAT;
695226035Sgabor    }
696226035Sgabor
697226271Sgabor  fg->hasdot = wfirstdot > -1;
698226035Sgabor
699226035Sgabor  /*
700226035Sgabor   * The pattern has been processed and copied to tmp as a literal string
701226035Sgabor   * with escapes, anchors (^$) and the word boundary match character
702226035Sgabor   * classes stripped out.
703226035Sgabor   */
704226035Sgabor#ifdef TRE_WCHAR
705226035Sgabor  SAVE_PATTERN(tmp, pos, fg->wpattern, fg->wlen);
706226035Sgabor  fg->wescmap = _escmap;
707226035Sgabor  STORE_MBS_PAT;
708226035Sgabor
709226035Sgabor  /*
710226035Sgabor   * The position of dots and escaped dots is different in the MB string
711226035Sgabor   * than in to the wide string so traverse the converted string, as well,
712226035Sgabor   * to store these positions.
713226035Sgabor   */
714226035Sgabor  if (fg->hasdot || (fg->wescmap != NULL))
715226035Sgabor    {
716226035Sgabor      if (fg->wescmap != NULL)
717226035Sgabor	{
718226035Sgabor	  fg->escmap = xmalloc(fg->len * sizeof(bool));
719226035Sgabor	  if (!fg->escmap)
720226035Sgabor	    {
721226035Sgabor	      tre_free_fast(fg);
722226035Sgabor	      return REG_ESPACE;
723226035Sgabor	    }
724226035Sgabor	}
725226035Sgabor
726226035Sgabor      escaped = false;
727226035Sgabor      for (unsigned int i = 0; i < fg->len; i++)
728226035Sgabor	if (fg->pattern[i] == '\\')
729226035Sgabor	  escaped = !escaped;
730274757Semaste	else if (fg->pattern[i] == '.' && fg->escmap && escaped)
731226035Sgabor	  {
732226035Sgabor	    fg->escmap[i] = true;
733226035Sgabor	    escaped = false;
734226035Sgabor	  }
735226035Sgabor	else if (fg->pattern[i] == '.' && !escaped)
736226035Sgabor	  {
737226035Sgabor	    hasdot = i;
738226035Sgabor	    if (firstdot == -1)
739226035Sgabor	      firstdot = i;
740226035Sgabor	  }
741226035Sgabor	else
742226035Sgabor	  escaped = false;
743226035Sgabor    }
744226035Sgabor#else
745226035Sgabor  SAVE_PATTERN(tmp, pos, fg->pattern, fg->len);
746226035Sgabor  fg->escmap = _escmap;
747226035Sgabor#endif
748226035Sgabor
749226035Sgabor  xfree(tmp);
750226035Sgabor
751226035Sgabor  DPRINT(("tre_compile_fast: pattern: %s, len %zu, bol %c, eol %c, "
752226035Sgabor	 "icase: %c, word: %c, newline %c\n", fg->pattern, fg->len,
753226035Sgabor	 fg->bol ? 'y' : 'n', fg->eol ? 'y' : 'n',
754226035Sgabor	 fg->icase ? 'y' : 'n', fg->word ? 'y' : 'n',
755226035Sgabor	 fg->newline ? 'y' : 'n'));
756226035Sgabor
757226035Sgabor  /* Check whether reverse QS algorithm is more efficient */
758226035Sgabor  if ((wfirstdot > -1) && (fg->wlen - whasdot + 1 < (size_t)wfirstdot) &&
759226035Sgabor      fg->nosub)
760226035Sgabor    {
761226035Sgabor      fg->reversed = true;
762226035Sgabor      DPRINT(("tre_compile_fast: using reverse QS algorithm\n"));
763226035Sgabor    }
764226035Sgabor
765226035Sgabor  FILL_QSBC;
766226035Sgabor  FILL_BMGS;
767226035Sgabor#ifdef TRE_WCHAR
768226035Sgabor  FILL_QSBC_WIDE;
769226035Sgabor  FILL_BMGS_WIDE;
770226035Sgabor#endif
771226035Sgabor
772226035Sgabor  return REG_OK;
773226035Sgabor}
774226035Sgabor
775226035Sgabor#define _SHIFT_ONE							\
776226035Sgabor  {									\
777226035Sgabor    shift = 1;								\
778226035Sgabor    j = !fg->reversed ? j + shift : j - shift;				\
779226035Sgabor    continue;								\
780226035Sgabor  }
781226035Sgabor
782226035Sgabor#define _BBOUND_COND							\
783226035Sgabor  ((type == STR_WIDE) ?							\
784226035Sgabor    ((j == 0) || !(tre_isalnum(str_wide[j - 1]) ||			\
785226035Sgabor      (str_wide[j - 1] == TRE_CHAR('_')))) :				\
786226035Sgabor    ((j == 0) || !(tre_isalnum(str_byte[j - 1]) ||			\
787226035Sgabor      (str_byte[j - 1] == '_'))))
788226035Sgabor
789226035Sgabor#define _EBOUND_COND							\
790226035Sgabor  ((type == STR_WIDE) ?							\
791226035Sgabor    ((j + fg->wlen == len) || !(tre_isalnum(str_wide[j + fg->wlen]) ||	\
792226035Sgabor      (str_wide[j + fg->wlen] == TRE_CHAR('_')))) :			\
793226035Sgabor    ((j + fg->len == len) || !(tre_isalnum(str_byte[j + fg->len]) ||	\
794226035Sgabor      (str_byte[j + fg->len] == '_'))))
795226035Sgabor
796226035Sgabor/*
797226035Sgabor * Condition to check whether the match on position j is on a
798226035Sgabor * word boundary.
799226035Sgabor */
800226035Sgabor#define IS_ON_WORD_BOUNDARY						\
801226035Sgabor  (_BBOUND_COND && _EBOUND_COND)
802226035Sgabor
803226035Sgabor/*
804226035Sgabor * Checks word boundary and shifts one if match is not on a
805226035Sgabor * boundary.
806226035Sgabor */
807226035Sgabor#define CHECK_WORD_BOUNDARY						\
808226035Sgabor    if (!IS_ON_WORD_BOUNDARY)						\
809226035Sgabor      _SHIFT_ONE;
810226035Sgabor
811226035Sgabor#define _BOL_COND							\
812226035Sgabor  ((j == 0) || ((type == STR_WIDE) ? (str_wide[j - 1] == TRE_CHAR('\n'))\
813226035Sgabor				   : (str_byte[j - 1] == '\n')))
814226035Sgabor
815226035Sgabor/*
816226035Sgabor * Checks BOL anchor and shifts one if match is not on a
817226035Sgabor * boundary.
818226035Sgabor */
819226035Sgabor#define CHECK_BOL_ANCHOR						\
820226035Sgabor    if (!_BOL_COND)							\
821226035Sgabor      _SHIFT_ONE;
822226035Sgabor
823226035Sgabor#define _EOL_COND							\
824226035Sgabor  ((type == STR_WIDE)							\
825226035Sgabor    ? ((j + fg->wlen == len) ||						\
826226035Sgabor		(str_wide[j + fg->wlen] == TRE_CHAR('\n')))		\
827226035Sgabor    : ((j + fg->len == len) || (str_byte[j + fg->wlen] == '\n')))
828226035Sgabor
829226035Sgabor/*
830226035Sgabor * Checks EOL anchor and shifts one if match is not on a
831226035Sgabor * boundary.
832226035Sgabor */
833226035Sgabor#define CHECK_EOL_ANCHOR						\
834226035Sgabor    if (!_EOL_COND)							\
835226035Sgabor      _SHIFT_ONE;
836226035Sgabor
837226035Sgabor/*
838226035Sgabor * Executes matching of the precompiled pattern on the input string.
839226035Sgabor * Returns REG_OK or REG_NOMATCH depending on if we find a match or not.
840226035Sgabor */
841226035Sgaborint
842226035Sgabortre_match_fast(const fastmatch_t *fg, const void *data, size_t len,
843226035Sgabor    tre_str_type_t type, int nmatch, regmatch_t pmatch[], int eflags)
844226035Sgabor{
845226035Sgabor  unsigned int shift, u = 0, v = 0;
846226035Sgabor  ssize_t j = 0;
847226035Sgabor  int ret = REG_NOMATCH;
848226035Sgabor  int mismatch;
849226035Sgabor  const char *str_byte = data;
850226035Sgabor  const void *startptr = NULL;
851226035Sgabor  const tre_char_t *str_wide = data;
852226035Sgabor
853226035Sgabor  /* Calculate length if unspecified. */
854226035Sgabor  if (len == (size_t)-1)
855226035Sgabor    switch (type)
856226035Sgabor      {
857226035Sgabor	case STR_WIDE:
858226035Sgabor	  len = tre_strlen(str_wide);
859226035Sgabor	  break;
860226035Sgabor	default:
861226035Sgabor	  len = strlen(str_byte);
862226035Sgabor	  break;
863226035Sgabor      }
864226035Sgabor
865226035Sgabor  /* Shortcut for empty pattern */
866226035Sgabor  if (fg->matchall)
867226035Sgabor    {
868226035Sgabor      if (!fg->nosub && nmatch >= 1)
869226035Sgabor	{
870226035Sgabor	  pmatch[0].rm_so = 0;
871226035Sgabor	  pmatch[0].rm_eo = len;
872226035Sgabor	}
873226035Sgabor      if (fg->bol && fg->eol)
874226035Sgabor	return (len == 0) ? REG_OK : REG_NOMATCH;
875226035Sgabor      else
876226035Sgabor	return REG_OK;
877226035Sgabor    }
878226035Sgabor
879226035Sgabor  /* No point in going farther if we do not have enough data. */
880226035Sgabor  switch (type)
881226035Sgabor    {
882226035Sgabor      case STR_WIDE:
883226035Sgabor	if (len < fg->wlen)
884226035Sgabor	  return ret;
885226035Sgabor	shift = fg->wlen;
886226035Sgabor	break;
887226035Sgabor      default:
888226035Sgabor	if (len < fg->len)
889226035Sgabor	  return ret;
890226035Sgabor	shift = fg->len;
891226035Sgabor    }
892226035Sgabor
893226035Sgabor  /*
894226035Sgabor   * REG_NOTBOL means not anchoring ^ to the beginning of the line, so we
895226035Sgabor   * can shift one because there can't be a match at the beginning.
896226035Sgabor   */
897226035Sgabor  if (fg->bol && (eflags & REG_NOTBOL))
898226035Sgabor    j = 1;
899226035Sgabor
900226035Sgabor  /*
901226035Sgabor   * Like above, we cannot have a match at the very end when anchoring to
902226035Sgabor   * the end and REG_NOTEOL is specified.
903226035Sgabor   */
904226035Sgabor  if (fg->eol && (eflags & REG_NOTEOL))
905226035Sgabor    len--;
906226035Sgabor
907226035Sgabor  if (fg->reversed)
908226035Sgabor    j = len - (type == STR_WIDE ? fg->wlen : fg->len);
909226035Sgabor
910226035Sgabor
911226035Sgabor  /* Only try once at the beginning or ending of the line. */
912226035Sgabor  if ((fg->bol || fg->eol) && !fg->newline && !(eflags & REG_NOTBOL) &&
913226035Sgabor      !(eflags & REG_NOTEOL))
914226035Sgabor    {
915226035Sgabor      /* Simple text comparison. */
916226035Sgabor      if (!((fg->bol && fg->eol) &&
917226035Sgabor	  (type == STR_WIDE ? (len != fg->wlen) : (len != fg->len))))
918226035Sgabor	{
919226035Sgabor	  /* Determine where in data to start search at. */
920226035Sgabor	  j = fg->eol ? len - (type == STR_WIDE ? fg->wlen : fg->len) : 0;
921226035Sgabor	  SKIP_CHARS(j);
922226035Sgabor	  mismatch = fastcmp(fg, startptr, type);
923226035Sgabor	  if (mismatch == REG_OK)
924226035Sgabor	    {
925226035Sgabor	      if (fg->word && !IS_ON_WORD_BOUNDARY)
926226035Sgabor		return ret;
927226035Sgabor	      if (!fg->nosub && nmatch >= 1)
928226035Sgabor		{
929226035Sgabor		  pmatch[0].rm_so = j;
930226035Sgabor		  pmatch[0].rm_eo = j + (type == STR_WIDE ? fg->wlen : fg->len);
931226035Sgabor		}
932226035Sgabor	      return REG_OK;
933226035Sgabor            }
934226035Sgabor        }
935226035Sgabor    }
936226035Sgabor  else
937226035Sgabor    {
938226035Sgabor      /* Quick Search / Turbo Boyer-Moore algorithm. */
939226035Sgabor      do
940226035Sgabor	{
941226035Sgabor	  SKIP_CHARS(j);
942226035Sgabor	  mismatch = fastcmp(fg, startptr, type);
943226035Sgabor	  if (mismatch == REG_OK)
944226035Sgabor	    {
945226035Sgabor	      if (fg->word)
946226035Sgabor		CHECK_WORD_BOUNDARY;
947226035Sgabor	      if (fg->bol)
948226035Sgabor		CHECK_BOL_ANCHOR;
949226035Sgabor	      if (fg->eol)
950226035Sgabor		CHECK_EOL_ANCHOR;
951226035Sgabor	      if (!fg->nosub && nmatch >= 1)
952226035Sgabor		{
953226035Sgabor		  pmatch[0].rm_so = j;
954226035Sgabor		  pmatch[0].rm_eo = j + ((type == STR_WIDE) ? fg->wlen : fg->len);
955226035Sgabor		}
956226035Sgabor	      return REG_OK;
957226035Sgabor	    }
958226035Sgabor	  else if (mismatch > 0)
959226035Sgabor	    return mismatch;
960226035Sgabor	  mismatch = -mismatch - 1;
961226035Sgabor	  SHIFT;
962226035Sgabor        } while (!IS_OUT_OF_BOUNDS);
963226035Sgabor    }
964226035Sgabor    return ret;
965226035Sgabor}
966226035Sgabor
967226035Sgabor/*
968226035Sgabor * Frees the resources that were allocated when the pattern was compiled.
969226035Sgabor */
970226035Sgaborvoid
971226035Sgabortre_free_fast(fastmatch_t *fg)
972226035Sgabor{
973226035Sgabor
974226035Sgabor  DPRINT(("tre_fast_free: freeing structures for pattern %s\n",
975226035Sgabor	 fg->pattern));
976226035Sgabor
977226035Sgabor#ifdef TRE_WCHAR
978226035Sgabor  hashtable_free(fg->qsBc_table);
979226035Sgabor  if (!fg->hasdot)
980226035Sgabor    xfree(fg->bmGs);
981226035Sgabor  if (fg->wescmap)
982226035Sgabor    xfree(fg->wescmap);
983226035Sgabor  xfree(fg->wpattern);
984226035Sgabor#endif
985226035Sgabor  if (!fg->hasdot)
986226035Sgabor    xfree(fg->sbmGs);
987226035Sgabor  if (fg->escmap)
988226035Sgabor    xfree(fg->escmap);
989226035Sgabor  xfree(fg->pattern);
990226035Sgabor}
991226035Sgabor
992226035Sgabor/*
993226035Sgabor * Returns:	-(i + 1) on failure (position that it failed with minus sign)
994226035Sgabor *		error code on error
995226035Sgabor *		REG_OK on success
996226035Sgabor */
997226035Sgaborstatic inline int
998226035Sgaborfastcmp(const fastmatch_t *fg, const void *data, tre_str_type_t type)
999226035Sgabor{
1000226035Sgabor  const char *str_byte = data;
1001226035Sgabor  const char *pat_byte = fg->pattern;
1002226035Sgabor  const tre_char_t *str_wide = data;
1003226035Sgabor  const tre_char_t *pat_wide = fg->wpattern;
1004226035Sgabor  const bool *escmap = (type == STR_WIDE) ? fg->wescmap : fg->escmap;
1005226035Sgabor  size_t len = (type == STR_WIDE) ? fg->wlen : fg->len;
1006226035Sgabor  int ret = REG_OK;
1007226035Sgabor
1008226035Sgabor  /* Compare the pattern and the input char-by-char from the last position. */
1009226035Sgabor  for (int i = len - 1; i >= 0; i--) {
1010226035Sgabor    switch (type)
1011226035Sgabor      {
1012226035Sgabor	case STR_WIDE:
1013226035Sgabor
1014226035Sgabor	  /* Check dot */
1015226035Sgabor	  if (fg->hasdot && pat_wide[i] == TRE_CHAR('.') &&
1016226035Sgabor	      (!escmap || !escmap[i]) &&
1017226035Sgabor	      (!fg->newline || (str_wide[i] != TRE_CHAR('\n'))))
1018226035Sgabor	    continue;
1019226035Sgabor
1020226035Sgabor	  /* Compare */
1021226035Sgabor	  if (fg->icase ? (towlower(pat_wide[i]) == towlower(str_wide[i]))
1022226035Sgabor		    : (pat_wide[i] == str_wide[i]))
1023226035Sgabor	    continue;
1024226035Sgabor	  break;
1025226035Sgabor	default:
1026226035Sgabor	  /* Check dot */
1027226035Sgabor	  if (fg->hasdot && pat_byte[i] == '.' &&
1028226035Sgabor	      (!escmap || !escmap[i]) &&
1029226035Sgabor	      (!fg->newline || (str_byte[i] != '\n')))
1030226035Sgabor	    continue;
1031226035Sgabor
1032226035Sgabor	  /* Compare */
1033253810Sache	  if (fg->icase ? (tolower((unsigned char)pat_byte[i]) == tolower((unsigned char)str_byte[i]))
1034226035Sgabor		    : (pat_byte[i] == str_byte[i]))
1035226035Sgabor	  continue;
1036226035Sgabor      }
1037226035Sgabor    DPRINT(("fastcmp: mismatch at position %d\n", i));
1038226035Sgabor    ret = -(i + 1);
1039226035Sgabor    break;
1040226035Sgabor  }
1041226035Sgabor  return ret;
1042226035Sgabor}
1043