1226271Sgabor/* $FreeBSD$ */ 2226035Sgabor 3226035Sgabor/*- 4226035Sgabor * Copyright (c) 1999 James Howard and Dag-Erling Co��dan Sm��rgrav 5226035Sgabor * Copyright (C) 2008-2011 Gabor Kovesdan <gabor@FreeBSD.org> 6226035Sgabor * All rights reserved. 7226035Sgabor * 8226035Sgabor * Redistribution and use in source and binary forms, with or without 9226035Sgabor * modification, are permitted provided that the following conditions 10226035Sgabor * are met: 11226035Sgabor * 1. Redistributions of source code must retain the above copyright 12226035Sgabor * notice, this list of conditions and the following disclaimer. 13226035Sgabor * 2. Redistributions in binary form must reproduce the above copyright 14226035Sgabor * notice, this list of conditions and the following disclaimer in the 15226035Sgabor * documentation and/or other materials provided with the distribution. 16226035Sgabor * 17226035Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18226035Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19226035Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20226035Sgabor * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21226035Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22226035Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23226035Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24226035Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25226035Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26226035Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27226035Sgabor * SUCH DAMAGE. 28226035Sgabor */ 29226035Sgabor 30226035Sgabor#include "glue.h" 31226035Sgabor 32226035Sgabor#include <ctype.h> 33226035Sgabor#include <limits.h> 34226035Sgabor#include <regex.h> 35226035Sgabor#include <stdbool.h> 36226035Sgabor#include <stdlib.h> 37226035Sgabor#include <string.h> 38226035Sgabor#ifdef TRE_WCHAR 39226035Sgabor#include <wchar.h> 40226035Sgabor#include <wctype.h> 41226035Sgabor#endif 42226035Sgabor 43226035Sgabor#include "hashtable.h" 44226035Sgabor#include "tre-fastmatch.h" 45226035Sgabor#include "xmalloc.h" 46226035Sgabor 47226035Sgaborstatic int fastcmp(const fastmatch_t *fg, const void *data, 48226035Sgabor tre_str_type_t type); 49226035Sgabor 50226035Sgabor/* 51226035Sgabor * Clean up if pattern compilation fails. 52226035Sgabor */ 53226035Sgabor#define FAIL_COMP(errcode) \ 54226035Sgabor { \ 55226035Sgabor if (fg->pattern) \ 56226035Sgabor xfree(fg->pattern); \ 57226035Sgabor if (fg->wpattern) \ 58226035Sgabor xfree(fg->wpattern); \ 59226035Sgabor if (fg->qsBc_table) \ 60226035Sgabor hashtable_free(fg->qsBc_table); \ 61226035Sgabor fg = NULL; \ 62226035Sgabor return errcode; \ 63226035Sgabor } 64226035Sgabor 65226035Sgabor/* 66226035Sgabor * Skips n characters in the input string and assigns the start 67226035Sgabor * address to startptr. Note: as per IEEE Std 1003.1-2008 68226035Sgabor * matching is based on bit pattern not character representations 69226035Sgabor * so we can handle MB strings as byte sequences just like 70226035Sgabor * SB strings. 71226035Sgabor */ 72226035Sgabor#define SKIP_CHARS(n) \ 73226035Sgabor switch (type) \ 74226035Sgabor { \ 75226035Sgabor case STR_WIDE: \ 76226035Sgabor startptr = str_wide + n; \ 77226035Sgabor break; \ 78226035Sgabor default: \ 79226035Sgabor startptr = str_byte + n; \ 80226035Sgabor } 81226035Sgabor 82226035Sgabor/* 83226035Sgabor * Converts the wide string pattern to SB/MB string and stores 84226035Sgabor * it in fg->pattern. Sets fg->len to the byte length of the 85226035Sgabor * converted string. 86226035Sgabor */ 87226035Sgabor#define STORE_MBS_PAT \ 88226035Sgabor { \ 89226035Sgabor size_t siz; \ 90226035Sgabor \ 91226035Sgabor siz = wcstombs(NULL, fg->wpattern, 0); \ 92226035Sgabor if (siz == (size_t)-1) \ 93226035Sgabor return REG_BADPAT; \ 94226035Sgabor fg->len = siz; \ 95226035Sgabor fg->pattern = xmalloc(siz + 1); \ 96226035Sgabor if (fg->pattern == NULL) \ 97226035Sgabor return REG_ESPACE; \ 98226035Sgabor wcstombs(fg->pattern, fg->wpattern, siz); \ 99226035Sgabor fg->pattern[siz] = '\0'; \ 100226035Sgabor } \ 101226035Sgabor 102226035Sgabor#define IS_OUT_OF_BOUNDS \ 103226035Sgabor ((!fg->reversed \ 104226035Sgabor ? ((type == STR_WIDE) ? ((j + fg->wlen) > len) \ 105226035Sgabor : ((j + fg->len) > len)) \ 106248214Smarkj : (j < 0))) 107226035Sgabor 108226035Sgabor/* 109226035Sgabor * Checks whether the new position after shifting in the input string 110226035Sgabor * is out of the bounds and break out from the loop if so. 111226035Sgabor */ 112226035Sgabor#define CHECKBOUNDS \ 113226035Sgabor if (IS_OUT_OF_BOUNDS) \ 114226035Sgabor break; \ 115226035Sgabor 116226035Sgabor/* 117226035Sgabor * Shifts in the input string after a mismatch. The position of the 118226035Sgabor * mismatch is stored in the mismatch variable. 119226035Sgabor */ 120226035Sgabor#define SHIFT \ 121226035Sgabor CHECKBOUNDS; \ 122226035Sgabor \ 123226035Sgabor { \ 124226035Sgabor int r = -1; \ 125226035Sgabor unsigned int bc = 0, gs = 0, ts; \ 126226035Sgabor \ 127226035Sgabor switch (type) \ 128226035Sgabor { \ 129226035Sgabor case STR_WIDE: \ 130226035Sgabor if (!fg->hasdot) \ 131226035Sgabor { \ 132226035Sgabor if (u != 0 && (unsigned)mismatch == fg->wlen - 1 - shift) \ 133226035Sgabor mismatch -= u; \ 134226035Sgabor v = fg->wlen - 1 - mismatch; \ 135226035Sgabor r = hashtable_get(fg->qsBc_table, \ 136226035Sgabor &str_wide[!fg->reversed ? (size_t)j + fg->wlen \ 137226035Sgabor : (size_t)j - 1], &bc); \ 138226035Sgabor gs = fg->bmGs[mismatch]; \ 139226035Sgabor } \ 140226035Sgabor bc = (r == HASH_OK) ? bc : fg->defBc; \ 141226035Sgabor DPRINT(("tre_fast_match: mismatch on character" CHF ", " \ 142226035Sgabor "BC %d, GS %d\n", \ 143226035Sgabor ((const tre_char_t *)startptr)[mismatch + 1], \ 144226035Sgabor bc, gs)); \ 145226035Sgabor break; \ 146226035Sgabor default: \ 147226035Sgabor if (!fg->hasdot) \ 148226035Sgabor { \ 149226035Sgabor if (u != 0 && (unsigned)mismatch == fg->len - 1 - shift) \ 150226035Sgabor mismatch -= u; \ 151226035Sgabor v = fg->len - 1 - mismatch; \ 152226035Sgabor gs = fg->sbmGs[mismatch]; \ 153226035Sgabor } \ 154226035Sgabor bc = fg->qsBc[((const unsigned char *)str_byte) \ 155226035Sgabor [!fg->reversed ? (size_t)j + fg->len \ 156226035Sgabor : (size_t)j - 1]]; \ 157226035Sgabor DPRINT(("tre_fast_match: mismatch on character %c, " \ 158226035Sgabor "BC %d, GS %d\n", \ 159226035Sgabor ((const unsigned char *)startptr)[mismatch + 1], \ 160226035Sgabor bc, gs)); \ 161226035Sgabor } \ 162226035Sgabor if (fg->hasdot) \ 163226035Sgabor shift = bc; \ 164226035Sgabor else \ 165226035Sgabor { \ 166226047Sdelphij ts = (u >= v) ? (u - v) : 0; \ 167226035Sgabor shift = MAX(ts, bc); \ 168226035Sgabor shift = MAX(shift, gs); \ 169226035Sgabor if (shift == gs) \ 170226035Sgabor u = MIN((type == STR_WIDE ? fg->wlen : fg->len) - shift, v); \ 171226035Sgabor else \ 172226035Sgabor { \ 173226035Sgabor if (ts < bc) \ 174226035Sgabor shift = MAX(shift, u + 1); \ 175226035Sgabor u = 0; \ 176226035Sgabor } \ 177226035Sgabor } \ 178226035Sgabor DPRINT(("tre_fast_match: shifting %u characters\n", shift)); \ 179226035Sgabor j = !fg->reversed ? j + shift : j - shift; \ 180226035Sgabor } 181226035Sgabor 182226035Sgabor/* 183226035Sgabor * Normal Quick Search would require a shift based on the position the 184226035Sgabor * next character after the comparison is within the pattern. With 185226035Sgabor * wildcards, the position of the last dot effects the maximum shift 186226035Sgabor * distance. 187226035Sgabor * The closer to the end the wild card is the slower the search. 188226035Sgabor * 189226035Sgabor * Examples: 190226035Sgabor * Pattern Max shift 191226035Sgabor * ------- --------- 192226035Sgabor * this 5 193226035Sgabor * .his 4 194226035Sgabor * t.is 3 195226035Sgabor * th.s 2 196226035Sgabor * thi. 1 197226035Sgabor */ 198226035Sgabor 199226035Sgabor/* 200226035Sgabor * Fills in the bad character shift array for SB/MB strings. 201226035Sgabor */ 202226035Sgabor#define FILL_QSBC \ 203226035Sgabor if (fg->reversed) \ 204226035Sgabor { \ 205226035Sgabor _FILL_QSBC_REVERSED \ 206226035Sgabor } \ 207226035Sgabor else \ 208226035Sgabor { \ 209226035Sgabor _FILL_QSBC \ 210226035Sgabor } 211226035Sgabor 212226035Sgabor#define _FILL_QSBC \ 213226035Sgabor for (unsigned int i = 0; i <= UCHAR_MAX; i++) \ 214226035Sgabor fg->qsBc[i] = fg->len - hasdot; \ 215226035Sgabor for (unsigned int i = hasdot + 1; i < fg->len; i++) \ 216226035Sgabor { \ 217226035Sgabor fg->qsBc[(unsigned char)fg->pattern[i]] = fg->len - i; \ 218226035Sgabor DPRINT(("BC shift for char %c is %zu\n", fg->pattern[i], \ 219226035Sgabor fg->len - i)); \ 220226035Sgabor if (fg->icase) \ 221226035Sgabor { \ 222226035Sgabor char c = islower((unsigned char)fg->pattern[i]) ? \ 223226035Sgabor toupper((unsigned char)fg->pattern[i]) : \ 224226035Sgabor tolower((unsigned char)fg->pattern[i]); \ 225226035Sgabor fg->qsBc[(unsigned char)c] = fg->len - i; \ 226226035Sgabor DPRINT(("BC shift for char %c is %zu\n", c, fg->len - i)); \ 227226035Sgabor } \ 228226035Sgabor } 229226035Sgabor 230226035Sgabor#define _FILL_QSBC_REVERSED \ 231226035Sgabor for (unsigned int i = 0; i <= UCHAR_MAX; i++) \ 232226035Sgabor fg->qsBc[i] = firstdot + 1; \ 233226035Sgabor for (int i = firstdot - 1; i >= 0; i--) \ 234226035Sgabor { \ 235226035Sgabor fg->qsBc[(unsigned char)fg->pattern[i]] = i + 1; \ 236226035Sgabor DPRINT(("Reverse BC shift for char %c is %d\n", fg->pattern[i], \ 237226035Sgabor i + 1)); \ 238226035Sgabor if (fg->icase) \ 239226035Sgabor { \ 240226035Sgabor char c = islower((unsigned char)fg->pattern[i]) ? \ 241226035Sgabor toupper((unsigned char)fg->pattern[i]) : \ 242226035Sgabor tolower((unsigned char)fg->pattern[i]); \ 243226035Sgabor fg->qsBc[(unsigned char)c] = i + 1; \ 244226035Sgabor DPRINT(("Reverse BC shift for char %c is %d\n", c, i + 1)); \ 245226035Sgabor } \ 246226035Sgabor } 247226035Sgabor 248226035Sgabor/* 249226035Sgabor * Fills in the bad character shifts into a hastable for wide strings. 250226035Sgabor * With wide characters it is not possible any more to use a normal 251226035Sgabor * array because there are too many characters and we could not 252226035Sgabor * provide enough memory. Fortunately, we only have to store distinct 253226035Sgabor * values for so many characters as the number of distinct characters 254226035Sgabor * in the pattern, so we can store them in a hashtable and store a 255226035Sgabor * default shift value for the rest. 256226035Sgabor */ 257226035Sgabor#define FILL_QSBC_WIDE \ 258226035Sgabor if (fg->reversed) \ 259226035Sgabor { \ 260226035Sgabor _FILL_QSBC_WIDE_REVERSED \ 261226035Sgabor } \ 262226035Sgabor else \ 263226035Sgabor { \ 264226035Sgabor _FILL_QSBC_WIDE \ 265226035Sgabor } 266226035Sgabor 267226035Sgabor#define _FILL_QSBC_WIDE \ 268226035Sgabor /* Adjust the shift based on location of the last dot ('.'). */ \ 269226035Sgabor fg->defBc = fg->wlen - whasdot; \ 270226035Sgabor \ 271226035Sgabor /* Preprocess pattern. */ \ 272226035Sgabor fg->qsBc_table = hashtable_init(fg->wlen * (fg->icase ? 8 : 4), \ 273226035Sgabor sizeof(tre_char_t), sizeof(int)); \ 274226035Sgabor if (!fg->qsBc_table) \ 275226035Sgabor FAIL_COMP(REG_ESPACE); \ 276226035Sgabor for (unsigned int i = whasdot + 1; i < fg->wlen; i++) \ 277226035Sgabor { \ 278226035Sgabor int k = fg->wlen - i; \ 279226035Sgabor int r; \ 280226035Sgabor \ 281226035Sgabor r = hashtable_put(fg->qsBc_table, &fg->wpattern[i], &k); \ 282226035Sgabor if ((r == HASH_FAIL) || (r == HASH_FULL)) \ 283226035Sgabor FAIL_COMP(REG_ESPACE); \ 284226035Sgabor DPRINT(("BC shift for wide char " CHF " is %d\n", fg->wpattern[i],\ 285226035Sgabor k)); \ 286226035Sgabor if (fg->icase) \ 287226035Sgabor { \ 288226035Sgabor tre_char_t wc = iswlower(fg->wpattern[i]) ? \ 289226035Sgabor towupper(fg->wpattern[i]) : towlower(fg->wpattern[i]); \ 290226035Sgabor r = hashtable_put(fg->qsBc_table, &wc, &k); \ 291226035Sgabor if ((r == HASH_FAIL) || (r == HASH_FULL)) \ 292226035Sgabor FAIL_COMP(REG_ESPACE); \ 293226035Sgabor DPRINT(("BC shift for wide char " CHF " is %d\n", wc, k)); \ 294226035Sgabor } \ 295226035Sgabor } 296226035Sgabor 297226035Sgabor#define _FILL_QSBC_WIDE_REVERSED \ 298226035Sgabor /* Adjust the shift based on location of the last dot ('.'). */ \ 299226035Sgabor fg->defBc = (size_t)wfirstdot; \ 300226035Sgabor \ 301226035Sgabor /* Preprocess pattern. */ \ 302226035Sgabor fg->qsBc_table = hashtable_init(fg->wlen * (fg->icase ? 8 : 4), \ 303226035Sgabor sizeof(tre_char_t), sizeof(int)); \ 304226035Sgabor if (!fg->qsBc_table) \ 305226035Sgabor FAIL_COMP(REG_ESPACE); \ 306226035Sgabor for (int i = wfirstdot - 1; i >= 0; i--) \ 307226035Sgabor { \ 308226035Sgabor int k = i + 1; \ 309226035Sgabor int r; \ 310226035Sgabor \ 311226035Sgabor r = hashtable_put(fg->qsBc_table, &fg->wpattern[i], &k); \ 312226035Sgabor if ((r == HASH_FAIL) || (r == HASH_FULL)) \ 313226035Sgabor FAIL_COMP(REG_ESPACE); \ 314226035Sgabor DPRINT(("Reverse BC shift for wide char " CHF " is %d\n", \ 315226035Sgabor fg->wpattern[i], k)); \ 316226035Sgabor if (fg->icase) \ 317226035Sgabor { \ 318226035Sgabor tre_char_t wc = iswlower(fg->wpattern[i]) ? \ 319226035Sgabor towupper(fg->wpattern[i]) : towlower(fg->wpattern[i]); \ 320226035Sgabor r = hashtable_put(fg->qsBc_table, &wc, &k); \ 321226035Sgabor if ((r == HASH_FAIL) || (r == HASH_FULL)) \ 322226035Sgabor FAIL_COMP(REG_ESPACE); \ 323226035Sgabor DPRINT(("Reverse BC shift for wide char " CHF " is %d\n", wc, \ 324226035Sgabor k)); \ 325226035Sgabor } \ 326226035Sgabor } 327226035Sgabor 328226035Sgabor#ifdef _GREP_DEBUG 329226035Sgabor#define DPRINT_BMGS(len, fmt_str, sh) \ 330226035Sgabor for (unsigned int i = 0; i < len; i++) \ 331226035Sgabor DPRINT((fmt_str, i, sh[i])); 332226035Sgabor#else 333226035Sgabor#define DPRINT_BMGS(len, fmt_str, sh) \ 334226035Sgabor do { } while(/*CONSTCOND*/0) 335226035Sgabor#endif 336226035Sgabor 337226035Sgabor/* 338226035Sgabor * Fills in the good suffix table for SB/MB strings. 339226035Sgabor */ 340226035Sgabor#define FILL_BMGS \ 341226035Sgabor if (!fg->hasdot) \ 342226035Sgabor { \ 343226035Sgabor fg->sbmGs = xmalloc(fg->len * sizeof(int)); \ 344226035Sgabor if (!fg->sbmGs) \ 345226035Sgabor return REG_ESPACE; \ 346226035Sgabor if (fg->len == 1) \ 347226035Sgabor fg->sbmGs[0] = 1; \ 348226035Sgabor else \ 349226035Sgabor _FILL_BMGS(fg->sbmGs, fg->pattern, fg->len, false); \ 350226035Sgabor DPRINT_BMGS(fg->len, "GS shift for pos %d is %d\n", fg->sbmGs); \ 351226035Sgabor } 352226035Sgabor 353226035Sgabor/* 354226035Sgabor * Fills in the good suffix table for wide strings. 355226035Sgabor */ 356226035Sgabor#define FILL_BMGS_WIDE \ 357226035Sgabor if (!fg->hasdot) \ 358226035Sgabor { \ 359226035Sgabor fg->bmGs = xmalloc(fg->wlen * sizeof(int)); \ 360226035Sgabor if (!fg->bmGs) \ 361226035Sgabor return REG_ESPACE; \ 362226035Sgabor if (fg->wlen == 1) \ 363226035Sgabor fg->bmGs[0] = 1; \ 364226035Sgabor else \ 365226035Sgabor _FILL_BMGS(fg->bmGs, fg->wpattern, fg->wlen, true); \ 366226035Sgabor DPRINT_BMGS(fg->wlen, "GS shift (wide) for pos %d is %d\n", \ 367226035Sgabor fg->bmGs); \ 368226035Sgabor } 369226035Sgabor 370226035Sgabor#define _FILL_BMGS(arr, pat, plen, wide) \ 371226035Sgabor { \ 372226035Sgabor char *p; \ 373226035Sgabor tre_char_t *wp; \ 374226035Sgabor \ 375226035Sgabor if (wide) \ 376226035Sgabor { \ 377226035Sgabor if (fg->icase) \ 378226035Sgabor { \ 379226035Sgabor wp = xmalloc(plen * sizeof(tre_char_t)); \ 380226035Sgabor if (wp == NULL) \ 381226035Sgabor return REG_ESPACE; \ 382226035Sgabor for (unsigned int i = 0; i < plen; i++) \ 383226035Sgabor wp[i] = towlower(pat[i]); \ 384226035Sgabor _CALC_BMGS(arr, wp, plen); \ 385226035Sgabor xfree(wp); \ 386226035Sgabor } \ 387226035Sgabor else \ 388226035Sgabor _CALC_BMGS(arr, pat, plen); \ 389226035Sgabor } \ 390226035Sgabor else \ 391226035Sgabor { \ 392226035Sgabor if (fg->icase) \ 393226035Sgabor { \ 394226035Sgabor p = xmalloc(plen); \ 395226035Sgabor if (p == NULL) \ 396226035Sgabor return REG_ESPACE; \ 397226035Sgabor for (unsigned int i = 0; i < plen; i++) \ 398253810Sache p[i] = tolower((unsigned char)pat[i]); \ 399226035Sgabor _CALC_BMGS(arr, p, plen); \ 400226035Sgabor xfree(p); \ 401226035Sgabor } \ 402226035Sgabor else \ 403226035Sgabor _CALC_BMGS(arr, pat, plen); \ 404226035Sgabor } \ 405226035Sgabor } 406226035Sgabor 407226035Sgabor#define _CALC_BMGS(arr, pat, plen) \ 408226035Sgabor { \ 409226035Sgabor int f = 0, g; \ 410226035Sgabor \ 411226035Sgabor int *suff = xmalloc(plen * sizeof(int)); \ 412226035Sgabor if (suff == NULL) \ 413226035Sgabor return REG_ESPACE; \ 414226035Sgabor \ 415226035Sgabor suff[plen - 1] = plen; \ 416226035Sgabor g = plen - 1; \ 417226035Sgabor for (int i = plen - 2; i >= 0; i--) \ 418226035Sgabor { \ 419226035Sgabor if (i > g && suff[i + plen - 1 - f] < i - g) \ 420226035Sgabor suff[i] = suff[i + plen - 1 - f]; \ 421226035Sgabor else \ 422226035Sgabor { \ 423226035Sgabor if (i < g) \ 424226035Sgabor g = i; \ 425226035Sgabor f = i; \ 426226035Sgabor while (g >= 0 && pat[g] == pat[g + plen - 1 - f]) \ 427226035Sgabor g--; \ 428226035Sgabor suff[i] = f - g; \ 429226035Sgabor } \ 430226035Sgabor } \ 431226035Sgabor \ 432226035Sgabor for (unsigned int i = 0; i < plen; i++) \ 433226035Sgabor arr[i] = plen; \ 434226035Sgabor g = 0; \ 435226035Sgabor for (int i = plen - 1; i >= 0; i--) \ 436226035Sgabor if (suff[i] == i + 1) \ 437226035Sgabor for(; (unsigned long)g < plen - 1 - i; g++) \ 438226035Sgabor if (arr[g] == plen) \ 439226035Sgabor arr[g] = plen - 1 - i; \ 440226035Sgabor for (unsigned int i = 0; i <= plen - 2; i++) \ 441226035Sgabor arr[plen - 1 - suff[i]] = plen - 1 - i; \ 442226035Sgabor \ 443226035Sgabor xfree(suff); \ 444226035Sgabor } 445226035Sgabor 446226035Sgabor/* 447265160Spfg * Copies the pattern pat having length n to p and stores 448226035Sgabor * the size in l. 449226035Sgabor */ 450226035Sgabor#define SAVE_PATTERN(src, srclen, dst, dstlen) \ 451226035Sgabor dstlen = srclen; \ 452226035Sgabor dst = xmalloc((dstlen + 1) * sizeof(tre_char_t)); \ 453226035Sgabor if (dst == NULL) \ 454226035Sgabor return REG_ESPACE; \ 455226035Sgabor if (dstlen > 0) \ 456226035Sgabor memcpy(dst, src, dstlen * sizeof(tre_char_t)); \ 457226035Sgabor dst[dstlen] = TRE_CHAR('\0'); 458226035Sgabor 459226035Sgabor/* 460226035Sgabor * Initializes pattern compiling. 461226035Sgabor */ 462226035Sgabor#define INIT_COMP \ 463226035Sgabor /* Initialize. */ \ 464226035Sgabor memset(fg, 0, sizeof(*fg)); \ 465226035Sgabor fg->icase = (cflags & REG_ICASE); \ 466226035Sgabor fg->word = (cflags & REG_WORD); \ 467226035Sgabor fg->newline = (cflags & REG_NEWLINE); \ 468226035Sgabor fg->nosub = (cflags & REG_NOSUB); \ 469226035Sgabor \ 470226035Sgabor /* Cannot handle REG_ICASE with MB string */ \ 471245075Smarkj if (fg->icase && (TRE_MB_CUR_MAX > 1) && n > 0) \ 472226035Sgabor { \ 473226035Sgabor DPRINT(("Cannot use fast matcher for MBS with REG_ICASE\n")); \ 474226035Sgabor return REG_BADPAT; \ 475226035Sgabor } 476226035Sgabor 477226035Sgabor/* 478226035Sgabor * Checks whether we have a 0-length pattern that will match 479226035Sgabor * anything. If literal is set to false, the EOL anchor is also 480226035Sgabor * taken into account. 481226035Sgabor */ 482226035Sgabor#define CHECK_MATCHALL(literal) \ 483226035Sgabor if (!literal && n == 1 && pat[0] == TRE_CHAR('$')) \ 484226035Sgabor { \ 485226035Sgabor n--; \ 486226035Sgabor fg->eol = true; \ 487226035Sgabor } \ 488226035Sgabor \ 489226035Sgabor if (n == 0) \ 490226035Sgabor { \ 491226035Sgabor fg->matchall = true; \ 492226035Sgabor fg->pattern = xmalloc(sizeof(char)); \ 493226035Sgabor if (!fg->pattern) \ 494226035Sgabor FAIL_COMP(REG_ESPACE); \ 495226035Sgabor fg->pattern[0] = '\0'; \ 496226035Sgabor fg->wpattern = xmalloc(sizeof(tre_char_t)); \ 497226035Sgabor if (!fg->wpattern) \ 498226035Sgabor FAIL_COMP(REG_ESPACE); \ 499226035Sgabor fg->wpattern[0] = TRE_CHAR('\0'); \ 500226035Sgabor DPRINT(("Matching every input\n")); \ 501226035Sgabor return REG_OK; \ 502226035Sgabor } 503226035Sgabor 504226035Sgabor/* 505226035Sgabor * Returns: REG_OK on success, error code otherwise 506226035Sgabor */ 507226035Sgaborint 508226035Sgabortre_compile_literal(fastmatch_t *fg, const tre_char_t *pat, size_t n, 509226035Sgabor int cflags) 510226035Sgabor{ 511226035Sgabor size_t hasdot = 0, whasdot = 0; 512226035Sgabor ssize_t firstdot = -1, wfirstdot = -1; 513226035Sgabor 514226035Sgabor INIT_COMP; 515226035Sgabor 516226035Sgabor CHECK_MATCHALL(true); 517226035Sgabor 518226035Sgabor /* Cannot handle word boundaries with MB string */ 519226035Sgabor if (fg->word && (TRE_MB_CUR_MAX > 1)) 520226035Sgabor return REG_BADPAT; 521226035Sgabor 522226035Sgabor#ifdef TRE_WCHAR 523226035Sgabor SAVE_PATTERN(pat, n, fg->wpattern, fg->wlen); 524226035Sgabor STORE_MBS_PAT; 525226035Sgabor#else 526226035Sgabor SAVE_PATTERN(pat, n, fg->pattern, fg->len); 527226035Sgabor#endif 528226035Sgabor 529226035Sgabor DPRINT(("tre_compile_literal: pattern: %s, len %zu, icase: %c, word: %c, " 530226035Sgabor "newline %c\n", fg->pattern, fg->len, fg->icase ? 'y' : 'n', 531226035Sgabor fg->word ? 'y' : 'n', fg->newline ? 'y' : 'n')); 532226035Sgabor 533226035Sgabor FILL_QSBC; 534226035Sgabor FILL_BMGS; 535226035Sgabor#ifdef TRE_WCHAR 536226035Sgabor FILL_QSBC_WIDE; 537226035Sgabor FILL_BMGS_WIDE; 538226035Sgabor#endif 539226035Sgabor 540226035Sgabor return REG_OK; 541226035Sgabor} 542226035Sgabor 543226035Sgabor/* 544226035Sgabor * Returns: REG_OK on success, error code otherwise 545226035Sgabor */ 546226035Sgaborint 547226035Sgabortre_compile_fast(fastmatch_t *fg, const tre_char_t *pat, size_t n, 548226035Sgabor int cflags) 549226035Sgabor{ 550226035Sgabor tre_char_t *tmp; 551226271Sgabor size_t pos = 0, hasdot = 0, whasdot = 0; 552226035Sgabor ssize_t firstdot = -1, wfirstdot = -1; 553226035Sgabor bool escaped = false; 554226035Sgabor bool *_escmap = NULL; 555226035Sgabor 556226035Sgabor INIT_COMP; 557226035Sgabor 558226035Sgabor /* Remove beginning-of-line character ('^'). */ 559226035Sgabor if (pat[0] == TRE_CHAR('^')) 560226035Sgabor { 561226035Sgabor fg->bol = true; 562226035Sgabor n--; 563226035Sgabor pat++; 564226035Sgabor } 565226035Sgabor 566226035Sgabor CHECK_MATCHALL(false); 567226035Sgabor 568226035Sgabor /* Handle word-boundary matching when GNU extensions are enabled */ 569226035Sgabor if ((cflags & REG_GNU) && (n >= 14) && 570226035Sgabor (memcmp(pat, TRE_CHAR("[[:<:]]"), 7 * sizeof(tre_char_t)) == 0) && 571226035Sgabor (memcmp(pat + n - 7, TRE_CHAR("[[:>:]]"), 572226035Sgabor 7 * sizeof(tre_char_t)) == 0)) 573226035Sgabor { 574226035Sgabor n -= 14; 575226035Sgabor pat += 7; 576226035Sgabor fg->word = true; 577226035Sgabor } 578226035Sgabor 579226035Sgabor /* Cannot handle word boundaries with MB string */ 580226035Sgabor if (fg->word && (TRE_MB_CUR_MAX > 1)) 581226035Sgabor return REG_BADPAT; 582226035Sgabor 583226035Sgabor tmp = xmalloc((n + 1) * sizeof(tre_char_t)); 584226035Sgabor if (tmp == NULL) 585226035Sgabor return REG_ESPACE; 586226035Sgabor 587226035Sgabor/* Copies the char into the stored pattern and skips to the next char. */ 588226035Sgabor#define STORE_CHAR \ 589226035Sgabor do \ 590226035Sgabor { \ 591226035Sgabor tmp[pos++] = pat[i]; \ 592226035Sgabor escaped = false; \ 593226035Sgabor continue; \ 594226035Sgabor } while (0) 595226035Sgabor 596226035Sgabor /* Traverse the input pattern for processing */ 597226035Sgabor for (unsigned int i = 0; i < n; i++) 598226035Sgabor { 599226035Sgabor switch (pat[i]) 600226035Sgabor { 601226035Sgabor case TRE_CHAR('\\'): 602226035Sgabor if (escaped) 603226035Sgabor STORE_CHAR; 604226035Sgabor else if (i == n - 1) 605226035Sgabor goto badpat; 606226035Sgabor else 607226035Sgabor escaped = true; 608226035Sgabor continue; 609226035Sgabor case TRE_CHAR('['): 610226035Sgabor if (escaped) 611226035Sgabor STORE_CHAR; 612226035Sgabor else 613226035Sgabor goto badpat; 614226035Sgabor continue; 615226035Sgabor case TRE_CHAR('*'): 616226035Sgabor if (escaped || (!(cflags & REG_EXTENDED) && (i == 0))) 617226035Sgabor STORE_CHAR; 618226035Sgabor else 619226035Sgabor goto badpat; 620226035Sgabor continue; 621226035Sgabor case TRE_CHAR('+'): 622226035Sgabor case TRE_CHAR('?'): 623226035Sgabor if ((cflags & REG_EXTENDED) && (i == 0)) 624303882Sdim goto badpat; 625226035Sgabor else if ((cflags & REG_EXTENDED) ^ !escaped) 626226035Sgabor STORE_CHAR; 627226035Sgabor else 628226035Sgabor goto badpat; 629226035Sgabor continue; 630226035Sgabor case TRE_CHAR('.'): 631226035Sgabor if (escaped) 632226035Sgabor { 633226035Sgabor if (!_escmap) 634226035Sgabor _escmap = xmalloc(n * sizeof(bool)); 635226035Sgabor if (!_escmap) 636226035Sgabor { 637226035Sgabor xfree(tmp); 638226035Sgabor return REG_ESPACE; 639226035Sgabor } 640226035Sgabor _escmap[i] = true; 641226035Sgabor STORE_CHAR; 642226035Sgabor } 643226035Sgabor else 644226035Sgabor { 645226035Sgabor whasdot = i; 646226035Sgabor if (wfirstdot == -1) 647226035Sgabor wfirstdot = i; 648226035Sgabor STORE_CHAR; 649226035Sgabor } 650226035Sgabor continue; 651226035Sgabor case TRE_CHAR('^'): 652226035Sgabor STORE_CHAR; 653226035Sgabor continue; 654226035Sgabor case TRE_CHAR('$'): 655226035Sgabor if (!escaped && (i == n - 1)) 656226035Sgabor fg->eol = true; 657226035Sgabor else 658226035Sgabor STORE_CHAR; 659226035Sgabor continue; 660226035Sgabor case TRE_CHAR('('): 661226035Sgabor if ((cflags & REG_EXTENDED) ^ escaped) 662226035Sgabor goto badpat; 663226035Sgabor else 664226035Sgabor STORE_CHAR; 665226035Sgabor continue; 666226035Sgabor case TRE_CHAR('{'): 667226035Sgabor if (!(cflags & REG_EXTENDED) ^ escaped) 668226035Sgabor STORE_CHAR; 669226035Sgabor else if (!(cflags & REG_EXTENDED) && (i == 0)) 670226035Sgabor STORE_CHAR; 671226035Sgabor else if ((cflags & REG_EXTENDED) && (i == 0)) 672226035Sgabor continue; 673226035Sgabor else 674226035Sgabor goto badpat; 675226035Sgabor continue; 676226035Sgabor case TRE_CHAR('|'): 677226035Sgabor if ((cflags & REG_EXTENDED) ^ escaped) 678226035Sgabor goto badpat; 679226035Sgabor else 680226035Sgabor STORE_CHAR; 681226035Sgabor continue; 682226035Sgabor default: 683226035Sgabor if (escaped) 684226035Sgabor goto badpat; 685226035Sgabor else 686226035Sgabor STORE_CHAR; 687226035Sgabor continue; 688226035Sgabor } 689226035Sgabor continue; 690226035Sgaborbadpat: 691226035Sgabor xfree(tmp); 692226035Sgabor DPRINT(("tre_compile_fast: compilation of pattern failed, falling" 693226035Sgabor "back to NFA\n")); 694226035Sgabor return REG_BADPAT; 695226035Sgabor } 696226035Sgabor 697226271Sgabor fg->hasdot = wfirstdot > -1; 698226035Sgabor 699226035Sgabor /* 700226035Sgabor * The pattern has been processed and copied to tmp as a literal string 701226035Sgabor * with escapes, anchors (^$) and the word boundary match character 702226035Sgabor * classes stripped out. 703226035Sgabor */ 704226035Sgabor#ifdef TRE_WCHAR 705226035Sgabor SAVE_PATTERN(tmp, pos, fg->wpattern, fg->wlen); 706226035Sgabor fg->wescmap = _escmap; 707226035Sgabor STORE_MBS_PAT; 708226035Sgabor 709226035Sgabor /* 710226035Sgabor * The position of dots and escaped dots is different in the MB string 711226035Sgabor * than in to the wide string so traverse the converted string, as well, 712226035Sgabor * to store these positions. 713226035Sgabor */ 714226035Sgabor if (fg->hasdot || (fg->wescmap != NULL)) 715226035Sgabor { 716226035Sgabor if (fg->wescmap != NULL) 717226035Sgabor { 718226035Sgabor fg->escmap = xmalloc(fg->len * sizeof(bool)); 719226035Sgabor if (!fg->escmap) 720226035Sgabor { 721226035Sgabor tre_free_fast(fg); 722226035Sgabor return REG_ESPACE; 723226035Sgabor } 724226035Sgabor } 725226035Sgabor 726226035Sgabor escaped = false; 727226035Sgabor for (unsigned int i = 0; i < fg->len; i++) 728226035Sgabor if (fg->pattern[i] == '\\') 729226035Sgabor escaped = !escaped; 730274757Semaste else if (fg->pattern[i] == '.' && fg->escmap && escaped) 731226035Sgabor { 732226035Sgabor fg->escmap[i] = true; 733226035Sgabor escaped = false; 734226035Sgabor } 735226035Sgabor else if (fg->pattern[i] == '.' && !escaped) 736226035Sgabor { 737226035Sgabor hasdot = i; 738226035Sgabor if (firstdot == -1) 739226035Sgabor firstdot = i; 740226035Sgabor } 741226035Sgabor else 742226035Sgabor escaped = false; 743226035Sgabor } 744226035Sgabor#else 745226035Sgabor SAVE_PATTERN(tmp, pos, fg->pattern, fg->len); 746226035Sgabor fg->escmap = _escmap; 747226035Sgabor#endif 748226035Sgabor 749226035Sgabor xfree(tmp); 750226035Sgabor 751226035Sgabor DPRINT(("tre_compile_fast: pattern: %s, len %zu, bol %c, eol %c, " 752226035Sgabor "icase: %c, word: %c, newline %c\n", fg->pattern, fg->len, 753226035Sgabor fg->bol ? 'y' : 'n', fg->eol ? 'y' : 'n', 754226035Sgabor fg->icase ? 'y' : 'n', fg->word ? 'y' : 'n', 755226035Sgabor fg->newline ? 'y' : 'n')); 756226035Sgabor 757226035Sgabor /* Check whether reverse QS algorithm is more efficient */ 758226035Sgabor if ((wfirstdot > -1) && (fg->wlen - whasdot + 1 < (size_t)wfirstdot) && 759226035Sgabor fg->nosub) 760226035Sgabor { 761226035Sgabor fg->reversed = true; 762226035Sgabor DPRINT(("tre_compile_fast: using reverse QS algorithm\n")); 763226035Sgabor } 764226035Sgabor 765226035Sgabor FILL_QSBC; 766226035Sgabor FILL_BMGS; 767226035Sgabor#ifdef TRE_WCHAR 768226035Sgabor FILL_QSBC_WIDE; 769226035Sgabor FILL_BMGS_WIDE; 770226035Sgabor#endif 771226035Sgabor 772226035Sgabor return REG_OK; 773226035Sgabor} 774226035Sgabor 775226035Sgabor#define _SHIFT_ONE \ 776226035Sgabor { \ 777226035Sgabor shift = 1; \ 778226035Sgabor j = !fg->reversed ? j + shift : j - shift; \ 779226035Sgabor continue; \ 780226035Sgabor } 781226035Sgabor 782226035Sgabor#define _BBOUND_COND \ 783226035Sgabor ((type == STR_WIDE) ? \ 784226035Sgabor ((j == 0) || !(tre_isalnum(str_wide[j - 1]) || \ 785226035Sgabor (str_wide[j - 1] == TRE_CHAR('_')))) : \ 786226035Sgabor ((j == 0) || !(tre_isalnum(str_byte[j - 1]) || \ 787226035Sgabor (str_byte[j - 1] == '_')))) 788226035Sgabor 789226035Sgabor#define _EBOUND_COND \ 790226035Sgabor ((type == STR_WIDE) ? \ 791226035Sgabor ((j + fg->wlen == len) || !(tre_isalnum(str_wide[j + fg->wlen]) || \ 792226035Sgabor (str_wide[j + fg->wlen] == TRE_CHAR('_')))) : \ 793226035Sgabor ((j + fg->len == len) || !(tre_isalnum(str_byte[j + fg->len]) || \ 794226035Sgabor (str_byte[j + fg->len] == '_')))) 795226035Sgabor 796226035Sgabor/* 797226035Sgabor * Condition to check whether the match on position j is on a 798226035Sgabor * word boundary. 799226035Sgabor */ 800226035Sgabor#define IS_ON_WORD_BOUNDARY \ 801226035Sgabor (_BBOUND_COND && _EBOUND_COND) 802226035Sgabor 803226035Sgabor/* 804226035Sgabor * Checks word boundary and shifts one if match is not on a 805226035Sgabor * boundary. 806226035Sgabor */ 807226035Sgabor#define CHECK_WORD_BOUNDARY \ 808226035Sgabor if (!IS_ON_WORD_BOUNDARY) \ 809226035Sgabor _SHIFT_ONE; 810226035Sgabor 811226035Sgabor#define _BOL_COND \ 812226035Sgabor ((j == 0) || ((type == STR_WIDE) ? (str_wide[j - 1] == TRE_CHAR('\n'))\ 813226035Sgabor : (str_byte[j - 1] == '\n'))) 814226035Sgabor 815226035Sgabor/* 816226035Sgabor * Checks BOL anchor and shifts one if match is not on a 817226035Sgabor * boundary. 818226035Sgabor */ 819226035Sgabor#define CHECK_BOL_ANCHOR \ 820226035Sgabor if (!_BOL_COND) \ 821226035Sgabor _SHIFT_ONE; 822226035Sgabor 823226035Sgabor#define _EOL_COND \ 824226035Sgabor ((type == STR_WIDE) \ 825226035Sgabor ? ((j + fg->wlen == len) || \ 826226035Sgabor (str_wide[j + fg->wlen] == TRE_CHAR('\n'))) \ 827226035Sgabor : ((j + fg->len == len) || (str_byte[j + fg->wlen] == '\n'))) 828226035Sgabor 829226035Sgabor/* 830226035Sgabor * Checks EOL anchor and shifts one if match is not on a 831226035Sgabor * boundary. 832226035Sgabor */ 833226035Sgabor#define CHECK_EOL_ANCHOR \ 834226035Sgabor if (!_EOL_COND) \ 835226035Sgabor _SHIFT_ONE; 836226035Sgabor 837226035Sgabor/* 838226035Sgabor * Executes matching of the precompiled pattern on the input string. 839226035Sgabor * Returns REG_OK or REG_NOMATCH depending on if we find a match or not. 840226035Sgabor */ 841226035Sgaborint 842226035Sgabortre_match_fast(const fastmatch_t *fg, const void *data, size_t len, 843226035Sgabor tre_str_type_t type, int nmatch, regmatch_t pmatch[], int eflags) 844226035Sgabor{ 845226035Sgabor unsigned int shift, u = 0, v = 0; 846226035Sgabor ssize_t j = 0; 847226035Sgabor int ret = REG_NOMATCH; 848226035Sgabor int mismatch; 849226035Sgabor const char *str_byte = data; 850226035Sgabor const void *startptr = NULL; 851226035Sgabor const tre_char_t *str_wide = data; 852226035Sgabor 853226035Sgabor /* Calculate length if unspecified. */ 854226035Sgabor if (len == (size_t)-1) 855226035Sgabor switch (type) 856226035Sgabor { 857226035Sgabor case STR_WIDE: 858226035Sgabor len = tre_strlen(str_wide); 859226035Sgabor break; 860226035Sgabor default: 861226035Sgabor len = strlen(str_byte); 862226035Sgabor break; 863226035Sgabor } 864226035Sgabor 865226035Sgabor /* Shortcut for empty pattern */ 866226035Sgabor if (fg->matchall) 867226035Sgabor { 868226035Sgabor if (!fg->nosub && nmatch >= 1) 869226035Sgabor { 870226035Sgabor pmatch[0].rm_so = 0; 871226035Sgabor pmatch[0].rm_eo = len; 872226035Sgabor } 873226035Sgabor if (fg->bol && fg->eol) 874226035Sgabor return (len == 0) ? REG_OK : REG_NOMATCH; 875226035Sgabor else 876226035Sgabor return REG_OK; 877226035Sgabor } 878226035Sgabor 879226035Sgabor /* No point in going farther if we do not have enough data. */ 880226035Sgabor switch (type) 881226035Sgabor { 882226035Sgabor case STR_WIDE: 883226035Sgabor if (len < fg->wlen) 884226035Sgabor return ret; 885226035Sgabor shift = fg->wlen; 886226035Sgabor break; 887226035Sgabor default: 888226035Sgabor if (len < fg->len) 889226035Sgabor return ret; 890226035Sgabor shift = fg->len; 891226035Sgabor } 892226035Sgabor 893226035Sgabor /* 894226035Sgabor * REG_NOTBOL means not anchoring ^ to the beginning of the line, so we 895226035Sgabor * can shift one because there can't be a match at the beginning. 896226035Sgabor */ 897226035Sgabor if (fg->bol && (eflags & REG_NOTBOL)) 898226035Sgabor j = 1; 899226035Sgabor 900226035Sgabor /* 901226035Sgabor * Like above, we cannot have a match at the very end when anchoring to 902226035Sgabor * the end and REG_NOTEOL is specified. 903226035Sgabor */ 904226035Sgabor if (fg->eol && (eflags & REG_NOTEOL)) 905226035Sgabor len--; 906226035Sgabor 907226035Sgabor if (fg->reversed) 908226035Sgabor j = len - (type == STR_WIDE ? fg->wlen : fg->len); 909226035Sgabor 910226035Sgabor 911226035Sgabor /* Only try once at the beginning or ending of the line. */ 912226035Sgabor if ((fg->bol || fg->eol) && !fg->newline && !(eflags & REG_NOTBOL) && 913226035Sgabor !(eflags & REG_NOTEOL)) 914226035Sgabor { 915226035Sgabor /* Simple text comparison. */ 916226035Sgabor if (!((fg->bol && fg->eol) && 917226035Sgabor (type == STR_WIDE ? (len != fg->wlen) : (len != fg->len)))) 918226035Sgabor { 919226035Sgabor /* Determine where in data to start search at. */ 920226035Sgabor j = fg->eol ? len - (type == STR_WIDE ? fg->wlen : fg->len) : 0; 921226035Sgabor SKIP_CHARS(j); 922226035Sgabor mismatch = fastcmp(fg, startptr, type); 923226035Sgabor if (mismatch == REG_OK) 924226035Sgabor { 925226035Sgabor if (fg->word && !IS_ON_WORD_BOUNDARY) 926226035Sgabor return ret; 927226035Sgabor if (!fg->nosub && nmatch >= 1) 928226035Sgabor { 929226035Sgabor pmatch[0].rm_so = j; 930226035Sgabor pmatch[0].rm_eo = j + (type == STR_WIDE ? fg->wlen : fg->len); 931226035Sgabor } 932226035Sgabor return REG_OK; 933226035Sgabor } 934226035Sgabor } 935226035Sgabor } 936226035Sgabor else 937226035Sgabor { 938226035Sgabor /* Quick Search / Turbo Boyer-Moore algorithm. */ 939226035Sgabor do 940226035Sgabor { 941226035Sgabor SKIP_CHARS(j); 942226035Sgabor mismatch = fastcmp(fg, startptr, type); 943226035Sgabor if (mismatch == REG_OK) 944226035Sgabor { 945226035Sgabor if (fg->word) 946226035Sgabor CHECK_WORD_BOUNDARY; 947226035Sgabor if (fg->bol) 948226035Sgabor CHECK_BOL_ANCHOR; 949226035Sgabor if (fg->eol) 950226035Sgabor CHECK_EOL_ANCHOR; 951226035Sgabor if (!fg->nosub && nmatch >= 1) 952226035Sgabor { 953226035Sgabor pmatch[0].rm_so = j; 954226035Sgabor pmatch[0].rm_eo = j + ((type == STR_WIDE) ? fg->wlen : fg->len); 955226035Sgabor } 956226035Sgabor return REG_OK; 957226035Sgabor } 958226035Sgabor else if (mismatch > 0) 959226035Sgabor return mismatch; 960226035Sgabor mismatch = -mismatch - 1; 961226035Sgabor SHIFT; 962226035Sgabor } while (!IS_OUT_OF_BOUNDS); 963226035Sgabor } 964226035Sgabor return ret; 965226035Sgabor} 966226035Sgabor 967226035Sgabor/* 968226035Sgabor * Frees the resources that were allocated when the pattern was compiled. 969226035Sgabor */ 970226035Sgaborvoid 971226035Sgabortre_free_fast(fastmatch_t *fg) 972226035Sgabor{ 973226035Sgabor 974226035Sgabor DPRINT(("tre_fast_free: freeing structures for pattern %s\n", 975226035Sgabor fg->pattern)); 976226035Sgabor 977226035Sgabor#ifdef TRE_WCHAR 978226035Sgabor hashtable_free(fg->qsBc_table); 979226035Sgabor if (!fg->hasdot) 980226035Sgabor xfree(fg->bmGs); 981226035Sgabor if (fg->wescmap) 982226035Sgabor xfree(fg->wescmap); 983226035Sgabor xfree(fg->wpattern); 984226035Sgabor#endif 985226035Sgabor if (!fg->hasdot) 986226035Sgabor xfree(fg->sbmGs); 987226035Sgabor if (fg->escmap) 988226035Sgabor xfree(fg->escmap); 989226035Sgabor xfree(fg->pattern); 990226035Sgabor} 991226035Sgabor 992226035Sgabor/* 993226035Sgabor * Returns: -(i + 1) on failure (position that it failed with minus sign) 994226035Sgabor * error code on error 995226035Sgabor * REG_OK on success 996226035Sgabor */ 997226035Sgaborstatic inline int 998226035Sgaborfastcmp(const fastmatch_t *fg, const void *data, tre_str_type_t type) 999226035Sgabor{ 1000226035Sgabor const char *str_byte = data; 1001226035Sgabor const char *pat_byte = fg->pattern; 1002226035Sgabor const tre_char_t *str_wide = data; 1003226035Sgabor const tre_char_t *pat_wide = fg->wpattern; 1004226035Sgabor const bool *escmap = (type == STR_WIDE) ? fg->wescmap : fg->escmap; 1005226035Sgabor size_t len = (type == STR_WIDE) ? fg->wlen : fg->len; 1006226035Sgabor int ret = REG_OK; 1007226035Sgabor 1008226035Sgabor /* Compare the pattern and the input char-by-char from the last position. */ 1009226035Sgabor for (int i = len - 1; i >= 0; i--) { 1010226035Sgabor switch (type) 1011226035Sgabor { 1012226035Sgabor case STR_WIDE: 1013226035Sgabor 1014226035Sgabor /* Check dot */ 1015226035Sgabor if (fg->hasdot && pat_wide[i] == TRE_CHAR('.') && 1016226035Sgabor (!escmap || !escmap[i]) && 1017226035Sgabor (!fg->newline || (str_wide[i] != TRE_CHAR('\n')))) 1018226035Sgabor continue; 1019226035Sgabor 1020226035Sgabor /* Compare */ 1021226035Sgabor if (fg->icase ? (towlower(pat_wide[i]) == towlower(str_wide[i])) 1022226035Sgabor : (pat_wide[i] == str_wide[i])) 1023226035Sgabor continue; 1024226035Sgabor break; 1025226035Sgabor default: 1026226035Sgabor /* Check dot */ 1027226035Sgabor if (fg->hasdot && pat_byte[i] == '.' && 1028226035Sgabor (!escmap || !escmap[i]) && 1029226035Sgabor (!fg->newline || (str_byte[i] != '\n'))) 1030226035Sgabor continue; 1031226035Sgabor 1032226035Sgabor /* Compare */ 1033253810Sache if (fg->icase ? (tolower((unsigned char)pat_byte[i]) == tolower((unsigned char)str_byte[i])) 1034226035Sgabor : (pat_byte[i] == str_byte[i])) 1035226035Sgabor continue; 1036226035Sgabor } 1037226035Sgabor DPRINT(("fastcmp: mismatch at position %d\n", i)); 1038226035Sgabor ret = -(i + 1); 1039226035Sgabor break; 1040226035Sgabor } 1041226035Sgabor return ret; 1042226035Sgabor} 1043