dfa.c (126435) | dfa.c (131557) |
---|---|
1/* dfa.c - deterministic extended regexp routines for GNU 2 Copyright 1988, 1998, 2000 Free Software Foundation, Inc. 3 4 This program is free software; you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation; either version 2, or (at your option) 7 any later version. 8 --- 4 unchanged lines hidden (view full) --- 13 14 You should have received a copy of the GNU General Public License 15 along with this program; if not, write to the Free Software 16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ 17 18/* Written June, 1988 by Mike Haertel 19 Modified July, 1988 by Arthur David Olson to assist BMG speedups */ 20 | 1/* dfa.c - deterministic extended regexp routines for GNU 2 Copyright 1988, 1998, 2000 Free Software Foundation, Inc. 3 4 This program is free software; you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation; either version 2, or (at your option) 7 any later version. 8 --- 4 unchanged lines hidden (view full) --- 13 14 You should have received a copy of the GNU General Public License 15 along with this program; if not, write to the Free Software 16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ 17 18/* Written June, 1988 by Mike Haertel 19 Modified July, 1988 by Arthur David Olson to assist BMG speedups */ 20 |
21/* $FreeBSD: head/gnu/usr.bin/grep/dfa.c 126435 2004-03-01 08:37:20Z ache $ */ | 21/* $FreeBSD: head/gnu/usr.bin/grep/dfa.c 131557 2004-07-04 10:02:03Z tjr $ */ |
22 23#ifdef HAVE_CONFIG_H 24#include <config.h> 25#endif 26 27#include <assert.h> 28#include <ctype.h> 29#include <stdio.h> 30 31#include <sys/types.h> 32#ifdef STDC_HEADERS 33#include <stdlib.h> 34#else 35extern char *calloc(), *malloc(), *realloc(); 36extern void free(); 37#endif 38 39#if defined(HAVE_STRING_H) || defined(STDC_HEADERS) 40#include <string.h> | 22 23#ifdef HAVE_CONFIG_H 24#include <config.h> 25#endif 26 27#include <assert.h> 28#include <ctype.h> 29#include <stdio.h> 30 31#include <sys/types.h> 32#ifdef STDC_HEADERS 33#include <stdlib.h> 34#else 35extern char *calloc(), *malloc(), *realloc(); 36extern void free(); 37#endif 38 39#if defined(HAVE_STRING_H) || defined(STDC_HEADERS) 40#include <string.h> |
41#undef index 42#define index strchr | |
43#else 44#include <strings.h> 45#endif 46 | 41#else 42#include <strings.h> 43#endif 44 |
45#if HAVE_SETLOCALE 46# include <locale.h> 47#endif 48 49#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC 50/* We can handle multibyte string. */ 51# define MBS_SUPPORT 52#endif 53 54#ifdef MBS_SUPPORT 55# include <wchar.h> 56# include <wctype.h> 57#endif 58 |
|
47#ifndef DEBUG /* use the same approach as regex.c */ 48#undef assert 49#define assert(e) 50#endif /* DEBUG */ 51 52#ifndef isgraph 53#define isgraph(C) (isprint(C) && !isspace(C)) 54#endif --- 44 unchanged lines hidden (view full) --- 99# endif 100# else 101# define _(Str) (Str) 102# endif 103#endif 104 105#include "regex.h" 106#include "dfa.h" | 59#ifndef DEBUG /* use the same approach as regex.c */ 60#undef assert 61#define assert(e) 62#endif /* DEBUG */ 63 64#ifndef isgraph 65#define isgraph(C) (isprint(C) && !isspace(C)) 66#endif --- 44 unchanged lines hidden (view full) --- 111# endif 112# else 113# define _(Str) (Str) 114# endif 115#endif 116 117#include "regex.h" 118#include "dfa.h" |
119#include "hard-locale.h" |
|
107 108/* HPUX, define those as macros in sys/param.h */ 109#ifdef setbit 110# undef setbit 111#endif 112#ifdef clrbit 113# undef clrbit 114#endif 115 116static void dfamust PARAMS ((struct dfa *dfa)); | 120 121/* HPUX, define those as macros in sys/param.h */ 122#ifdef setbit 123# undef setbit 124#endif 125#ifdef clrbit 126# undef clrbit 127#endif 128 129static void dfamust PARAMS ((struct dfa *dfa)); |
117 118static ptr_t xcalloc PARAMS ((size_t n, size_t s)); 119static ptr_t xmalloc PARAMS ((size_t n)); 120static ptr_t xrealloc PARAMS ((ptr_t p, size_t n)); 121#ifdef DEBUG 122static void prtok PARAMS ((token t)); 123#endif 124static int tstbit PARAMS ((int b, charclass c)); 125static void setbit PARAMS ((int b, charclass c)); 126static void clrbit PARAMS ((int b, charclass c)); 127static void copyset PARAMS ((charclass src, charclass dst)); 128static void zeroset PARAMS ((charclass s)); 129static void notset PARAMS ((charclass s)); 130static int equal PARAMS ((charclass s1, charclass s2)); 131static int charclass_index PARAMS ((charclass s)); 132static int looking_at PARAMS ((const char *s)); 133static token lex PARAMS ((void)); 134static void addtok PARAMS ((token t)); 135static void atom PARAMS ((void)); 136static int nsubtoks PARAMS ((int tindex)); 137static void copytoks PARAMS ((int tindex, int ntokens)); 138static void closure PARAMS ((void)); 139static void branch PARAMS ((void)); | |
140static void regexp PARAMS ((int toplevel)); | 130static void regexp PARAMS ((int toplevel)); |
141static void copy PARAMS ((position_set *src, position_set *dst)); 142static void insert PARAMS ((position p, position_set *s)); 143static void merge PARAMS ((position_set *s1, position_set *s2, position_set *m)); 144static void delete PARAMS ((position p, position_set *s)); 145static int state_index PARAMS ((struct dfa *d, position_set *s, 146 int newline, int letter)); 147static void build_state PARAMS ((int s, struct dfa *d)); 148static void build_state_zero PARAMS ((struct dfa *d)); 149static char *icatalloc PARAMS ((char *old, char *new)); 150static char *icpyalloc PARAMS ((char *string)); 151static char *istrstr PARAMS ((char *lookin, char *lookfor)); 152static void ifree PARAMS ((char *cp)); 153static void freelist PARAMS ((char **cpp)); 154static char **enlist PARAMS ((char **cpp, char *new, size_t len)); 155static char **comsubs PARAMS ((char *left, char *right)); 156static char **addlists PARAMS ((char **old, char **new)); 157static char **inboth PARAMS ((char **left, char **right)); | |
158 159static ptr_t 160xcalloc (size_t n, size_t s) 161{ 162 ptr_t r = calloc(n, s); 163 164 if (!r) 165 dfaerror(_("Memory exhausted")); --- 25 unchanged lines hidden (view full) --- 191#define CALLOC(p, t, n) ((p) = (t *) xcalloc((size_t)(n), sizeof (t))) 192#define MALLOC(p, t, n) ((p) = (t *) xmalloc((n) * sizeof (t))) 193#define REALLOC(p, t, n) ((p) = (t *) xrealloc((ptr_t) (p), (n) * sizeof (t))) 194 195/* Reallocate an array of type t if nalloc is too small for index. */ 196#define REALLOC_IF_NECESSARY(p, t, nalloc, index) \ 197 if ((index) >= (nalloc)) \ 198 { \ | 131 132static ptr_t 133xcalloc (size_t n, size_t s) 134{ 135 ptr_t r = calloc(n, s); 136 137 if (!r) 138 dfaerror(_("Memory exhausted")); --- 25 unchanged lines hidden (view full) --- 164#define CALLOC(p, t, n) ((p) = (t *) xcalloc((size_t)(n), sizeof (t))) 165#define MALLOC(p, t, n) ((p) = (t *) xmalloc((n) * sizeof (t))) 166#define REALLOC(p, t, n) ((p) = (t *) xrealloc((ptr_t) (p), (n) * sizeof (t))) 167 168/* Reallocate an array of type t if nalloc is too small for index. */ 169#define REALLOC_IF_NECESSARY(p, t, nalloc, index) \ 170 if ((index) >= (nalloc)) \ 171 { \ |
199 while ((index) >= (nalloc)) \ | 172 do \ |
200 (nalloc) *= 2; \ | 173 (nalloc) *= 2; \ |
174 while ((index) >= (nalloc)); \ |
|
201 REALLOC(p, t, nalloc); \ 202 } 203 204#ifdef DEBUG 205 206static void 207prtok (token t) 208{ | 175 REALLOC(p, t, nalloc); \ 176 } 177 178#ifdef DEBUG 179 180static void 181prtok (token t) 182{ |
209 char *s; | 183 char const *s; |
210 211 if (t < 0) 212 fprintf(stderr, "END"); 213 else if (t < NOTCHAR) 214 fprintf(stderr, "%c", t); 215 else 216 { 217 switch (t) --- 9 unchanged lines hidden (view full) --- 227 case QMARK: s = "QMARK"; break; 228 case STAR: s = "STAR"; break; 229 case PLUS: s = "PLUS"; break; 230 case CAT: s = "CAT"; break; 231 case OR: s = "OR"; break; 232 case ORTOP: s = "ORTOP"; break; 233 case LPAREN: s = "LPAREN"; break; 234 case RPAREN: s = "RPAREN"; break; | 184 185 if (t < 0) 186 fprintf(stderr, "END"); 187 else if (t < NOTCHAR) 188 fprintf(stderr, "%c", t); 189 else 190 { 191 switch (t) --- 9 unchanged lines hidden (view full) --- 201 case QMARK: s = "QMARK"; break; 202 case STAR: s = "STAR"; break; 203 case PLUS: s = "PLUS"; break; 204 case CAT: s = "CAT"; break; 205 case OR: s = "OR"; break; 206 case ORTOP: s = "ORTOP"; break; 207 case LPAREN: s = "LPAREN"; break; 208 case RPAREN: s = "RPAREN"; break; |
209 case CRANGE: s = "CRANGE"; break; 210#ifdef MBS_SUPPORT 211 case ANYCHAR: s = "ANYCHAR"; break; 212 case MBCSET: s = "MBCSET"; break; 213#endif /* MBS_SUPPORT */ |
|
235 default: s = "CSET"; break; 236 } 237 fprintf(stderr, "%s", s); 238 } 239} 240#endif /* DEBUG */ 241 242/* Stuff pertaining to charclasses. */ 243 244static int | 214 default: s = "CSET"; break; 215 } 216 fprintf(stderr, "%s", s); 217 } 218} 219#endif /* DEBUG */ 220 221/* Stuff pertaining to charclasses. */ 222 223static int |
245tstbit (int b, charclass c) | 224tstbit (unsigned b, charclass c) |
246{ 247 return c[b / INTBITS] & 1 << b % INTBITS; 248} 249 250static void | 225{ 226 return c[b / INTBITS] & 1 << b % INTBITS; 227} 228 229static void |
251setbit (int b, charclass c) | 230setbit (unsigned b, charclass c) |
252{ 253 c[b / INTBITS] |= 1 << b % INTBITS; 254} 255 256static void | 231{ 232 c[b / INTBITS] |= 1 << b % INTBITS; 233} 234 235static void |
257clrbit (int b, charclass c) | 236clrbit (unsigned b, charclass c) |
258{ 259 c[b / INTBITS] &= ~(1 << b % INTBITS); 260} 261 262static void 263copyset (charclass src, charclass dst) 264{ | 237{ 238 c[b / INTBITS] &= ~(1 << b % INTBITS); 239} 240 241static void 242copyset (charclass src, charclass dst) 243{ |
265 int i; 266 267 for (i = 0; i < CHARCLASS_INTS; ++i) 268 dst[i] = src[i]; | 244 memcpy (dst, src, sizeof (charclass)); |
269} 270 271static void 272zeroset (charclass s) 273{ | 245} 246 247static void 248zeroset (charclass s) 249{ |
274 int i; 275 276 for (i = 0; i < CHARCLASS_INTS; ++i) 277 s[i] = 0; | 250 memset (s, 0, sizeof (charclass)); |
278} 279 280static void 281notset (charclass s) 282{ 283 int i; 284 285 for (i = 0; i < CHARCLASS_INTS; ++i) 286 s[i] = ~s[i]; 287} 288 289static int 290equal (charclass s1, charclass s2) 291{ | 251} 252 253static void 254notset (charclass s) 255{ 256 int i; 257 258 for (i = 0; i < CHARCLASS_INTS; ++i) 259 s[i] = ~s[i]; 260} 261 262static int 263equal (charclass s1, charclass s2) 264{ |
292 int i; 293 294 for (i = 0; i < CHARCLASS_INTS; ++i) 295 if (s1[i] != s2[i]) 296 return 0; 297 return 1; | 265 return memcmp (s1, s2, sizeof (charclass)) == 0; |
298} 299 300/* A pointer to the current dfa is kept here during parsing. */ 301static struct dfa *dfa; 302 303/* Find the index of charclass s in dfa->charclasses, or allocate a new charclass. */ 304static int 305charclass_index (charclass s) --- 15 unchanged lines hidden (view full) --- 321/* Flag for case-folding letters into sets. */ 322static int case_fold; 323 324/* End-of-line byte in data. */ 325static unsigned char eolbyte; 326 327/* Entry point to set syntax options. */ 328void | 266} 267 268/* A pointer to the current dfa is kept here during parsing. */ 269static struct dfa *dfa; 270 271/* Find the index of charclass s in dfa->charclasses, or allocate a new charclass. */ 272static int 273charclass_index (charclass s) --- 15 unchanged lines hidden (view full) --- 289/* Flag for case-folding letters into sets. */ 290static int case_fold; 291 292/* End-of-line byte in data. */ 293static unsigned char eolbyte; 294 295/* Entry point to set syntax options. */ 296void |
329dfasyntax (reg_syntax_t bits, int fold, int eol) | 297dfasyntax (reg_syntax_t bits, int fold, unsigned char eol) |
330{ 331 syntax_bits_set = 1; 332 syntax_bits = bits; 333 case_fold = fold; 334 eolbyte = eol; 335} 336 | 298{ 299 syntax_bits_set = 1; 300 syntax_bits = bits; 301 case_fold = fold; 302 eolbyte = eol; 303} 304 |
305/* Like setbit, but if case is folded, set both cases of a letter. */ 306static void 307setbit_case_fold (unsigned b, charclass c) 308{ 309 setbit (b, c); 310 if (case_fold) 311 { 312 if (ISUPPER (b)) 313 setbit (tolower (b), c); 314 else if (ISLOWER (b)) 315 setbit (toupper (b), c); 316 } 317} 318 |
|
337/* Lexical analyzer. All the dross that deals with the obnoxious 338 GNU Regex syntax bits is located here. The poor, suffering 339 reader is referred to the GNU Regex documentation for the 340 meaning of the @#%!@#%^!@ syntax bits. */ 341 | 319/* Lexical analyzer. All the dross that deals with the obnoxious 320 GNU Regex syntax bits is located here. The poor, suffering 321 reader is referred to the GNU Regex documentation for the 322 meaning of the @#%!@#%^!@ syntax bits. */ 323 |
342static char *lexstart; /* Pointer to beginning of input string. */ 343static char *lexptr; /* Pointer to next input character. */ | 324static char const *lexstart; /* Pointer to beginning of input string. */ 325static char const *lexptr; /* Pointer to next input character. */ |
344static int lexleft; /* Number of characters remaining. */ 345static token lasttok; /* Previous token returned; initially END. */ 346static int laststart; /* True if we're separated from beginning or (, | 347 only by zero-width characters. */ 348static int parens; /* Count of outstanding left parens. */ 349static int minrep, maxrep; /* Repeat counts for {m,n}. */ | 326static int lexleft; /* Number of characters remaining. */ 327static token lasttok; /* Previous token returned; initially END. */ 328static int laststart; /* True if we're separated from beginning or (, | 329 only by zero-width characters. */ 330static int parens; /* Count of outstanding left parens. */ 331static int minrep, maxrep; /* Repeat counts for {m,n}. */ |
332static int hard_LC_COLLATE; /* Nonzero if LC_COLLATE is hard. */ |
|
350 | 333 |
334#ifdef MBS_SUPPORT 335/* These variables are used only if (MB_CUR_MAX > 1). */ 336static mbstate_t mbs; /* Mbstate for mbrlen(). */ 337static int cur_mb_len; /* Byte length of the current scanning 338 multibyte character. */ 339static int cur_mb_index; /* Byte index of the current scanning multibyte 340 character. 341 342 singlebyte character : cur_mb_index = 0 343 multibyte character 344 1st byte : cur_mb_index = 1 345 2nd byte : cur_mb_index = 2 346 ... 347 nth byte : cur_mb_index = n */ 348static unsigned char *mblen_buf;/* Correspond to the input buffer in dfaexec(). 349 Each element store the amount of remain 350 byte of corresponding multibyte character 351 in the input string. A element's value 352 is 0 if corresponding character is a 353 singlebyte chracter. 354 e.g. input : 'a', <mb(0)>, <mb(1)>, <mb(2)> 355 mblen_buf : 0, 3, 2, 1 356 */ 357static wchar_t *inputwcs; /* Wide character representation of input 358 string in dfaexec(). 359 The length of this array is same as 360 the length of input string(char array). 361 inputstring[i] is a single-byte char, 362 or 1st byte of a multibyte char. 363 And inputwcs[i] is the codepoint. */ 364static unsigned char const *buf_begin;/* refference to begin in dfaexec(). */ 365static unsigned char const *buf_end; /* refference to end in dfaexec(). */ 366#endif /* MBS_SUPPORT */ 367 368#ifdef MBS_SUPPORT 369/* This function update cur_mb_len, and cur_mb_index. 370 p points current lexptr, len is the remaining buffer length. */ 371static void 372update_mb_len_index (unsigned char const *p, int len) 373{ 374 /* If last character is a part of a multibyte character, 375 we update cur_mb_index. */ 376 if (cur_mb_index) 377 cur_mb_index = (cur_mb_index >= cur_mb_len)? 0 378 : cur_mb_index + 1; 379 380 /* If last character is a single byte character, or the 381 last portion of a multibyte character, we check whether 382 next character is a multibyte character or not. */ 383 if (! cur_mb_index) 384 { 385 cur_mb_len = mbrlen(p, len, &mbs); 386 if (cur_mb_len > 1) 387 /* It is a multibyte character. 388 cur_mb_len was already set by mbrlen(). */ 389 cur_mb_index = 1; 390 else if (cur_mb_len < 1) 391 /* Invalid sequence. We treat it as a singlebyte character. 392 cur_mb_index is aleady 0. */ 393 cur_mb_len = 1; 394 /* Otherwise, cur_mb_len == 1, it is a singlebyte character. 395 cur_mb_index is aleady 0. */ 396 } 397} 398#endif /* MBS_SUPPORT */ 399 400#ifdef MBS_SUPPORT |
|
351/* Note that characters become unsigned here. */ | 401/* Note that characters become unsigned here. */ |
352#define FETCH(c, eoferr) \ | 402# define FETCH(c, eoferr) \ 403 { \ 404 if (! lexleft) \ 405 { \ 406 if (eoferr != 0) \ 407 dfaerror (eoferr); \ 408 else \ 409 return lasttok = END; \ 410 } \ 411 if (MB_CUR_MAX > 1) \ 412 update_mb_len_index(lexptr, lexleft); \ 413 (c) = (unsigned char) *lexptr++; \ 414 --lexleft; \ 415 } 416 417/* This function fetch a wide character, and update cur_mb_len, 418 used only if the current locale is a multibyte environment. */ 419static wchar_t 420fetch_wc (char const *eoferr) 421{ 422 wchar_t wc; 423 if (! lexleft) 424 { 425 if (eoferr != 0) 426 dfaerror (eoferr); 427 else 428 return -1; 429 } 430 431 cur_mb_len = mbrtowc(&wc, lexptr, lexleft, &mbs); 432 if (cur_mb_len <= 0) 433 { 434 cur_mb_len = 1; 435 wc = *lexptr; 436 } 437 lexptr += cur_mb_len; 438 lexleft -= cur_mb_len; 439 return wc; 440} 441#else 442/* Note that characters become unsigned here. */ 443# define FETCH(c, eoferr) \ |
353 { \ 354 if (! lexleft) \ 355 { \ 356 if (eoferr != 0) \ 357 dfaerror (eoferr); \ 358 else \ 359 return lasttok = END; \ 360 } \ 361 (c) = (unsigned char) *lexptr++; \ 362 --lexleft; \ 363 } | 444 { \ 445 if (! lexleft) \ 446 { \ 447 if (eoferr != 0) \ 448 dfaerror (eoferr); \ 449 else \ 450 return lasttok = END; \ 451 } \ 452 (c) = (unsigned char) *lexptr++; \ 453 --lexleft; \ 454 } |
455#endif /* MBS_SUPPORT */ |
|
364 | 456 |
457#ifdef MBS_SUPPORT 458/* Multibyte character handling sub-routin for lex. 459 This function parse a bracket expression and build a struct 460 mb_char_classes. */ 461static void 462parse_bracket_exp_mb () 463{ 464 wchar_t wc, wc1, wc2; 465 466 /* Work area to build a mb_char_classes. */ 467 struct mb_char_classes *work_mbc; 468 int chars_al, range_sts_al, range_ends_al, ch_classes_al, 469 equivs_al, coll_elems_al; 470 471 REALLOC_IF_NECESSARY(dfa->mbcsets, struct mb_char_classes, 472 dfa->mbcsets_alloc, dfa->nmbcsets + 1); 473 /* dfa->multibyte_prop[] hold the index of dfa->mbcsets. 474 We will update dfa->multibyte_prop in addtok(), because we can't 475 decide the index in dfa->tokens[]. */ 476 477 /* Initialize work are */ 478 work_mbc = &(dfa->mbcsets[dfa->nmbcsets++]); 479 480 chars_al = 1; 481 range_sts_al = range_ends_al = 0; 482 ch_classes_al = equivs_al = coll_elems_al = 0; 483 MALLOC(work_mbc->chars, wchar_t, chars_al); 484 485 work_mbc->nchars = work_mbc->nranges = work_mbc->nch_classes = 0; 486 work_mbc->nequivs = work_mbc->ncoll_elems = 0; 487 work_mbc->chars = work_mbc->ch_classes = NULL; 488 work_mbc->range_sts = work_mbc->range_ends = NULL; 489 work_mbc->equivs = work_mbc->coll_elems = NULL; 490 491 wc = fetch_wc(_("Unbalanced [")); 492 if (wc == L'^') 493 { 494 wc = fetch_wc(_("Unbalanced [")); 495 work_mbc->invert = 1; 496 } 497 else 498 work_mbc->invert = 0; 499 do 500 { 501 wc1 = -1; /* mark wc1 is not initialized". */ 502 503 /* Note that if we're looking at some other [:...:] construct, 504 we just treat it as a bunch of ordinary characters. We can do 505 this because we assume regex has checked for syntax errors before 506 dfa is ever called. */ 507 if (wc == L'[' && (syntax_bits & RE_CHAR_CLASSES)) 508 { 509#define BRACKET_BUFFER_SIZE 128 510 char str[BRACKET_BUFFER_SIZE]; 511 wc1 = wc; 512 wc = fetch_wc(_("Unbalanced [")); 513 514 /* If pattern contains `[[:', `[[.', or `[[='. */ 515 if (cur_mb_len == 1 && (wc == L':' || wc == L'.' || wc == L'=')) 516 { 517 unsigned char c; 518 unsigned char delim = (unsigned char)wc; 519 int len = 0; 520 for (;;) 521 { 522 if (! lexleft) 523 dfaerror (_("Unbalanced [")); 524 c = (unsigned char) *lexptr++; 525 --lexleft; 526 527 if ((c == delim && *lexptr == ']') || lexleft == 0) 528 break; 529 if (len < BRACKET_BUFFER_SIZE) 530 str[len++] = c; 531 else 532 /* This is in any case an invalid class name. */ 533 str[0] = '\0'; 534 } 535 str[len] = '\0'; 536 537 if (lexleft == 0) 538 { 539 REALLOC_IF_NECESSARY(work_mbc->chars, wchar_t, chars_al, 540 work_mbc->nchars + 2); 541 work_mbc->chars[work_mbc->nchars++] = L'['; 542 work_mbc->chars[work_mbc->nchars++] = delim; 543 break; 544 } 545 546 if (--lexleft, *lexptr++ != ']') 547 dfaerror (_("Unbalanced [")); 548 if (delim == ':') 549 /* build character class. */ 550 { 551 wctype_t wt; 552 /* Query the character class as wctype_t. */ 553 wt = wctype (str); 554 555 if (ch_classes_al == 0) 556 MALLOC(work_mbc->ch_classes, wchar_t, ++ch_classes_al); 557 REALLOC_IF_NECESSARY(work_mbc->ch_classes, wctype_t, 558 ch_classes_al, 559 work_mbc->nch_classes + 1); 560 work_mbc->ch_classes[work_mbc->nch_classes++] = wt; 561 562 } 563 else if (delim == '=' || delim == '.') 564 { 565 char *elem; 566 MALLOC(elem, char, len + 1); 567 strncpy(elem, str, len + 1); 568 569 if (delim == '=') 570 /* build equivalent class. */ 571 { 572 if (equivs_al == 0) 573 MALLOC(work_mbc->equivs, char*, ++equivs_al); 574 REALLOC_IF_NECESSARY(work_mbc->equivs, char*, 575 equivs_al, 576 work_mbc->nequivs + 1); 577 work_mbc->equivs[work_mbc->nequivs++] = elem; 578 } 579 580 if (delim == '.') 581 /* build collating element. */ 582 { 583 if (coll_elems_al == 0) 584 MALLOC(work_mbc->coll_elems, char*, ++coll_elems_al); 585 REALLOC_IF_NECESSARY(work_mbc->coll_elems, char*, 586 coll_elems_al, 587 work_mbc->ncoll_elems + 1); 588 work_mbc->coll_elems[work_mbc->ncoll_elems++] = elem; 589 } 590 } 591 wc = -1; 592 } 593 else 594 /* We treat '[' as a normal character here. */ 595 { 596 wc2 = wc1; wc1 = wc; wc = wc2; /* swap */ 597 } 598 } 599 else 600 { 601 if (wc == L'\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS)) 602 wc = fetch_wc(("Unbalanced [")); 603 } 604 605 if (wc1 == -1) 606 wc1 = fetch_wc(_("Unbalanced [")); 607 608 if (wc1 == L'-') 609 /* build range characters. */ 610 { 611 wc2 = fetch_wc(_("Unbalanced [")); 612 if (wc2 == L']') 613 { 614 /* In the case [x-], the - is an ordinary hyphen, 615 which is left in c1, the lookahead character. */ 616 lexptr -= cur_mb_len; 617 lexleft += cur_mb_len; 618 wc2 = wc; 619 } 620 else 621 { 622 if (wc2 == L'\\' 623 && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS)) 624 wc2 = fetch_wc(_("Unbalanced [")); 625 wc1 = fetch_wc(_("Unbalanced [")); 626 } 627 628 if (range_sts_al == 0) 629 { 630 MALLOC(work_mbc->range_sts, wchar_t, ++range_sts_al); 631 MALLOC(work_mbc->range_ends, wchar_t, ++range_ends_al); 632 } 633 REALLOC_IF_NECESSARY(work_mbc->range_sts, wchar_t, 634 range_sts_al, work_mbc->nranges + 1); 635 work_mbc->range_sts[work_mbc->nranges] = wc; 636 REALLOC_IF_NECESSARY(work_mbc->range_ends, wchar_t, 637 range_ends_al, work_mbc->nranges + 1); 638 work_mbc->range_ends[work_mbc->nranges++] = wc2; 639 } 640 else if (wc != -1) 641 /* build normal characters. */ 642 { 643 REALLOC_IF_NECESSARY(work_mbc->chars, wchar_t, chars_al, 644 work_mbc->nchars + 1); 645 work_mbc->chars[work_mbc->nchars++] = wc; 646 } 647 } 648 while ((wc = wc1) != L']'); 649} 650#endif /* MBS_SUPPORT */ 651 |
|
365#ifdef __STDC__ 366#define FUNC(F, P) static int F(int c) { return P(c); } 367#else 368#define FUNC(F, P) static int F(c) int c; { return P(c); } 369#endif 370 371FUNC(is_alpha, ISALPHA) 372FUNC(is_upper, ISUPPER) --- 14 unchanged lines hidden (view full) --- 387} 388 389/* The following list maps the names of the Posix named character classes 390 to predicate functions that determine whether a given character is in 391 the class. The leading [ has already been eaten by the lexical analyzer. */ 392static struct { 393 const char *name; 394 int (*pred) PARAMS ((int)); | 652#ifdef __STDC__ 653#define FUNC(F, P) static int F(int c) { return P(c); } 654#else 655#define FUNC(F, P) static int F(c) int c; { return P(c); } 656#endif 657 658FUNC(is_alpha, ISALPHA) 659FUNC(is_upper, ISUPPER) --- 14 unchanged lines hidden (view full) --- 674} 675 676/* The following list maps the names of the Posix named character classes 677 to predicate functions that determine whether a given character is in 678 the class. The leading [ has already been eaten by the lexical analyzer. */ 679static struct { 680 const char *name; 681 int (*pred) PARAMS ((int)); |
395} prednames[] = { | 682} const prednames[] = { |
396 { ":alpha:]", is_alpha }, 397 { ":upper:]", is_upper }, 398 { ":lower:]", is_lower }, 399 { ":digit:]", is_digit }, 400 { ":xdigit:]", is_xdigit }, 401 { ":space:]", is_space }, 402 { ":punct:]", is_punct }, 403 { ":alnum:]", is_alnum }, --- 16 unchanged lines hidden (view full) --- 420 if (lexleft < len) 421 return 0; 422 return strncmp(s, lexptr, len) == 0; 423} 424 425static token 426lex (void) 427{ | 683 { ":alpha:]", is_alpha }, 684 { ":upper:]", is_upper }, 685 { ":lower:]", is_lower }, 686 { ":digit:]", is_digit }, 687 { ":xdigit:]", is_xdigit }, 688 { ":space:]", is_space }, 689 { ":punct:]", is_punct }, 690 { ":alnum:]", is_alnum }, --- 16 unchanged lines hidden (view full) --- 707 if (lexleft < len) 708 return 0; 709 return strncmp(s, lexptr, len) == 0; 710} 711 712static token 713lex (void) 714{ |
428 token c, c1, c2; | 715 unsigned c, c1, c2; |
429 int backslash = 0, invert; 430 charclass ccl; 431 int i; | 716 int backslash = 0, invert; 717 charclass ccl; 718 int i; |
432 char lo[2]; 433 char hi[2]; | |
434 435 /* Basic plan: We fetch a character. If it's a backslash, 436 we set the backslash flag and go through the loop again. 437 On the plus side, this avoids having a duplicate of the 438 main switch inside the backslash case. On the minus side, 439 it means that just about every case begins with 440 "if (backslash) ...". */ 441 for (i = 0; i < 2; ++i) 442 { 443 FETCH(c, 0); | 719 720 /* Basic plan: We fetch a character. If it's a backslash, 721 we set the backslash flag and go through the loop again. 722 On the plus side, this avoids having a duplicate of the 723 main switch inside the backslash case. On the minus side, 724 it means that just about every case begins with 725 "if (backslash) ...". */ 726 for (i = 0; i < 2; ++i) 727 { 728 FETCH(c, 0); |
729#ifdef MBS_SUPPORT 730 if (MB_CUR_MAX > 1 && cur_mb_index) 731 /* If this is a part of a multi-byte character, we must treat 732 this byte data as a normal character. 733 e.g. In case of SJIS encoding, some character contains '\', 734 but they must not be backslash. */ 735 goto normal_char; 736#endif /* MBS_SUPPORT */ |
|
444 switch (c) 445 { 446 case '\\': 447 if (backslash) 448 goto normal_char; 449 if (lexleft == 0) 450 dfaerror(_("Unfinished \\ escape")); 451 backslash = 1; --- 204 unchanged lines hidden (view full) --- 656 goto normal_char; 657 --parens; 658 laststart = 0; 659 return lasttok = RPAREN; 660 661 case '.': 662 if (backslash) 663 goto normal_char; | 737 switch (c) 738 { 739 case '\\': 740 if (backslash) 741 goto normal_char; 742 if (lexleft == 0) 743 dfaerror(_("Unfinished \\ escape")); 744 backslash = 1; --- 204 unchanged lines hidden (view full) --- 949 goto normal_char; 950 --parens; 951 laststart = 0; 952 return lasttok = RPAREN; 953 954 case '.': 955 if (backslash) 956 goto normal_char; |
957#ifdef MBS_SUPPORT 958 if (MB_CUR_MAX > 1) 959 { 960 /* In multibyte environment period must match with a single 961 character not a byte. So we use ANYCHAR. */ 962 laststart = 0; 963 return lasttok = ANYCHAR; 964 } 965#endif /* MBS_SUPPORT */ |
|
664 zeroset(ccl); 665 notset(ccl); 666 if (!(syntax_bits & RE_DOT_NEWLINE)) 667 clrbit(eolbyte, ccl); 668 if (syntax_bits & RE_DOT_NOT_NULL) 669 clrbit('\0', ccl); 670 laststart = 0; 671 return lasttok = CSET + charclass_index(ccl); --- 9 unchanged lines hidden (view full) --- 681 if (c == 'W') 682 notset(ccl); 683 laststart = 0; 684 return lasttok = CSET + charclass_index(ccl); 685 686 case '[': 687 if (backslash) 688 goto normal_char; | 966 zeroset(ccl); 967 notset(ccl); 968 if (!(syntax_bits & RE_DOT_NEWLINE)) 969 clrbit(eolbyte, ccl); 970 if (syntax_bits & RE_DOT_NOT_NULL) 971 clrbit('\0', ccl); 972 laststart = 0; 973 return lasttok = CSET + charclass_index(ccl); --- 9 unchanged lines hidden (view full) --- 983 if (c == 'W') 984 notset(ccl); 985 laststart = 0; 986 return lasttok = CSET + charclass_index(ccl); 987 988 case '[': 989 if (backslash) 990 goto normal_char; |
991 laststart = 0; 992#ifdef MBS_SUPPORT 993 if (MB_CUR_MAX > 1) 994 { 995 /* In multibyte environment a bracket expression may contain 996 multibyte characters, which must be treated as characters 997 (not bytes). So we parse it by parse_bracket_exp_mb(). */ 998 parse_bracket_exp_mb(); 999 return lasttok = MBCSET; 1000 } 1001#endif |
|
689 zeroset(ccl); 690 FETCH(c, _("Unbalanced [")); 691 if (c == '^') 692 { 693 FETCH(c, _("Unbalanced [")); 694 invert = 1; 695 } 696 else --- 5 unchanged lines hidden (view full) --- 702 construct, we just treat it as a bunch of ordinary 703 characters. We can do this because we assume 704 regex has checked for syntax errors before 705 dfa is ever called. */ 706 if (c == '[' && (syntax_bits & RE_CHAR_CLASSES)) 707 for (c1 = 0; prednames[c1].name; ++c1) 708 if (looking_at(prednames[c1].name)) 709 { | 1002 zeroset(ccl); 1003 FETCH(c, _("Unbalanced [")); 1004 if (c == '^') 1005 { 1006 FETCH(c, _("Unbalanced [")); 1007 invert = 1; 1008 } 1009 else --- 5 unchanged lines hidden (view full) --- 1015 construct, we just treat it as a bunch of ordinary 1016 characters. We can do this because we assume 1017 regex has checked for syntax errors before 1018 dfa is ever called. */ 1019 if (c == '[' && (syntax_bits & RE_CHAR_CLASSES)) 1020 for (c1 = 0; prednames[c1].name; ++c1) 1021 if (looking_at(prednames[c1].name)) 1022 { |
710 int (*pred)() = prednames[c1].pred; 711 if (case_fold 712 && (pred == is_upper || pred == is_lower)) 713 pred = is_alpha; | 1023 int (*pred) PARAMS ((int)) = prednames[c1].pred; |
714 715 for (c2 = 0; c2 < NOTCHAR; ++c2) 716 if ((*pred)(c2)) | 1024 1025 for (c2 = 0; c2 < NOTCHAR; ++c2) 1026 if ((*pred)(c2)) |
717 setbit(c2, ccl); | 1027 setbit_case_fold (c2, ccl); |
718 lexptr += strlen(prednames[c1].name); 719 lexleft -= strlen(prednames[c1].name); 720 FETCH(c1, _("Unbalanced [")); 721 goto skip; 722 } 723 if (c == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS)) 724 FETCH(c, _("Unbalanced [")); 725 FETCH(c1, _("Unbalanced [")); 726 if (c1 == '-') 727 { 728 FETCH(c2, _("Unbalanced [")); 729 if (c2 == ']') 730 { 731 /* In the case [x-], the - is an ordinary hyphen, 732 which is left in c1, the lookahead character. */ 733 --lexptr; 734 ++lexleft; | 1028 lexptr += strlen(prednames[c1].name); 1029 lexleft -= strlen(prednames[c1].name); 1030 FETCH(c1, _("Unbalanced [")); 1031 goto skip; 1032 } 1033 if (c == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS)) 1034 FETCH(c, _("Unbalanced [")); 1035 FETCH(c1, _("Unbalanced [")); 1036 if (c1 == '-') 1037 { 1038 FETCH(c2, _("Unbalanced [")); 1039 if (c2 == ']') 1040 { 1041 /* In the case [x-], the - is an ordinary hyphen, 1042 which is left in c1, the lookahead character. */ 1043 --lexptr; 1044 ++lexleft; |
735 c2 = c; | |
736 } 737 else 738 { 739 if (c2 == '\\' 740 && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS)) 741 FETCH(c2, _("Unbalanced [")); 742 FETCH(c1, _("Unbalanced [")); | 1045 } 1046 else 1047 { 1048 if (c2 == '\\' 1049 && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS)) 1050 FETCH(c2, _("Unbalanced [")); 1051 FETCH(c1, _("Unbalanced [")); |
743 } 744 } 745 else 746 c2 = c; 747 748 lo[0] = c; lo[1] = '\0'; 749 hi[0] = c2; hi[1] = '\0'; 750 for (c = 0; c < NOTCHAR; c++) 751 { 752 char ch[2]; 753 ch[0] = c; ch[1] = '\0'; 754 if (strcoll (lo, ch) <= 0 && strcoll (ch, hi) <= 0) 755 { 756 setbit (c, ccl); 757 if (case_fold) 758 { 759 if (ISUPPER (c)) 760 setbit (tolower (c), ccl); 761 else if (ISLOWER (c)) 762 setbit (toupper (c), ccl); | 1052 if (!hard_LC_COLLATE) { 1053 for (; c <= c2; c++) 1054 setbit_case_fold (c, ccl); 1055 } else { 1056 /* POSIX locales are painful - leave the decision to libc */ 1057 char expr[6] = { '[', c, '-', c2, ']', '\0' }; 1058 regex_t re; 1059 if (regcomp (&re, expr, case_fold ? REG_ICASE : 0) == REG_NOERROR) { 1060 for (c = 0; c < NOTCHAR; ++c) { 1061 char buf[2] = { c, '\0' }; 1062 regmatch_t mat; 1063 if (regexec (&re, buf, 1, &mat, 0) == REG_NOERROR 1064 && mat.rm_so == 0 && mat.rm_eo == 1) 1065 setbit_case_fold (c, ccl); 1066 } 1067 regfree (&re); |
763 } | 1068 } |
1069 } 1070 continue; |
|
764 } 765 } 766 | 1071 } 1072 } 1073 |
1074 setbit_case_fold (c, ccl); 1075 |
|
767 skip: 768 ; 769 } 770 while ((c = c1) != ']'); 771 if (invert) 772 { 773 notset(ccl); 774 if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE) 775 clrbit(eolbyte, ccl); 776 } | 1076 skip: 1077 ; 1078 } 1079 while ((c = c1) != ']'); 1080 if (invert) 1081 { 1082 notset(ccl); 1083 if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE) 1084 clrbit(eolbyte, ccl); 1085 } |
777 laststart = 0; | |
778 return lasttok = CSET + charclass_index(ccl); 779 780 default: 781 normal_char: 782 laststart = 0; 783 if (case_fold && ISALPHA(c)) 784 { 785 zeroset(ccl); | 1086 return lasttok = CSET + charclass_index(ccl); 1087 1088 default: 1089 normal_char: 1090 laststart = 0; 1091 if (case_fold && ISALPHA(c)) 1092 { 1093 zeroset(ccl); |
786 setbit(c, ccl); 787 if (isupper(c)) 788 setbit(tolower(c), ccl); 789 else 790 setbit(toupper(c), ccl); | 1094 setbit_case_fold (c, ccl); |
791 return lasttok = CSET + charclass_index(ccl); 792 } 793 return c; 794 } 795 } 796 797 /* The above loop should consume at most a backslash 798 and some other character. */ --- 10 unchanged lines hidden (view full) --- 809 required of the real stack later on in 810 dfaanalyze(). */ 811 812/* Add the given token to the parse tree, maintaining the depth count and 813 updating the maximum depth if necessary. */ 814static void 815addtok (token t) 816{ | 1095 return lasttok = CSET + charclass_index(ccl); 1096 } 1097 return c; 1098 } 1099 } 1100 1101 /* The above loop should consume at most a backslash 1102 and some other character. */ --- 10 unchanged lines hidden (view full) --- 1113 required of the real stack later on in 1114 dfaanalyze(). */ 1115 1116/* Add the given token to the parse tree, maintaining the depth count and 1117 updating the maximum depth if necessary. */ 1118static void 1119addtok (token t) 1120{ |
1121#ifdef MBS_SUPPORT 1122 if (MB_CUR_MAX > 1) 1123 { 1124 REALLOC_IF_NECESSARY(dfa->multibyte_prop, int, dfa->nmultibyte_prop, 1125 dfa->tindex); 1126 /* Set dfa->multibyte_prop. See struct dfa in dfa.h. */ 1127 if (t == MBCSET) 1128 dfa->multibyte_prop[dfa->tindex] = ((dfa->nmbcsets - 1) << 2) + 3; 1129 else if (t < NOTCHAR) 1130 dfa->multibyte_prop[dfa->tindex] 1131 = (cur_mb_len == 1)? 3 /* single-byte char */ 1132 : (((cur_mb_index == 1)? 1 : 0) /* 1st-byte of multibyte char */ 1133 + ((cur_mb_index == cur_mb_len)? 2 : 0)); /* last-byte */ 1134 else 1135 /* It may be unnecesssary, but it is safer to treat other 1136 symbols as singlebyte characters. */ 1137 dfa->multibyte_prop[dfa->tindex] = 3; 1138 } 1139#endif 1140 |
|
817 REALLOC_IF_NECESSARY(dfa->tokens, token, dfa->talloc, dfa->tindex); 818 dfa->tokens[dfa->tindex++] = t; 819 820 switch (t) 821 { 822 case QMARK: 823 case STAR: 824 case PLUS: --- 24 unchanged lines hidden (view full) --- 849 branch: 850 branch closure 851 closure 852 853 closure: 854 closure QMARK 855 closure STAR 856 closure PLUS | 1141 REALLOC_IF_NECESSARY(dfa->tokens, token, dfa->talloc, dfa->tindex); 1142 dfa->tokens[dfa->tindex++] = t; 1143 1144 switch (t) 1145 { 1146 case QMARK: 1147 case STAR: 1148 case PLUS: --- 24 unchanged lines hidden (view full) --- 1173 branch: 1174 branch closure 1175 closure 1176 1177 closure: 1178 closure QMARK 1179 closure STAR 1180 closure PLUS |
1181 closure REPMN |
|
857 atom 858 859 atom: 860 <normal character> | 1182 atom 1183 1184 atom: 1185 <normal character> |
1186 <multibyte character> 1187 ANYCHAR 1188 MBCSET |
|
861 CSET 862 BACKREF 863 BEGLINE 864 ENDLINE 865 BEGWORD 866 ENDWORD 867 LIMWORD 868 NOTLIMWORD | 1189 CSET 1190 BACKREF 1191 BEGLINE 1192 ENDLINE 1193 BEGWORD 1194 ENDWORD 1195 LIMWORD 1196 NOTLIMWORD |
1197 CRANGE 1198 LPAREN regexp RPAREN |
|
869 <empty> 870 871 The parser builds a parse tree in postfix form in an array of tokens. */ 872 873static void 874atom (void) 875{ 876 if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF 877 || tok == BEGLINE || tok == ENDLINE || tok == BEGWORD | 1199 <empty> 1200 1201 The parser builds a parse tree in postfix form in an array of tokens. */ 1202 1203static void 1204atom (void) 1205{ 1206 if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF 1207 || tok == BEGLINE || tok == ENDLINE || tok == BEGWORD |
1208#ifdef MBS_SUPPORT 1209 || tok == ANYCHAR || tok == MBCSET /* MB_CUR_MAX > 1 */ 1210#endif /* MBS_SUPPORT */ |
|
878 || tok == ENDWORD || tok == LIMWORD || tok == NOTLIMWORD) 879 { 880 addtok(tok); 881 tok = lex(); | 1211 || tok == ENDWORD || tok == LIMWORD || tok == NOTLIMWORD) 1212 { 1213 addtok(tok); 1214 tok = lex(); |
1215#ifdef MBS_SUPPORT 1216 /* We treat a multibyte character as a single atom, so that DFA 1217 can treat a multibyte character as a single expression. 1218 1219 e.g. We construct following tree from "<mb1><mb2>". 1220 <mb1(1st-byte)><mb1(2nd-byte)><CAT><mb1(3rd-byte)><CAT> 1221 <mb2(1st-byte)><mb2(2nd-byte)><CAT><mb2(3rd-byte)><CAT><CAT> 1222 */ 1223 if (MB_CUR_MAX > 1) 1224 { 1225 while (cur_mb_index > 1 && tok >= 0 && tok < NOTCHAR) 1226 { 1227 addtok(tok); 1228 addtok(CAT); 1229 tok = lex(); 1230 } 1231 } 1232#endif /* MBS_SUPPORT */ |
|
882 } | 1233 } |
1234 else if (tok == CRANGE) 1235 { 1236 /* A character range like "[a-z]" in a locale other than "C" or 1237 "POSIX". This range might any sequence of one or more 1238 characters. Unfortunately the POSIX locale primitives give 1239 us no practical way to find what character sequences might be 1240 matched. Treat this approximately like "(.\1)" -- i.e. match 1241 one character, and then punt to the full matcher. */ 1242 charclass ccl; 1243 zeroset (ccl); 1244 notset (ccl); 1245 addtok (CSET + charclass_index (ccl)); 1246 addtok (BACKREF); 1247 addtok (CAT); 1248 tok = lex (); 1249 } |
|
883 else if (tok == LPAREN) 884 { 885 tok = lex(); 886 regexp(0); 887 if (tok != RPAREN) 888 dfaerror(_("Unbalanced (")); 889 tok = lex(); 890 } --- 93 unchanged lines hidden (view full) --- 984 addtok(OR); 985 } 986} 987 988/* Main entry point for the parser. S is a string to be parsed, len is the 989 length of the string, so s can include NUL characters. D is a pointer to 990 the struct dfa to parse into. */ 991void | 1250 else if (tok == LPAREN) 1251 { 1252 tok = lex(); 1253 regexp(0); 1254 if (tok != RPAREN) 1255 dfaerror(_("Unbalanced (")); 1256 tok = lex(); 1257 } --- 93 unchanged lines hidden (view full) --- 1351 addtok(OR); 1352 } 1353} 1354 1355/* Main entry point for the parser. S is a string to be parsed, len is the 1356 length of the string, so s can include NUL characters. D is a pointer to 1357 the struct dfa to parse into. */ 1358void |
992dfaparse (char *s, size_t len, struct dfa *d) | 1359dfaparse (char const *s, size_t len, struct dfa *d) |
993{ 994 dfa = d; 995 lexstart = lexptr = s; 996 lexleft = len; 997 lasttok = END; 998 laststart = 1; 999 parens = 0; | 1360{ 1361 dfa = d; 1362 lexstart = lexptr = s; 1363 lexleft = len; 1364 lasttok = END; 1365 laststart = 1; 1366 parens = 0; |
1367#if ENABLE_NLS 1368 hard_LC_COLLATE = hard_locale (LC_COLLATE); 1369#endif 1370#ifdef MBS_SUPPORT 1371 if (MB_CUR_MAX > 1) 1372 { 1373 cur_mb_index = 0; 1374 cur_mb_len = 0; 1375 memset(&mbs, 0, sizeof(mbstate_t)); 1376 } 1377#endif /* MBS_SUPPORT */ |
|
1000 1001 if (! syntax_bits_set) 1002 dfaerror(_("No syntax specified")); 1003 1004 tok = lex(); 1005 depth = d->depth; 1006 1007 regexp(1); --- 9 unchanged lines hidden (view full) --- 1017 1018 ++d->nregexps; 1019} 1020 1021/* Some primitives for operating on sets of positions. */ 1022 1023/* Copy one set to another; the destination must be large enough. */ 1024static void | 1378 1379 if (! syntax_bits_set) 1380 dfaerror(_("No syntax specified")); 1381 1382 tok = lex(); 1383 depth = d->depth; 1384 1385 regexp(1); --- 9 unchanged lines hidden (view full) --- 1395 1396 ++d->nregexps; 1397} 1398 1399/* Some primitives for operating on sets of positions. */ 1400 1401/* Copy one set to another; the destination must be large enough. */ 1402static void |
1025copy (position_set *src, position_set *dst) | 1403copy (position_set const *src, position_set *dst) |
1026{ 1027 int i; 1028 1029 for (i = 0; i < src->nelem; ++i) 1030 dst->elems[i] = src->elems[i]; 1031 dst->nelem = src->nelem; 1032} 1033 --- 22 unchanged lines hidden (view full) --- 1056 t1 = t2; 1057 } 1058 } 1059} 1060 1061/* Merge two sets of positions into a third. The result is exactly as if 1062 the positions of both sets were inserted into an initially empty set. */ 1063static void | 1404{ 1405 int i; 1406 1407 for (i = 0; i < src->nelem; ++i) 1408 dst->elems[i] = src->elems[i]; 1409 dst->nelem = src->nelem; 1410} 1411 --- 22 unchanged lines hidden (view full) --- 1434 t1 = t2; 1435 } 1436 } 1437} 1438 1439/* Merge two sets of positions into a third. The result is exactly as if 1440 the positions of both sets were inserted into an initially empty set. */ 1441static void |
1064merge (position_set *s1, position_set *s2, position_set *m) | 1442merge (position_set const *s1, position_set const *s2, position_set *m) |
1065{ 1066 int i = 0, j = 0; 1067 1068 m->nelem = 0; 1069 while (i < s1->nelem && j < s2->nelem) 1070 if (s1->elems[i].index > s2->elems[j].index) 1071 m->elems[m->nelem++] = s1->elems[i++]; 1072 else if (s1->elems[i].index < s2->elems[j].index) --- 23 unchanged lines hidden (view full) --- 1096 s->elems[i] = s->elems[i + 1]; 1097} 1098 1099/* Find the index of the state corresponding to the given position set with 1100 the given preceding context, or create a new state if there is no such 1101 state. Newline and letter tell whether we got here on a newline or 1102 letter, respectively. */ 1103static int | 1443{ 1444 int i = 0, j = 0; 1445 1446 m->nelem = 0; 1447 while (i < s1->nelem && j < s2->nelem) 1448 if (s1->elems[i].index > s2->elems[j].index) 1449 m->elems[m->nelem++] = s1->elems[i++]; 1450 else if (s1->elems[i].index < s2->elems[j].index) --- 23 unchanged lines hidden (view full) --- 1474 s->elems[i] = s->elems[i + 1]; 1475} 1476 1477/* Find the index of the state corresponding to the given position set with 1478 the given preceding context, or create a new state if there is no such 1479 state. Newline and letter tell whether we got here on a newline or 1480 letter, respectively. */ 1481static int |
1104state_index (struct dfa *d, position_set *s, int newline, int letter) | 1482state_index (struct dfa *d, position_set const *s, int newline, int letter) |
1105{ 1106 int hash = 0; 1107 int constraint; 1108 int i, j; 1109 1110 newline = newline ? 1 : 0; 1111 letter = letter ? 1 : 0; 1112 --- 20 unchanged lines hidden (view full) --- 1133 d->states[i].hash = hash; 1134 MALLOC(d->states[i].elems.elems, position, s->nelem); 1135 copy(s, &d->states[i].elems); 1136 d->states[i].newline = newline; 1137 d->states[i].letter = letter; 1138 d->states[i].backref = 0; 1139 d->states[i].constraint = 0; 1140 d->states[i].first_end = 0; | 1483{ 1484 int hash = 0; 1485 int constraint; 1486 int i, j; 1487 1488 newline = newline ? 1 : 0; 1489 letter = letter ? 1 : 0; 1490 --- 20 unchanged lines hidden (view full) --- 1511 d->states[i].hash = hash; 1512 MALLOC(d->states[i].elems.elems, position, s->nelem); 1513 copy(s, &d->states[i].elems); 1514 d->states[i].newline = newline; 1515 d->states[i].letter = letter; 1516 d->states[i].backref = 0; 1517 d->states[i].constraint = 0; 1518 d->states[i].first_end = 0; |
1519#ifdef MBS_SUPPORT 1520 if (MB_CUR_MAX > 1) 1521 d->states[i].mbps.nelem = 0; 1522#endif |
|
1141 for (j = 0; j < s->nelem; ++j) 1142 if (d->tokens[s->elems[j].index] < 0) 1143 { 1144 constraint = s->elems[j].constraint; 1145 if (SUCCEEDS_IN_CONTEXT(constraint, newline, 0, letter, 0) 1146 || SUCCEEDS_IN_CONTEXT(constraint, newline, 0, letter, 1) 1147 || SUCCEEDS_IN_CONTEXT(constraint, newline, 1, letter, 0) 1148 || SUCCEEDS_IN_CONTEXT(constraint, newline, 1, letter, 1)) --- 13 unchanged lines hidden (view full) --- 1162} 1163 1164/* Find the epsilon closure of a set of positions. If any position of the set 1165 contains a symbol that matches the empty string in some context, replace 1166 that position with the elements of its follow labeled with an appropriate 1167 constraint. Repeat exhaustively until no funny positions are left. 1168 S->elems must be large enough to hold the result. */ 1169static void | 1523 for (j = 0; j < s->nelem; ++j) 1524 if (d->tokens[s->elems[j].index] < 0) 1525 { 1526 constraint = s->elems[j].constraint; 1527 if (SUCCEEDS_IN_CONTEXT(constraint, newline, 0, letter, 0) 1528 || SUCCEEDS_IN_CONTEXT(constraint, newline, 0, letter, 1) 1529 || SUCCEEDS_IN_CONTEXT(constraint, newline, 1, letter, 0) 1530 || SUCCEEDS_IN_CONTEXT(constraint, newline, 1, letter, 1)) --- 13 unchanged lines hidden (view full) --- 1544} 1545 1546/* Find the epsilon closure of a set of positions. If any position of the set 1547 contains a symbol that matches the empty string in some context, replace 1548 that position with the elements of its follow labeled with an appropriate 1549 constraint. Repeat exhaustively until no funny positions are left. 1550 S->elems must be large enough to hold the result. */ 1551static void |
1170epsclosure (position_set *s, struct dfa *d) | 1552epsclosure (position_set *s, struct dfa const *d) |
1171{ 1172 int i, j; 1173 int *visited; 1174 position p, old; 1175 1176 MALLOC(visited, int, d->tindex); 1177 for (i = 0; i < d->tindex; ++i) 1178 visited[i] = 0; 1179 1180 for (i = 0; i < s->nelem; ++i) 1181 if (d->tokens[s->elems[i].index] >= NOTCHAR 1182 && d->tokens[s->elems[i].index] != BACKREF | 1553{ 1554 int i, j; 1555 int *visited; 1556 position p, old; 1557 1558 MALLOC(visited, int, d->tindex); 1559 for (i = 0; i < d->tindex; ++i) 1560 visited[i] = 0; 1561 1562 for (i = 0; i < s->nelem; ++i) 1563 if (d->tokens[s->elems[i].index] >= NOTCHAR 1564 && d->tokens[s->elems[i].index] != BACKREF |
1565#ifdef MBS_SUPPORT 1566 && d->tokens[s->elems[i].index] != ANYCHAR 1567 && d->tokens[s->elems[i].index] != MBCSET 1568#endif |
|
1183 && d->tokens[s->elems[i].index] < CSET) 1184 { 1185 old = s->elems[i]; 1186 p.constraint = old.constraint; 1187 delete(s->elems[i], s); 1188 if (visited[old.index]) 1189 { 1190 --i; --- 265 unchanged lines hidden (view full) --- 1456 putc('\n', stderr); 1457 } 1458#endif 1459 1460 /* For each follow set that is the follow set of a real position, replace 1461 it with its epsilon closure. */ 1462 for (i = 0; i < d->tindex; ++i) 1463 if (d->tokens[i] < NOTCHAR || d->tokens[i] == BACKREF | 1569 && d->tokens[s->elems[i].index] < CSET) 1570 { 1571 old = s->elems[i]; 1572 p.constraint = old.constraint; 1573 delete(s->elems[i], s); 1574 if (visited[old.index]) 1575 { 1576 --i; --- 265 unchanged lines hidden (view full) --- 1842 putc('\n', stderr); 1843 } 1844#endif 1845 1846 /* For each follow set that is the follow set of a real position, replace 1847 it with its epsilon closure. */ 1848 for (i = 0; i < d->tindex; ++i) 1849 if (d->tokens[i] < NOTCHAR || d->tokens[i] == BACKREF |
1850#ifdef MBS_SUPPORT 1851 || d->tokens[i] == ANYCHAR 1852 || d->tokens[i] == MBCSET 1853#endif |
|
1464 || d->tokens[i] >= CSET) 1465 { 1466#ifdef DEBUG 1467 fprintf(stderr, "follows(%d:", i); 1468 prtok(d->tokens[i]); 1469 fprintf(stderr, "):"); 1470 for (j = d->follows[i].nelem - 1; j >= 0; --j) 1471 { --- 85 unchanged lines hidden (view full) --- 1557 position_set follows; /* Union of the follows of some group. */ 1558 position_set tmp; /* Temporary space for merging sets. */ 1559 int state; /* New state. */ 1560 int wants_newline; /* New state wants to know newline context. */ 1561 int state_newline; /* New state on a newline transition. */ 1562 int wants_letter; /* New state wants to know letter context. */ 1563 int state_letter; /* New state on a letter transition. */ 1564 static int initialized; /* Flag for static initialization. */ | 1854 || d->tokens[i] >= CSET) 1855 { 1856#ifdef DEBUG 1857 fprintf(stderr, "follows(%d:", i); 1858 prtok(d->tokens[i]); 1859 fprintf(stderr, "):"); 1860 for (j = d->follows[i].nelem - 1; j >= 0; --j) 1861 { --- 85 unchanged lines hidden (view full) --- 1947 position_set follows; /* Union of the follows of some group. */ 1948 position_set tmp; /* Temporary space for merging sets. */ 1949 int state; /* New state. */ 1950 int wants_newline; /* New state wants to know newline context. */ 1951 int state_newline; /* New state on a newline transition. */ 1952 int wants_letter; /* New state wants to know letter context. */ 1953 int state_letter; /* New state on a letter transition. */ 1954 static int initialized; /* Flag for static initialization. */ |
1955#ifdef MBS_SUPPORT 1956 int next_isnt_1st_byte = 0; /* Flag If we can't add state0. */ 1957#endif |
|
1565 int i, j, k; 1566 1567 /* Initialize the set of letters, if necessary. */ 1568 if (! initialized) 1569 { 1570 initialized = 1; 1571 for (i = 0; i < NOTCHAR; ++i) 1572 if (IS_WORD_CONSTITUENT(i)) --- 5 unchanged lines hidden (view full) --- 1578 1579 for (i = 0; i < d->states[s].elems.nelem; ++i) 1580 { 1581 pos = d->states[s].elems.elems[i]; 1582 if (d->tokens[pos.index] >= 0 && d->tokens[pos.index] < NOTCHAR) 1583 setbit(d->tokens[pos.index], matches); 1584 else if (d->tokens[pos.index] >= CSET) 1585 copyset(d->charclasses[d->tokens[pos.index] - CSET], matches); | 1958 int i, j, k; 1959 1960 /* Initialize the set of letters, if necessary. */ 1961 if (! initialized) 1962 { 1963 initialized = 1; 1964 for (i = 0; i < NOTCHAR; ++i) 1965 if (IS_WORD_CONSTITUENT(i)) --- 5 unchanged lines hidden (view full) --- 1971 1972 for (i = 0; i < d->states[s].elems.nelem; ++i) 1973 { 1974 pos = d->states[s].elems.elems[i]; 1975 if (d->tokens[pos.index] >= 0 && d->tokens[pos.index] < NOTCHAR) 1976 setbit(d->tokens[pos.index], matches); 1977 else if (d->tokens[pos.index] >= CSET) 1978 copyset(d->charclasses[d->tokens[pos.index] - CSET], matches); |
1979#ifdef MBS_SUPPORT 1980 else if (d->tokens[pos.index] == ANYCHAR 1981 || d->tokens[pos.index] == MBCSET) 1982 /* MB_CUR_MAX > 1 */ 1983 { 1984 /* ANYCHAR and MBCSET must match with a single character, so we 1985 must put it to d->states[s].mbps, which contains the positions 1986 which can match with a single character not a byte. */ 1987 if (d->states[s].mbps.nelem == 0) 1988 { 1989 MALLOC(d->states[s].mbps.elems, position, 1990 d->states[s].elems.nelem); 1991 } 1992 insert(pos, &(d->states[s].mbps)); 1993 continue; 1994 } 1995#endif /* MBS_SUPPORT */ |
|
1586 else 1587 continue; 1588 1589 /* Some characters may need to be eliminated from matches because 1590 they fail in the current context. */ 1591 if (pos.constraint != 0xFF) 1592 { 1593 if (! MATCHES_NEWLINE_CONTEXT(pos.constraint, --- 120 unchanged lines hidden (view full) --- 1714 follows.nelem = 0; 1715 1716 /* Find the union of the follows of the positions of the group. 1717 This is a hideously inefficient loop. Fix it someday. */ 1718 for (j = 0; j < grps[i].nelem; ++j) 1719 for (k = 0; k < d->follows[grps[i].elems[j].index].nelem; ++k) 1720 insert(d->follows[grps[i].elems[j].index].elems[k], &follows); 1721 | 1996 else 1997 continue; 1998 1999 /* Some characters may need to be eliminated from matches because 2000 they fail in the current context. */ 2001 if (pos.constraint != 0xFF) 2002 { 2003 if (! MATCHES_NEWLINE_CONTEXT(pos.constraint, --- 120 unchanged lines hidden (view full) --- 2124 follows.nelem = 0; 2125 2126 /* Find the union of the follows of the positions of the group. 2127 This is a hideously inefficient loop. Fix it someday. */ 2128 for (j = 0; j < grps[i].nelem; ++j) 2129 for (k = 0; k < d->follows[grps[i].elems[j].index].nelem; ++k) 2130 insert(d->follows[grps[i].elems[j].index].elems[k], &follows); 2131 |
2132#ifdef MBS_SUPPORT 2133 if (MB_CUR_MAX > 1) 2134 { 2135 /* If a token in follows.elems is not 1st byte of a multibyte 2136 character, or the states of follows must accept the bytes 2137 which are not 1st byte of the multibyte character. 2138 Then, if a state of follows encounter a byte, it must not be 2139 a 1st byte of a multibyte character nor singlebyte character. 2140 We cansel to add state[0].follows to next state, because 2141 state[0] must accept 1st-byte 2142 2143 For example, we assume <sb a> is a certain singlebyte 2144 character, <mb A> is a certain multibyte character, and the 2145 codepoint of <sb a> equals the 2nd byte of the codepoint of 2146 <mb A>. 2147 When state[0] accepts <sb a>, state[i] transit to state[i+1] 2148 by accepting accepts 1st byte of <mb A>, and state[i+1] 2149 accepts 2nd byte of <mb A>, if state[i+1] encounter the 2150 codepoint of <sb a>, it must not be <sb a> but 2nd byte of 2151 <mb A>, so we can not add state[0]. */ 2152 2153 next_isnt_1st_byte = 0; 2154 for (j = 0; j < follows.nelem; ++j) 2155 { 2156 if (!(d->multibyte_prop[follows.elems[j].index] & 1)) 2157 { 2158 next_isnt_1st_byte = 1; 2159 break; 2160 } 2161 } 2162 } 2163#endif 2164 |
|
1722 /* If we are building a searching matcher, throw in the positions 1723 of state 0 as well. */ | 2165 /* If we are building a searching matcher, throw in the positions 2166 of state 0 as well. */ |
2167#ifdef MBS_SUPPORT 2168 if (d->searchflag && (MB_CUR_MAX == 1 || !next_isnt_1st_byte)) 2169#else |
|
1724 if (d->searchflag) | 2170 if (d->searchflag) |
2171#endif |
|
1725 for (j = 0; j < d->states[0].elems.nelem; ++j) 1726 insert(d->states[0].elems.elems[j], &follows); 1727 1728 /* Find out if the new state will want any context information. */ 1729 wants_newline = 0; 1730 if (tstbit(eolbyte, labels[i])) 1731 for (j = 0; j < follows.nelem; ++j) 1732 if (PREV_NEWLINE_DEPENDENT(follows.elems[j].constraint)) --- 100 unchanged lines hidden (view full) --- 1833 int oldalloc = d->tralloc; 1834 1835 while (trans[i] >= d->tralloc) 1836 d->tralloc *= 2; 1837 REALLOC(d->realtrans, int *, d->tralloc + 1); 1838 d->trans = d->realtrans + 1; 1839 REALLOC(d->fails, int *, d->tralloc); 1840 REALLOC(d->success, int, d->tralloc); | 2172 for (j = 0; j < d->states[0].elems.nelem; ++j) 2173 insert(d->states[0].elems.elems[j], &follows); 2174 2175 /* Find out if the new state will want any context information. */ 2176 wants_newline = 0; 2177 if (tstbit(eolbyte, labels[i])) 2178 for (j = 0; j < follows.nelem; ++j) 2179 if (PREV_NEWLINE_DEPENDENT(follows.elems[j].constraint)) --- 100 unchanged lines hidden (view full) --- 2280 int oldalloc = d->tralloc; 2281 2282 while (trans[i] >= d->tralloc) 2283 d->tralloc *= 2; 2284 REALLOC(d->realtrans, int *, d->tralloc + 1); 2285 d->trans = d->realtrans + 1; 2286 REALLOC(d->fails, int *, d->tralloc); 2287 REALLOC(d->success, int, d->tralloc); |
1841 REALLOC(d->newlines, int, d->tralloc); | |
1842 while (oldalloc < d->tralloc) 1843 { 1844 d->trans[oldalloc] = NULL; 1845 d->fails[oldalloc++] = NULL; 1846 } 1847 } 1848 | 2288 while (oldalloc < d->tralloc) 2289 { 2290 d->trans[oldalloc] = NULL; 2291 d->fails[oldalloc++] = NULL; 2292 } 2293 } 2294 |
1849 /* Keep the newline transition in a special place so we can use it as 1850 a sentinel. */ 1851 d->newlines[s] = trans[eolbyte]; | 2295 /* Newline is a sentinel. */ |
1852 trans[eolbyte] = -1; 1853 1854 if (ACCEPTING(s, *d)) 1855 d->fails[s] = trans; 1856 else 1857 d->trans[s] = trans; 1858} 1859 1860static void 1861build_state_zero (struct dfa *d) 1862{ 1863 d->tralloc = 1; 1864 d->trcount = 0; 1865 CALLOC(d->realtrans, int *, d->tralloc + 1); 1866 d->trans = d->realtrans + 1; 1867 CALLOC(d->fails, int *, d->tralloc); 1868 MALLOC(d->success, int, d->tralloc); | 2296 trans[eolbyte] = -1; 2297 2298 if (ACCEPTING(s, *d)) 2299 d->fails[s] = trans; 2300 else 2301 d->trans[s] = trans; 2302} 2303 2304static void 2305build_state_zero (struct dfa *d) 2306{ 2307 d->tralloc = 1; 2308 d->trcount = 0; 2309 CALLOC(d->realtrans, int *, d->tralloc + 1); 2310 d->trans = d->realtrans + 1; 2311 CALLOC(d->fails, int *, d->tralloc); 2312 MALLOC(d->success, int, d->tralloc); |
1869 MALLOC(d->newlines, int, d->tralloc); | |
1870 build_state(0, d); 1871} 1872 | 2313 build_state(0, d); 2314} 2315 |
2316#ifdef MBS_SUPPORT 2317/* Multibyte character handling sub-routins for dfaexec. */ 2318 2319/* Initial state may encounter the byte which is not a singlebyte character 2320 nor 1st byte of a multibyte character. But it is incorrect for initial 2321 state to accept such a byte. 2322 For example, in sjis encoding the regular expression like "\\" accepts 2323 the codepoint 0x5c, but should not accept the 2nd byte of the codepoint 2324 0x815c. Then Initial state must skip the bytes which are not a singlebyte 2325 character nor 1st byte of a multibyte character. */ 2326#define SKIP_REMAINS_MB_IF_INITIAL_STATE(s, p) \ 2327 if (s == 0) \ 2328 { \ 2329 while (inputwcs[p - buf_begin] == 0 \ 2330 && mblen_buf[p - buf_begin] > 0 \ 2331 && p < buf_end) \ 2332 ++p; \ 2333 if (p >= end) \ 2334 { \ 2335 free(mblen_buf); \ 2336 free(inputwcs); \ 2337 return (size_t) -1; \ 2338 } \ 2339 } 2340 2341static void 2342realloc_trans_if_necessary(struct dfa *d, int new_state) 2343{ 2344 /* Make sure that the trans and fail arrays are allocated large enough 2345 to hold a pointer for the new state. */ 2346 if (new_state >= d->tralloc) 2347 { 2348 int oldalloc = d->tralloc; 2349 2350 while (new_state >= d->tralloc) 2351 d->tralloc *= 2; 2352 REALLOC(d->realtrans, int *, d->tralloc + 1); 2353 d->trans = d->realtrans + 1; 2354 REALLOC(d->fails, int *, d->tralloc); 2355 REALLOC(d->success, int, d->tralloc); 2356 while (oldalloc < d->tralloc) 2357 { 2358 d->trans[oldalloc] = NULL; 2359 d->fails[oldalloc++] = NULL; 2360 } 2361 } 2362} 2363 2364/* Return values of transit_state_singlebyte(), and 2365 transit_state_consume_1char. */ 2366typedef enum 2367{ 2368 TRANSIT_STATE_IN_PROGRESS, /* State transition has not finished. */ 2369 TRANSIT_STATE_DONE, /* State transition has finished. */ 2370 TRANSIT_STATE_END_BUFFER /* Reach the end of the buffer. */ 2371} status_transit_state; 2372 2373/* Consume a single byte and transit state from 's' to '*next_state'. 2374 This function is almost same as the state transition routin in dfaexec(). 2375 But state transition is done just once, otherwise matching succeed or 2376 reach the end of the buffer. */ 2377static status_transit_state 2378transit_state_singlebyte (struct dfa *d, int s, unsigned char const *p, 2379 int *next_state) 2380{ 2381 int *t; 2382 int works = s; 2383 2384 status_transit_state rval = TRANSIT_STATE_IN_PROGRESS; 2385 2386 while (rval == TRANSIT_STATE_IN_PROGRESS) 2387 { 2388 if ((t = d->trans[works]) != NULL) 2389 { 2390 works = t[*p]; 2391 rval = TRANSIT_STATE_DONE; 2392 if (works < 0) 2393 works = 0; 2394 } 2395 else if (works < 0) 2396 { 2397 if (p == buf_end) 2398 /* At the moment, it must not happen. */ 2399 return TRANSIT_STATE_END_BUFFER; 2400 works = 0; 2401 } 2402 else if (d->fails[works]) 2403 { 2404 works = d->fails[works][*p]; 2405 rval = TRANSIT_STATE_DONE; 2406 } 2407 else 2408 { 2409 build_state(works, d); 2410 } 2411 } 2412 *next_state = works; 2413 return rval; 2414} 2415 2416/* Check whether period can match or not in the current context. If it can, 2417 return the amount of the bytes with which period can match, otherwise 2418 return 0. 2419 `pos' is the position of the period. `index' is the index from the 2420 buf_begin, and it is the current position in the buffer. */ 2421static int 2422match_anychar (struct dfa *d, int s, position pos, int index) 2423{ 2424 int newline = 0; 2425 int letter = 0; 2426 wchar_t wc; 2427 int mbclen; 2428 2429 wc = inputwcs[index]; 2430 mbclen = (mblen_buf[index] == 0)? 1 : mblen_buf[index]; 2431 2432 /* Check context. */ 2433 if (wc == (wchar_t)eolbyte) 2434 { 2435 if (!(syntax_bits & RE_DOT_NEWLINE)) 2436 return 0; 2437 newline = 1; 2438 } 2439 else if (wc == (wchar_t)'\0') 2440 { 2441 if (syntax_bits & RE_DOT_NOT_NULL) 2442 return 0; 2443 newline = 1; 2444 } 2445 2446 if (iswalnum(wc) || wc == L'_') 2447 letter = 1; 2448 2449 if (!SUCCEEDS_IN_CONTEXT(pos.constraint, d->states[s].newline, 2450 newline, d->states[s].letter, letter)) 2451 return 0; 2452 2453 return mbclen; 2454} 2455 2456/* Check whether bracket expression can match or not in the current context. 2457 If it can, return the amount of the bytes with which expression can match, 2458 otherwise return 0. 2459 `pos' is the position of the bracket expression. `index' is the index 2460 from the buf_begin, and it is the current position in the buffer. */ 2461int 2462match_mb_charset (struct dfa *d, int s, position pos, int index) 2463{ 2464 int i; 2465 int match; /* Flag which represent that matching succeed. */ 2466 int match_len; /* Length of the character (or collating element) 2467 with which this operator match. */ 2468 int op_len; /* Length of the operator. */ 2469 char buffer[128]; 2470 wchar_t wcbuf[6]; 2471 2472 /* Pointer to the structure to which we are currently reffering. */ 2473 struct mb_char_classes *work_mbc; 2474 2475 int newline = 0; 2476 int letter = 0; 2477 wchar_t wc; /* Current reffering character. */ 2478 2479 wc = inputwcs[index]; 2480 2481 /* Check context. */ 2482 if (wc == (wchar_t)eolbyte) 2483 { 2484 if (!(syntax_bits & RE_DOT_NEWLINE)) 2485 return 0; 2486 newline = 1; 2487 } 2488 else if (wc == (wchar_t)'\0') 2489 { 2490 if (syntax_bits & RE_DOT_NOT_NULL) 2491 return 0; 2492 newline = 1; 2493 } 2494 if (iswalnum(wc) || wc == L'_') 2495 letter = 1; 2496 if (!SUCCEEDS_IN_CONTEXT(pos.constraint, d->states[s].newline, 2497 newline, d->states[s].letter, letter)) 2498 return 0; 2499 2500 /* Assign the current reffering operator to work_mbc. */ 2501 work_mbc = &(d->mbcsets[(d->multibyte_prop[pos.index]) >> 2]); 2502 match = !work_mbc->invert; 2503 match_len = (mblen_buf[index] == 0)? 1 : mblen_buf[index]; 2504 2505 /* match with a character class? */ 2506 for (i = 0; i<work_mbc->nch_classes; i++) 2507 { 2508 if (iswctype((wint_t)wc, work_mbc->ch_classes[i])) 2509 goto charset_matched; 2510 } 2511 2512 strncpy(buffer, buf_begin + index, match_len); 2513 buffer[match_len] = '\0'; 2514 2515 /* match with an equivalent class? */ 2516 for (i = 0; i<work_mbc->nequivs; i++) 2517 { 2518 op_len = strlen(work_mbc->equivs[i]); 2519 strncpy(buffer, buf_begin + index, op_len); 2520 buffer[op_len] = '\0'; 2521 if (strcoll(work_mbc->equivs[i], buffer) == 0) 2522 { 2523 match_len = op_len; 2524 goto charset_matched; 2525 } 2526 } 2527 2528 /* match with a collating element? */ 2529 for (i = 0; i<work_mbc->ncoll_elems; i++) 2530 { 2531 op_len = strlen(work_mbc->coll_elems[i]); 2532 strncpy(buffer, buf_begin + index, op_len); 2533 buffer[op_len] = '\0'; 2534 2535 if (strcoll(work_mbc->coll_elems[i], buffer) == 0) 2536 { 2537 match_len = op_len; 2538 goto charset_matched; 2539 } 2540 } 2541 2542 wcbuf[0] = wc; 2543 wcbuf[1] = wcbuf[3] = wcbuf[5] = '\0'; 2544 2545 /* match with a range? */ 2546 for (i = 0; i<work_mbc->nranges; i++) 2547 { 2548 wcbuf[2] = work_mbc->range_sts[i]; 2549 wcbuf[4] = work_mbc->range_ends[i]; 2550 2551 if (wcscoll(wcbuf, wcbuf+2) >= 0 && 2552 wcscoll(wcbuf+4, wcbuf) >= 0) 2553 goto charset_matched; 2554 } 2555 2556 /* match with a character? */ 2557 for (i = 0; i<work_mbc->nchars; i++) 2558 { 2559 if (wc == work_mbc->chars[i]) 2560 goto charset_matched; 2561 } 2562 2563 match = !match; 2564 2565 charset_matched: 2566 return match ? match_len : 0; 2567} 2568 2569/* Check each of `d->states[s].mbps.elem' can match or not. Then return the 2570 array which corresponds to `d->states[s].mbps.elem' and each element of 2571 the array contains the amount of the bytes with which the element can 2572 match. 2573 `index' is the index from the buf_begin, and it is the current position 2574 in the buffer. 2575 Caller MUST free the array which this function return. */ 2576static int* 2577check_matching_with_multibyte_ops (struct dfa *d, int s, int index) 2578{ 2579 int i; 2580 int* rarray; 2581 2582 MALLOC(rarray, int, d->states[s].mbps.nelem); 2583 for (i = 0; i < d->states[s].mbps.nelem; ++i) 2584 { 2585 position pos = d->states[s].mbps.elems[i]; 2586 switch(d->tokens[pos.index]) 2587 { 2588 case ANYCHAR: 2589 rarray[i] = match_anychar(d, s, pos, index); 2590 break; 2591 case MBCSET: 2592 rarray[i] = match_mb_charset(d, s, pos, index); 2593 break; 2594 default: 2595 break; /* can not happen. */ 2596 } 2597 } 2598 return rarray; 2599} 2600 2601/* Consume a single character and enumerate all of the positions which can 2602 be next position from the state `s'. 2603 `match_lens' is the input. It can be NULL, but it can also be the output 2604 of check_matching_with_multibyte_ops() for optimization. 2605 `mbclen' and `pps' are the output. `mbclen' is the length of the 2606 character consumed, and `pps' is the set this function enumerate. */ 2607static status_transit_state 2608transit_state_consume_1char (struct dfa *d, int s, unsigned char const **pp, 2609 int *match_lens, int *mbclen, position_set *pps) 2610{ 2611 int i, j; 2612 int s1, s2; 2613 int* work_mbls; 2614 status_transit_state rs = TRANSIT_STATE_DONE; 2615 2616 /* Calculate the length of the (single/multi byte) character 2617 to which p points. */ 2618 *mbclen = (mblen_buf[*pp - buf_begin] == 0)? 1 2619 : mblen_buf[*pp - buf_begin]; 2620 2621 /* Calculate the state which can be reached from the state `s' by 2622 consuming `*mbclen' single bytes from the buffer. */ 2623 s1 = s; 2624 for (i = 0; i < *mbclen; i++) 2625 { 2626 s2 = s1; 2627 rs = transit_state_singlebyte(d, s2, (*pp)++, &s1); 2628 } 2629 /* Copy the positions contained by `s1' to the set `pps'. */ 2630 copy(&(d->states[s1].elems), pps); 2631 2632 /* Check (inputed)match_lens, and initialize if it is NULL. */ 2633 if (match_lens == NULL && d->states[s].mbps.nelem != 0) 2634 work_mbls = check_matching_with_multibyte_ops(d, s, *pp - buf_begin); 2635 else 2636 work_mbls = match_lens; 2637 2638 /* Add all of the positions which can be reached from `s' by consuming 2639 a single character. */ 2640 for (i = 0; i < d->states[s].mbps.nelem ; i++) 2641 { 2642 if (work_mbls[i] == *mbclen) 2643 for (j = 0; j < d->follows[d->states[s].mbps.elems[i].index].nelem; 2644 j++) 2645 insert(d->follows[d->states[s].mbps.elems[i].index].elems[j], 2646 pps); 2647 } 2648 2649 if (match_lens == NULL && work_mbls != NULL) 2650 free(work_mbls); 2651 return rs; 2652} 2653 2654/* Transit state from s, then return new state and update the pointer of the 2655 buffer. This function is for some operator which can match with a multi- 2656 byte character or a collating element(which may be multi characters). */ 2657static int 2658transit_state (struct dfa *d, int s, unsigned char const **pp) 2659{ 2660 int s1; 2661 int mbclen; /* The length of current input multibyte character. */ 2662 int maxlen = 0; 2663 int i, j; 2664 int *match_lens = NULL; 2665 int nelem = d->states[s].mbps.nelem; /* Just a alias. */ 2666 position_set follows; 2667 unsigned char const *p1 = *pp; 2668 status_transit_state rs; 2669 wchar_t wc; 2670 2671 if (nelem > 0) 2672 /* This state has (a) multibyte operator(s). 2673 We check whether each of them can match or not. */ 2674 { 2675 /* Note: caller must free the return value of this function. */ 2676 match_lens = check_matching_with_multibyte_ops(d, s, *pp - buf_begin); 2677 2678 for (i = 0; i < nelem; i++) 2679 /* Search the operator which match the longest string, 2680 in this state. */ 2681 { 2682 if (match_lens[i] > maxlen) 2683 maxlen = match_lens[i]; 2684 } 2685 } 2686 2687 if (nelem == 0 || maxlen == 0) 2688 /* This state has no multibyte operator which can match. 2689 We need to check only one singlebyte character. */ 2690 { 2691 status_transit_state rs; 2692 rs = transit_state_singlebyte(d, s, *pp, &s1); 2693 2694 /* We must update the pointer if state transition succeeded. */ 2695 if (rs == TRANSIT_STATE_DONE) 2696 ++*pp; 2697 2698 if (match_lens != NULL) 2699 free(match_lens); 2700 return s1; 2701 } 2702 2703 /* This state has some operators which can match a multibyte character. */ 2704 follows.nelem = 0; 2705 MALLOC(follows.elems, position, d->nleaves); 2706 2707 /* `maxlen' may be longer than the length of a character, because it may 2708 not be a character but a (multi character) collating element. 2709 We enumerate all of the positions which `s' can reach by consuming 2710 `maxlen' bytes. */ 2711 rs = transit_state_consume_1char(d, s, pp, match_lens, &mbclen, &follows); 2712 2713 wc = inputwcs[*pp - mbclen - buf_begin]; 2714 s1 = state_index(d, &follows, wc == L'\n', iswalnum(wc)); 2715 realloc_trans_if_necessary(d, s1); 2716 2717 while (*pp - p1 < maxlen) 2718 { 2719 follows.nelem = 0; 2720 rs = transit_state_consume_1char(d, s1, pp, NULL, &mbclen, &follows); 2721 2722 for (i = 0; i < nelem ; i++) 2723 { 2724 if (match_lens[i] == *pp - p1) 2725 for (j = 0; 2726 j < d->follows[d->states[s1].mbps.elems[i].index].nelem; j++) 2727 insert(d->follows[d->states[s1].mbps.elems[i].index].elems[j], 2728 &follows); 2729 } 2730 2731 wc = inputwcs[*pp - mbclen - buf_begin]; 2732 s1 = state_index(d, &follows, wc == L'\n', iswalnum(wc)); 2733 realloc_trans_if_necessary(d, s1); 2734 } 2735 free(match_lens); 2736 free(follows.elems); 2737 return s1; 2738} 2739 2740#endif 2741 |
|
1873/* Search through a buffer looking for a match to the given struct dfa. 1874 Find the first occurrence of a string matching the regexp in the buffer, | 2742/* Search through a buffer looking for a match to the given struct dfa. 2743 Find the first occurrence of a string matching the regexp in the buffer, |
1875 and the shortest possible version thereof. Return a pointer to the first 1876 character after the match, or NULL if none is found. Begin points to 1877 the beginning of the buffer, and end points to the first character after 1878 its end. We store a newline in *end to act as a sentinel, so end had 1879 better point somewhere valid. Newline is a flag indicating whether to 1880 allow newlines to be in the matching string. If count is non- 1881 NULL it points to a place we're supposed to increment every time we 1882 see a newline. Finally, if backref is non-NULL it points to a place | 2744 and the shortest possible version thereof. Return the offset of the first 2745 character after the match, or (size_t) -1 if none is found. BEGIN points to 2746 the beginning of the buffer, and SIZE is the size of the buffer. If SIZE 2747 is nonzero, BEGIN[SIZE - 1] must be a newline. BACKREF points to a place |
1883 where we're supposed to store a 1 if backreferencing happened and the 1884 match needs to be verified by a backtracking matcher. Otherwise 1885 we store a 0 in *backref. */ | 2748 where we're supposed to store a 1 if backreferencing happened and the 2749 match needs to be verified by a backtracking matcher. Otherwise 2750 we store a 0 in *backref. */ |
1886char * 1887dfaexec (struct dfa *d, char *begin, char *end, 1888 int newline, int *count, int *backref) | 2751size_t 2752dfaexec (struct dfa *d, char const *begin, size_t size, int *backref) |
1889{ | 2753{ |
1890 register int s, s1, tmp; /* Current state. */ 1891 register unsigned char *p; /* Current input character. */ | 2754 register int s; /* Current state. */ 2755 register unsigned char const *p; /* Current input character. */ 2756 register unsigned char const *end; /* One past the last input character. */ |
1892 register int **trans, *t; /* Copy of d->trans so it can be optimized 1893 into a register. */ 1894 register unsigned char eol = eolbyte; /* Likewise for eolbyte. */ 1895 static int sbit[NOTCHAR]; /* Table for anding with d->success. */ 1896 static int sbit_init; 1897 1898 if (! sbit_init) 1899 { 1900 int i; 1901 1902 sbit_init = 1; 1903 for (i = 0; i < NOTCHAR; ++i) 1904 sbit[i] = (IS_WORD_CONSTITUENT(i)) ? 2 : 1; 1905 sbit[eol] = 4; 1906 } 1907 1908 if (! d->tralloc) 1909 build_state_zero(d); 1910 | 2757 register int **trans, *t; /* Copy of d->trans so it can be optimized 2758 into a register. */ 2759 register unsigned char eol = eolbyte; /* Likewise for eolbyte. */ 2760 static int sbit[NOTCHAR]; /* Table for anding with d->success. */ 2761 static int sbit_init; 2762 2763 if (! sbit_init) 2764 { 2765 int i; 2766 2767 sbit_init = 1; 2768 for (i = 0; i < NOTCHAR; ++i) 2769 sbit[i] = (IS_WORD_CONSTITUENT(i)) ? 2 : 1; 2770 sbit[eol] = 4; 2771 } 2772 2773 if (! d->tralloc) 2774 build_state_zero(d); 2775 |
1911 s = s1 = 0; 1912 p = (unsigned char *) begin; | 2776 s = 0; 2777 p = (unsigned char const *) begin; 2778 end = p + size; |
1913 trans = d->trans; | 2779 trans = d->trans; |
1914 *end = eol; | |
1915 | 2780 |
2781#ifdef MBS_SUPPORT 2782 if (MB_CUR_MAX > 1) 2783 { 2784 int remain_bytes, i; 2785 buf_begin = begin; 2786 buf_end = end; 2787 2788 /* initialize mblen_buf, and inputwcs. */ 2789 MALLOC(mblen_buf, unsigned char, end - (unsigned char const *)begin + 2); 2790 MALLOC(inputwcs, wchar_t, end - (unsigned char const *)begin + 2); 2791 memset(&mbs, 0, sizeof(mbstate_t)); 2792 remain_bytes = 0; 2793 for (i = 0; i < end - (unsigned char const *)begin + 1; i++) 2794 { 2795 if (remain_bytes == 0) 2796 { 2797 remain_bytes 2798 = mbrtowc(inputwcs + i, begin + i, 2799 end - (unsigned char const *)begin - i + 1, &mbs); 2800 if (remain_bytes <= 1) 2801 { 2802 remain_bytes = 0; 2803 inputwcs[i] = (wchar_t)begin[i]; 2804 mblen_buf[i] = 0; 2805 } 2806 else 2807 { 2808 mblen_buf[i] = remain_bytes; 2809 remain_bytes--; 2810 } 2811 } 2812 else 2813 { 2814 mblen_buf[i] = remain_bytes; 2815 inputwcs[i] = 0; 2816 remain_bytes--; 2817 } 2818 } 2819 mblen_buf[i] = 0; 2820 inputwcs[i] = 0; /* sentinel */ 2821 } 2822#endif /* MBS_SUPPORT */ 2823 |
|
1916 for (;;) 1917 { | 2824 for (;;) 2825 { |
1918 while ((t = trans[s]) != 0) { /* hand-optimized loop */ 1919 s1 = t[*p++]; 1920 if ((t = trans[s1]) == 0) { 1921 tmp = s ; s = s1 ; s1 = tmp ; /* swap */ 1922 break; 1923 } 1924 s = t[*p++]; 1925 } | 2826#ifdef MBS_SUPPORT 2827 if (MB_CUR_MAX > 1) 2828 while ((t = trans[s])) 2829 { 2830 if (d->states[s].mbps.nelem != 0) 2831 { 2832 /* Can match with a multibyte character( and multi character 2833 collating element). */ 2834 unsigned char const *nextp; |
1926 | 2835 |
1927 if (s >= 0 && p <= (unsigned char *) end && d->fails[s]) | 2836 SKIP_REMAINS_MB_IF_INITIAL_STATE(s, p); 2837 2838 nextp = p; 2839 s = transit_state(d, s, &nextp); 2840 p = nextp; 2841 2842 /* Trans table might be updated. */ 2843 trans = d->trans; 2844 } 2845 else 2846 { 2847 SKIP_REMAINS_MB_IF_INITIAL_STATE(s, p); 2848 s = t[*p++]; 2849 } 2850 } 2851 else 2852#endif /* MBS_SUPPORT */ 2853 while ((t = trans[s])) 2854 s = t[*p++]; 2855 2856 if (s < 0) |
1928 { | 2857 { |
2858 if (p == end) 2859 { 2860#ifdef MBS_SUPPORT 2861 if (MB_CUR_MAX > 1) 2862 { 2863 free(mblen_buf); 2864 free(inputwcs); 2865 } 2866#endif /* MBS_SUPPORT */ 2867 return (size_t) -1; 2868 } 2869 s = 0; 2870 } 2871 else if ((t = d->fails[s])) 2872 { |
|
1929 if (d->success[s] & sbit[*p]) 1930 { 1931 if (backref) 1932 *backref = (d->states[s].backref != 0); | 2873 if (d->success[s] & sbit[*p]) 2874 { 2875 if (backref) 2876 *backref = (d->states[s].backref != 0); |
1933 return (char *) p; | 2877#ifdef MBS_SUPPORT 2878 if (MB_CUR_MAX > 1) 2879 { 2880 free(mblen_buf); 2881 free(inputwcs); 2882 } 2883#endif /* MBS_SUPPORT */ 2884 return (char const *) p - begin; |
1934 } 1935 | 2885 } 2886 |
1936 s1 = s; 1937 s = d->fails[s][*p++]; 1938 continue; 1939 } | 2887#ifdef MBS_SUPPORT 2888 if (MB_CUR_MAX > 1) 2889 { 2890 SKIP_REMAINS_MB_IF_INITIAL_STATE(s, p); 2891 if (d->states[s].mbps.nelem != 0) 2892 { 2893 /* Can match with a multibyte character( and multi 2894 character collating element). */ 2895 unsigned char const *nextp; 2896 nextp = p; 2897 s = transit_state(d, s, &nextp); 2898 p = nextp; |
1940 | 2899 |
1941 /* If the previous character was a newline, count it. */ 1942 if (count && (char *) p <= end && p[-1] == eol) 1943 ++*count; 1944 1945 /* Check if we've run off the end of the buffer. */ 1946 if ((char *) p > end) 1947 return NULL; 1948 1949 if (s >= 0) | 2900 /* Trans table might be updated. */ 2901 trans = d->trans; 2902 } 2903 else 2904 s = t[*p++]; 2905 } 2906 else 2907#endif /* MBS_SUPPORT */ 2908 s = t[*p++]; 2909 } 2910 else |
1950 { 1951 build_state(s, d); 1952 trans = d->trans; | 2911 { 2912 build_state(s, d); 2913 trans = d->trans; |
1953 continue; | |
1954 } | 2914 } |
1955 1956 if (p[-1] == eol && newline) 1957 { 1958 s = d->newlines[s1]; 1959 continue; 1960 } 1961 1962 s = 0; | |
1963 } 1964} 1965 1966/* Initialize the components of a dfa that the other routines don't 1967 initialize for themselves. */ 1968void 1969dfainit (struct dfa *d) 1970{ 1971 d->calloc = 1; 1972 MALLOC(d->charclasses, charclass, d->calloc); 1973 d->cindex = 0; 1974 1975 d->talloc = 1; 1976 MALLOC(d->tokens, token, d->talloc); 1977 d->tindex = d->depth = d->nleaves = d->nregexps = 0; | 2915 } 2916} 2917 2918/* Initialize the components of a dfa that the other routines don't 2919 initialize for themselves. */ 2920void 2921dfainit (struct dfa *d) 2922{ 2923 d->calloc = 1; 2924 MALLOC(d->charclasses, charclass, d->calloc); 2925 d->cindex = 0; 2926 2927 d->talloc = 1; 2928 MALLOC(d->tokens, token, d->talloc); 2929 d->tindex = d->depth = d->nleaves = d->nregexps = 0; |
2930#ifdef MBS_SUPPORT 2931 if (MB_CUR_MAX > 1) 2932 { 2933 d->nmultibyte_prop = 1; 2934 MALLOC(d->multibyte_prop, int, d->nmultibyte_prop); 2935 d->nmbcsets = 0; 2936 d->mbcsets_alloc = 1; 2937 MALLOC(d->mbcsets, struct mb_char_classes, d->mbcsets_alloc); 2938 } 2939#endif |
|
1978 1979 d->searchflag = 0; 1980 d->tralloc = 0; 1981 1982 d->musts = 0; 1983} 1984 1985/* Parse and analyze a single string of the given length. */ 1986void | 2940 2941 d->searchflag = 0; 2942 d->tralloc = 0; 2943 2944 d->musts = 0; 2945} 2946 2947/* Parse and analyze a single string of the given length. */ 2948void |
1987dfacomp (char *s, size_t len, struct dfa *d, int searchflag) | 2949dfacomp (char const *s, size_t len, struct dfa *d, int searchflag) |
1988{ 1989 if (case_fold) /* dummy folding in service of dfamust() */ 1990 { 1991 char *lcopy; 1992 int i; 1993 1994 lcopy = malloc(len); 1995 if (!lcopy) --- 29 unchanged lines hidden (view full) --- 2025void 2026dfafree (struct dfa *d) 2027{ 2028 int i; 2029 struct dfamust *dm, *ndm; 2030 2031 free((ptr_t) d->charclasses); 2032 free((ptr_t) d->tokens); | 2950{ 2951 if (case_fold) /* dummy folding in service of dfamust() */ 2952 { 2953 char *lcopy; 2954 int i; 2955 2956 lcopy = malloc(len); 2957 if (!lcopy) --- 29 unchanged lines hidden (view full) --- 2987void 2988dfafree (struct dfa *d) 2989{ 2990 int i; 2991 struct dfamust *dm, *ndm; 2992 2993 free((ptr_t) d->charclasses); 2994 free((ptr_t) d->tokens); |
2995 2996#ifdef MBS_SUPPORT 2997 if (MB_CUR_MAX > 1) 2998 { 2999 free((ptr_t) d->multibyte_prop); 3000 for (i = 0; i < d->nmbcsets; ++i) 3001 { 3002 int j; 3003 struct mb_char_classes *p = &(d->mbcsets[i]); 3004 if (p->chars != NULL) 3005 free(p->chars); 3006 if (p->ch_classes != NULL) 3007 free(p->ch_classes); 3008 if (p->range_sts != NULL) 3009 free(p->range_sts); 3010 if (p->range_ends != NULL) 3011 free(p->range_ends); 3012 3013 for (j = 0; j < p->nequivs; ++j) 3014 free(p->equivs[j]); 3015 if (p->equivs != NULL) 3016 free(p->equivs); 3017 3018 for (j = 0; j < p->ncoll_elems; ++j) 3019 free(p->coll_elems[j]); 3020 if (p->coll_elems != NULL) 3021 free(p->coll_elems); 3022 } 3023 free((ptr_t) d->mbcsets); 3024 } 3025#endif /* MBS_SUPPORT */ 3026 |
|
2033 for (i = 0; i < d->sindex; ++i) 2034 free((ptr_t) d->states[i].elems.elems); 2035 free((ptr_t) d->states); 2036 for (i = 0; i < d->tindex; ++i) 2037 if (d->follows[i].elems) 2038 free((ptr_t) d->follows[i].elems); 2039 free((ptr_t) d->follows); 2040 for (i = 0; i < d->tralloc; ++i) 2041 if (d->trans[i]) 2042 free((ptr_t) d->trans[i]); 2043 else if (d->fails[i]) 2044 free((ptr_t) d->fails[i]); 2045 if (d->realtrans) free((ptr_t) d->realtrans); 2046 if (d->fails) free((ptr_t) d->fails); | 3027 for (i = 0; i < d->sindex; ++i) 3028 free((ptr_t) d->states[i].elems.elems); 3029 free((ptr_t) d->states); 3030 for (i = 0; i < d->tindex; ++i) 3031 if (d->follows[i].elems) 3032 free((ptr_t) d->follows[i].elems); 3033 free((ptr_t) d->follows); 3034 for (i = 0; i < d->tralloc; ++i) 3035 if (d->trans[i]) 3036 free((ptr_t) d->trans[i]); 3037 else if (d->fails[i]) 3038 free((ptr_t) d->fails[i]); 3039 if (d->realtrans) free((ptr_t) d->realtrans); 3040 if (d->fails) free((ptr_t) d->fails); |
2047 if (d->newlines) free((ptr_t) d->newlines); | |
2048 if (d->success) free((ptr_t) d->success); 2049 for (dm = d->musts; dm; dm = ndm) 2050 { 2051 ndm = dm->next; 2052 free(dm->must); 2053 free((ptr_t) dm); 2054 } 2055} --- 24 unchanged lines hidden (view full) --- 2080 operators. 2081 2082 "ZERO" means "a zero-length sequence" below. 2083 2084 Type left right is in 2085 ---- ---- ----- -- -- 2086 char c # c # c # c # c 2087 | 3041 if (d->success) free((ptr_t) d->success); 3042 for (dm = d->musts; dm; dm = ndm) 3043 { 3044 ndm = dm->next; 3045 free(dm->must); 3046 free((ptr_t) dm); 3047 } 3048} --- 24 unchanged lines hidden (view full) --- 3073 operators. 3074 3075 "ZERO" means "a zero-length sequence" below. 3076 3077 Type left right is in 3078 ---- ---- ----- -- -- 3079 char c # c # c # c # c 3080 |
3081 ANYCHAR ZERO ZERO ZERO ZERO 3082 3083 MBCSET ZERO ZERO ZERO ZERO 3084 |
|
2088 CSET ZERO ZERO ZERO ZERO 2089 2090 STAR ZERO ZERO ZERO ZERO 2091 2092 QMARK ZERO ZERO ZERO ZERO 2093 2094 PLUS p->left p->right ZERO p->in 2095 --- 156 unchanged lines hidden (view full) --- 2252 return NULL; 2253 cpp = (char **) malloc(sizeof *cpp); 2254 if (cpp == NULL) 2255 return NULL; 2256 cpp[0] = NULL; 2257 for (lcp = left; *lcp != '\0'; ++lcp) 2258 { 2259 len = 0; | 3085 CSET ZERO ZERO ZERO ZERO 3086 3087 STAR ZERO ZERO ZERO ZERO 3088 3089 QMARK ZERO ZERO ZERO ZERO 3090 3091 PLUS p->left p->right ZERO p->in 3092 --- 156 unchanged lines hidden (view full) --- 3249 return NULL; 3250 cpp = (char **) malloc(sizeof *cpp); 3251 if (cpp == NULL) 3252 return NULL; 3253 cpp[0] = NULL; 3254 for (lcp = left; *lcp != '\0'; ++lcp) 3255 { 3256 len = 0; |
2260 rcp = index(right, *lcp); | 3257 rcp = strchr (right, *lcp); |
2261 while (rcp != NULL) 2262 { 2263 for (i = 1; lcp[i] != '\0' && lcp[i] == rcp[i]; ++i) 2264 continue; 2265 if (i > len) 2266 len = i; | 3258 while (rcp != NULL) 3259 { 3260 for (i = 1; lcp[i] != '\0' && lcp[i] == rcp[i]; ++i) 3261 continue; 3262 if (i > len) 3263 len = i; |
2267 rcp = index(rcp + 1, *lcp); | 3264 rcp = strchr (rcp + 1, *lcp); |
2268 } 2269 if (len == 0) 2270 continue; 2271 if ((cpp = enlist(cpp, lcp, len)) == NULL) 2272 break; 2273 } 2274 return cpp; 2275} --- 249 unchanged lines hidden (view full) --- 2525 /* "cannot happen" */ 2526 goto done; 2527 } 2528 else if (t == '\0') 2529 { 2530 /* not on *my* shift */ 2531 goto done; 2532 } | 3265 } 3266 if (len == 0) 3267 continue; 3268 if ((cpp = enlist(cpp, lcp, len)) == NULL) 3269 break; 3270 } 3271 return cpp; 3272} --- 249 unchanged lines hidden (view full) --- 3522 /* "cannot happen" */ 3523 goto done; 3524 } 3525 else if (t == '\0') 3526 { 3527 /* not on *my* shift */ 3528 goto done; 3529 } |
2533 else if (t >= CSET) | 3530 else if (t >= CSET 3531#ifdef MBS_SUPPORT 3532 || t == ANYCHAR 3533 || t == MBCSET 3534#endif /* MBS_SUPPORT */ 3535 ) |
2534 { 2535 /* easy enough */ 2536 resetmust(mp); 2537 } 2538 else 2539 { 2540 /* plain character */ 2541 resetmust(mp); --- 33 unchanged lines hidden (view full) --- 2575 freelist(mp[i].in); 2576 ifree((char *) mp[i].in); 2577 ifree(mp[i].left); 2578 ifree(mp[i].right); 2579 ifree(mp[i].is); 2580 } 2581 free((char *) mp); 2582} | 3536 { 3537 /* easy enough */ 3538 resetmust(mp); 3539 } 3540 else 3541 { 3542 /* plain character */ 3543 resetmust(mp); --- 33 unchanged lines hidden (view full) --- 3577 freelist(mp[i].in); 3578 ifree((char *) mp[i].in); 3579 ifree(mp[i].left); 3580 ifree(mp[i].right); 3581 ifree(mp[i].is); 3582 } 3583 free((char *) mp); 3584} |
3585/* vim:set shiftwidth=2: */ |
|