Deleted Added
full compact
dfa.c (126435) dfa.c (131557)
1/* dfa.c - deterministic extended regexp routines for GNU
2 Copyright 1988, 1998, 2000 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8

--- 4 unchanged lines hidden (view full) ---

13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */
17
18/* Written June, 1988 by Mike Haertel
19 Modified July, 1988 by Arthur David Olson to assist BMG speedups */
20
1/* dfa.c - deterministic extended regexp routines for GNU
2 Copyright 1988, 1998, 2000 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8

--- 4 unchanged lines hidden (view full) ---

13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */
17
18/* Written June, 1988 by Mike Haertel
19 Modified July, 1988 by Arthur David Olson to assist BMG speedups */
20
21/* $FreeBSD: head/gnu/usr.bin/grep/dfa.c 126435 2004-03-01 08:37:20Z ache $ */
21/* $FreeBSD: head/gnu/usr.bin/grep/dfa.c 131557 2004-07-04 10:02:03Z tjr $ */
22
23#ifdef HAVE_CONFIG_H
24#include <config.h>
25#endif
26
27#include <assert.h>
28#include <ctype.h>
29#include <stdio.h>
30
31#include <sys/types.h>
32#ifdef STDC_HEADERS
33#include <stdlib.h>
34#else
35extern char *calloc(), *malloc(), *realloc();
36extern void free();
37#endif
38
39#if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
40#include <string.h>
22
23#ifdef HAVE_CONFIG_H
24#include <config.h>
25#endif
26
27#include <assert.h>
28#include <ctype.h>
29#include <stdio.h>
30
31#include <sys/types.h>
32#ifdef STDC_HEADERS
33#include <stdlib.h>
34#else
35extern char *calloc(), *malloc(), *realloc();
36extern void free();
37#endif
38
39#if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
40#include <string.h>
41#undef index
42#define index strchr
43#else
44#include <strings.h>
45#endif
46
41#else
42#include <strings.h>
43#endif
44
45#if HAVE_SETLOCALE
46# include <locale.h>
47#endif
48
49#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC
50/* We can handle multibyte string. */
51# define MBS_SUPPORT
52#endif
53
54#ifdef MBS_SUPPORT
55# include <wchar.h>
56# include <wctype.h>
57#endif
58
47#ifndef DEBUG /* use the same approach as regex.c */
48#undef assert
49#define assert(e)
50#endif /* DEBUG */
51
52#ifndef isgraph
53#define isgraph(C) (isprint(C) && !isspace(C))
54#endif

--- 44 unchanged lines hidden (view full) ---

99# endif
100# else
101# define _(Str) (Str)
102# endif
103#endif
104
105#include "regex.h"
106#include "dfa.h"
59#ifndef DEBUG /* use the same approach as regex.c */
60#undef assert
61#define assert(e)
62#endif /* DEBUG */
63
64#ifndef isgraph
65#define isgraph(C) (isprint(C) && !isspace(C))
66#endif

--- 44 unchanged lines hidden (view full) ---

111# endif
112# else
113# define _(Str) (Str)
114# endif
115#endif
116
117#include "regex.h"
118#include "dfa.h"
119#include "hard-locale.h"
107
108/* HPUX, define those as macros in sys/param.h */
109#ifdef setbit
110# undef setbit
111#endif
112#ifdef clrbit
113# undef clrbit
114#endif
115
116static void dfamust PARAMS ((struct dfa *dfa));
120
121/* HPUX, define those as macros in sys/param.h */
122#ifdef setbit
123# undef setbit
124#endif
125#ifdef clrbit
126# undef clrbit
127#endif
128
129static void dfamust PARAMS ((struct dfa *dfa));
117
118static ptr_t xcalloc PARAMS ((size_t n, size_t s));
119static ptr_t xmalloc PARAMS ((size_t n));
120static ptr_t xrealloc PARAMS ((ptr_t p, size_t n));
121#ifdef DEBUG
122static void prtok PARAMS ((token t));
123#endif
124static int tstbit PARAMS ((int b, charclass c));
125static void setbit PARAMS ((int b, charclass c));
126static void clrbit PARAMS ((int b, charclass c));
127static void copyset PARAMS ((charclass src, charclass dst));
128static void zeroset PARAMS ((charclass s));
129static void notset PARAMS ((charclass s));
130static int equal PARAMS ((charclass s1, charclass s2));
131static int charclass_index PARAMS ((charclass s));
132static int looking_at PARAMS ((const char *s));
133static token lex PARAMS ((void));
134static void addtok PARAMS ((token t));
135static void atom PARAMS ((void));
136static int nsubtoks PARAMS ((int tindex));
137static void copytoks PARAMS ((int tindex, int ntokens));
138static void closure PARAMS ((void));
139static void branch PARAMS ((void));
140static void regexp PARAMS ((int toplevel));
130static void regexp PARAMS ((int toplevel));
141static void copy PARAMS ((position_set *src, position_set *dst));
142static void insert PARAMS ((position p, position_set *s));
143static void merge PARAMS ((position_set *s1, position_set *s2, position_set *m));
144static void delete PARAMS ((position p, position_set *s));
145static int state_index PARAMS ((struct dfa *d, position_set *s,
146 int newline, int letter));
147static void build_state PARAMS ((int s, struct dfa *d));
148static void build_state_zero PARAMS ((struct dfa *d));
149static char *icatalloc PARAMS ((char *old, char *new));
150static char *icpyalloc PARAMS ((char *string));
151static char *istrstr PARAMS ((char *lookin, char *lookfor));
152static void ifree PARAMS ((char *cp));
153static void freelist PARAMS ((char **cpp));
154static char **enlist PARAMS ((char **cpp, char *new, size_t len));
155static char **comsubs PARAMS ((char *left, char *right));
156static char **addlists PARAMS ((char **old, char **new));
157static char **inboth PARAMS ((char **left, char **right));
158
159static ptr_t
160xcalloc (size_t n, size_t s)
161{
162 ptr_t r = calloc(n, s);
163
164 if (!r)
165 dfaerror(_("Memory exhausted"));

--- 25 unchanged lines hidden (view full) ---

191#define CALLOC(p, t, n) ((p) = (t *) xcalloc((size_t)(n), sizeof (t)))
192#define MALLOC(p, t, n) ((p) = (t *) xmalloc((n) * sizeof (t)))
193#define REALLOC(p, t, n) ((p) = (t *) xrealloc((ptr_t) (p), (n) * sizeof (t)))
194
195/* Reallocate an array of type t if nalloc is too small for index. */
196#define REALLOC_IF_NECESSARY(p, t, nalloc, index) \
197 if ((index) >= (nalloc)) \
198 { \
131
132static ptr_t
133xcalloc (size_t n, size_t s)
134{
135 ptr_t r = calloc(n, s);
136
137 if (!r)
138 dfaerror(_("Memory exhausted"));

--- 25 unchanged lines hidden (view full) ---

164#define CALLOC(p, t, n) ((p) = (t *) xcalloc((size_t)(n), sizeof (t)))
165#define MALLOC(p, t, n) ((p) = (t *) xmalloc((n) * sizeof (t)))
166#define REALLOC(p, t, n) ((p) = (t *) xrealloc((ptr_t) (p), (n) * sizeof (t)))
167
168/* Reallocate an array of type t if nalloc is too small for index. */
169#define REALLOC_IF_NECESSARY(p, t, nalloc, index) \
170 if ((index) >= (nalloc)) \
171 { \
199 while ((index) >= (nalloc)) \
172 do \
200 (nalloc) *= 2; \
173 (nalloc) *= 2; \
174 while ((index) >= (nalloc)); \
201 REALLOC(p, t, nalloc); \
202 }
203
204#ifdef DEBUG
205
206static void
207prtok (token t)
208{
175 REALLOC(p, t, nalloc); \
176 }
177
178#ifdef DEBUG
179
180static void
181prtok (token t)
182{
209 char *s;
183 char const *s;
210
211 if (t < 0)
212 fprintf(stderr, "END");
213 else if (t < NOTCHAR)
214 fprintf(stderr, "%c", t);
215 else
216 {
217 switch (t)

--- 9 unchanged lines hidden (view full) ---

227 case QMARK: s = "QMARK"; break;
228 case STAR: s = "STAR"; break;
229 case PLUS: s = "PLUS"; break;
230 case CAT: s = "CAT"; break;
231 case OR: s = "OR"; break;
232 case ORTOP: s = "ORTOP"; break;
233 case LPAREN: s = "LPAREN"; break;
234 case RPAREN: s = "RPAREN"; break;
184
185 if (t < 0)
186 fprintf(stderr, "END");
187 else if (t < NOTCHAR)
188 fprintf(stderr, "%c", t);
189 else
190 {
191 switch (t)

--- 9 unchanged lines hidden (view full) ---

201 case QMARK: s = "QMARK"; break;
202 case STAR: s = "STAR"; break;
203 case PLUS: s = "PLUS"; break;
204 case CAT: s = "CAT"; break;
205 case OR: s = "OR"; break;
206 case ORTOP: s = "ORTOP"; break;
207 case LPAREN: s = "LPAREN"; break;
208 case RPAREN: s = "RPAREN"; break;
209 case CRANGE: s = "CRANGE"; break;
210#ifdef MBS_SUPPORT
211 case ANYCHAR: s = "ANYCHAR"; break;
212 case MBCSET: s = "MBCSET"; break;
213#endif /* MBS_SUPPORT */
235 default: s = "CSET"; break;
236 }
237 fprintf(stderr, "%s", s);
238 }
239}
240#endif /* DEBUG */
241
242/* Stuff pertaining to charclasses. */
243
244static int
214 default: s = "CSET"; break;
215 }
216 fprintf(stderr, "%s", s);
217 }
218}
219#endif /* DEBUG */
220
221/* Stuff pertaining to charclasses. */
222
223static int
245tstbit (int b, charclass c)
224tstbit (unsigned b, charclass c)
246{
247 return c[b / INTBITS] & 1 << b % INTBITS;
248}
249
250static void
225{
226 return c[b / INTBITS] & 1 << b % INTBITS;
227}
228
229static void
251setbit (int b, charclass c)
230setbit (unsigned b, charclass c)
252{
253 c[b / INTBITS] |= 1 << b % INTBITS;
254}
255
256static void
231{
232 c[b / INTBITS] |= 1 << b % INTBITS;
233}
234
235static void
257clrbit (int b, charclass c)
236clrbit (unsigned b, charclass c)
258{
259 c[b / INTBITS] &= ~(1 << b % INTBITS);
260}
261
262static void
263copyset (charclass src, charclass dst)
264{
237{
238 c[b / INTBITS] &= ~(1 << b % INTBITS);
239}
240
241static void
242copyset (charclass src, charclass dst)
243{
265 int i;
266
267 for (i = 0; i < CHARCLASS_INTS; ++i)
268 dst[i] = src[i];
244 memcpy (dst, src, sizeof (charclass));
269}
270
271static void
272zeroset (charclass s)
273{
245}
246
247static void
248zeroset (charclass s)
249{
274 int i;
275
276 for (i = 0; i < CHARCLASS_INTS; ++i)
277 s[i] = 0;
250 memset (s, 0, sizeof (charclass));
278}
279
280static void
281notset (charclass s)
282{
283 int i;
284
285 for (i = 0; i < CHARCLASS_INTS; ++i)
286 s[i] = ~s[i];
287}
288
289static int
290equal (charclass s1, charclass s2)
291{
251}
252
253static void
254notset (charclass s)
255{
256 int i;
257
258 for (i = 0; i < CHARCLASS_INTS; ++i)
259 s[i] = ~s[i];
260}
261
262static int
263equal (charclass s1, charclass s2)
264{
292 int i;
293
294 for (i = 0; i < CHARCLASS_INTS; ++i)
295 if (s1[i] != s2[i])
296 return 0;
297 return 1;
265 return memcmp (s1, s2, sizeof (charclass)) == 0;
298}
299
300/* A pointer to the current dfa is kept here during parsing. */
301static struct dfa *dfa;
302
303/* Find the index of charclass s in dfa->charclasses, or allocate a new charclass. */
304static int
305charclass_index (charclass s)

--- 15 unchanged lines hidden (view full) ---

321/* Flag for case-folding letters into sets. */
322static int case_fold;
323
324/* End-of-line byte in data. */
325static unsigned char eolbyte;
326
327/* Entry point to set syntax options. */
328void
266}
267
268/* A pointer to the current dfa is kept here during parsing. */
269static struct dfa *dfa;
270
271/* Find the index of charclass s in dfa->charclasses, or allocate a new charclass. */
272static int
273charclass_index (charclass s)

--- 15 unchanged lines hidden (view full) ---

289/* Flag for case-folding letters into sets. */
290static int case_fold;
291
292/* End-of-line byte in data. */
293static unsigned char eolbyte;
294
295/* Entry point to set syntax options. */
296void
329dfasyntax (reg_syntax_t bits, int fold, int eol)
297dfasyntax (reg_syntax_t bits, int fold, unsigned char eol)
330{
331 syntax_bits_set = 1;
332 syntax_bits = bits;
333 case_fold = fold;
334 eolbyte = eol;
335}
336
298{
299 syntax_bits_set = 1;
300 syntax_bits = bits;
301 case_fold = fold;
302 eolbyte = eol;
303}
304
305/* Like setbit, but if case is folded, set both cases of a letter. */
306static void
307setbit_case_fold (unsigned b, charclass c)
308{
309 setbit (b, c);
310 if (case_fold)
311 {
312 if (ISUPPER (b))
313 setbit (tolower (b), c);
314 else if (ISLOWER (b))
315 setbit (toupper (b), c);
316 }
317}
318
337/* Lexical analyzer. All the dross that deals with the obnoxious
338 GNU Regex syntax bits is located here. The poor, suffering
339 reader is referred to the GNU Regex documentation for the
340 meaning of the @#%!@#%^!@ syntax bits. */
341
319/* Lexical analyzer. All the dross that deals with the obnoxious
320 GNU Regex syntax bits is located here. The poor, suffering
321 reader is referred to the GNU Regex documentation for the
322 meaning of the @#%!@#%^!@ syntax bits. */
323
342static char *lexstart; /* Pointer to beginning of input string. */
343static char *lexptr; /* Pointer to next input character. */
324static char const *lexstart; /* Pointer to beginning of input string. */
325static char const *lexptr; /* Pointer to next input character. */
344static int lexleft; /* Number of characters remaining. */
345static token lasttok; /* Previous token returned; initially END. */
346static int laststart; /* True if we're separated from beginning or (, |
347 only by zero-width characters. */
348static int parens; /* Count of outstanding left parens. */
349static int minrep, maxrep; /* Repeat counts for {m,n}. */
326static int lexleft; /* Number of characters remaining. */
327static token lasttok; /* Previous token returned; initially END. */
328static int laststart; /* True if we're separated from beginning or (, |
329 only by zero-width characters. */
330static int parens; /* Count of outstanding left parens. */
331static int minrep, maxrep; /* Repeat counts for {m,n}. */
332static int hard_LC_COLLATE; /* Nonzero if LC_COLLATE is hard. */
350
333
334#ifdef MBS_SUPPORT
335/* These variables are used only if (MB_CUR_MAX > 1). */
336static mbstate_t mbs; /* Mbstate for mbrlen(). */
337static int cur_mb_len; /* Byte length of the current scanning
338 multibyte character. */
339static int cur_mb_index; /* Byte index of the current scanning multibyte
340 character.
341
342 singlebyte character : cur_mb_index = 0
343 multibyte character
344 1st byte : cur_mb_index = 1
345 2nd byte : cur_mb_index = 2
346 ...
347 nth byte : cur_mb_index = n */
348static unsigned char *mblen_buf;/* Correspond to the input buffer in dfaexec().
349 Each element store the amount of remain
350 byte of corresponding multibyte character
351 in the input string. A element's value
352 is 0 if corresponding character is a
353 singlebyte chracter.
354 e.g. input : 'a', <mb(0)>, <mb(1)>, <mb(2)>
355 mblen_buf : 0, 3, 2, 1
356 */
357static wchar_t *inputwcs; /* Wide character representation of input
358 string in dfaexec().
359 The length of this array is same as
360 the length of input string(char array).
361 inputstring[i] is a single-byte char,
362 or 1st byte of a multibyte char.
363 And inputwcs[i] is the codepoint. */
364static unsigned char const *buf_begin;/* refference to begin in dfaexec(). */
365static unsigned char const *buf_end; /* refference to end in dfaexec(). */
366#endif /* MBS_SUPPORT */
367
368#ifdef MBS_SUPPORT
369/* This function update cur_mb_len, and cur_mb_index.
370 p points current lexptr, len is the remaining buffer length. */
371static void
372update_mb_len_index (unsigned char const *p, int len)
373{
374 /* If last character is a part of a multibyte character,
375 we update cur_mb_index. */
376 if (cur_mb_index)
377 cur_mb_index = (cur_mb_index >= cur_mb_len)? 0
378 : cur_mb_index + 1;
379
380 /* If last character is a single byte character, or the
381 last portion of a multibyte character, we check whether
382 next character is a multibyte character or not. */
383 if (! cur_mb_index)
384 {
385 cur_mb_len = mbrlen(p, len, &mbs);
386 if (cur_mb_len > 1)
387 /* It is a multibyte character.
388 cur_mb_len was already set by mbrlen(). */
389 cur_mb_index = 1;
390 else if (cur_mb_len < 1)
391 /* Invalid sequence. We treat it as a singlebyte character.
392 cur_mb_index is aleady 0. */
393 cur_mb_len = 1;
394 /* Otherwise, cur_mb_len == 1, it is a singlebyte character.
395 cur_mb_index is aleady 0. */
396 }
397}
398#endif /* MBS_SUPPORT */
399
400#ifdef MBS_SUPPORT
351/* Note that characters become unsigned here. */
401/* Note that characters become unsigned here. */
352#define FETCH(c, eoferr) \
402# define FETCH(c, eoferr) \
403 { \
404 if (! lexleft) \
405 { \
406 if (eoferr != 0) \
407 dfaerror (eoferr); \
408 else \
409 return lasttok = END; \
410 } \
411 if (MB_CUR_MAX > 1) \
412 update_mb_len_index(lexptr, lexleft); \
413 (c) = (unsigned char) *lexptr++; \
414 --lexleft; \
415 }
416
417/* This function fetch a wide character, and update cur_mb_len,
418 used only if the current locale is a multibyte environment. */
419static wchar_t
420fetch_wc (char const *eoferr)
421{
422 wchar_t wc;
423 if (! lexleft)
424 {
425 if (eoferr != 0)
426 dfaerror (eoferr);
427 else
428 return -1;
429 }
430
431 cur_mb_len = mbrtowc(&wc, lexptr, lexleft, &mbs);
432 if (cur_mb_len <= 0)
433 {
434 cur_mb_len = 1;
435 wc = *lexptr;
436 }
437 lexptr += cur_mb_len;
438 lexleft -= cur_mb_len;
439 return wc;
440}
441#else
442/* Note that characters become unsigned here. */
443# define FETCH(c, eoferr) \
353 { \
354 if (! lexleft) \
355 { \
356 if (eoferr != 0) \
357 dfaerror (eoferr); \
358 else \
359 return lasttok = END; \
360 } \
361 (c) = (unsigned char) *lexptr++; \
362 --lexleft; \
363 }
444 { \
445 if (! lexleft) \
446 { \
447 if (eoferr != 0) \
448 dfaerror (eoferr); \
449 else \
450 return lasttok = END; \
451 } \
452 (c) = (unsigned char) *lexptr++; \
453 --lexleft; \
454 }
455#endif /* MBS_SUPPORT */
364
456
457#ifdef MBS_SUPPORT
458/* Multibyte character handling sub-routin for lex.
459 This function parse a bracket expression and build a struct
460 mb_char_classes. */
461static void
462parse_bracket_exp_mb ()
463{
464 wchar_t wc, wc1, wc2;
465
466 /* Work area to build a mb_char_classes. */
467 struct mb_char_classes *work_mbc;
468 int chars_al, range_sts_al, range_ends_al, ch_classes_al,
469 equivs_al, coll_elems_al;
470
471 REALLOC_IF_NECESSARY(dfa->mbcsets, struct mb_char_classes,
472 dfa->mbcsets_alloc, dfa->nmbcsets + 1);
473 /* dfa->multibyte_prop[] hold the index of dfa->mbcsets.
474 We will update dfa->multibyte_prop in addtok(), because we can't
475 decide the index in dfa->tokens[]. */
476
477 /* Initialize work are */
478 work_mbc = &(dfa->mbcsets[dfa->nmbcsets++]);
479
480 chars_al = 1;
481 range_sts_al = range_ends_al = 0;
482 ch_classes_al = equivs_al = coll_elems_al = 0;
483 MALLOC(work_mbc->chars, wchar_t, chars_al);
484
485 work_mbc->nchars = work_mbc->nranges = work_mbc->nch_classes = 0;
486 work_mbc->nequivs = work_mbc->ncoll_elems = 0;
487 work_mbc->chars = work_mbc->ch_classes = NULL;
488 work_mbc->range_sts = work_mbc->range_ends = NULL;
489 work_mbc->equivs = work_mbc->coll_elems = NULL;
490
491 wc = fetch_wc(_("Unbalanced ["));
492 if (wc == L'^')
493 {
494 wc = fetch_wc(_("Unbalanced ["));
495 work_mbc->invert = 1;
496 }
497 else
498 work_mbc->invert = 0;
499 do
500 {
501 wc1 = -1; /* mark wc1 is not initialized". */
502
503 /* Note that if we're looking at some other [:...:] construct,
504 we just treat it as a bunch of ordinary characters. We can do
505 this because we assume regex has checked for syntax errors before
506 dfa is ever called. */
507 if (wc == L'[' && (syntax_bits & RE_CHAR_CLASSES))
508 {
509#define BRACKET_BUFFER_SIZE 128
510 char str[BRACKET_BUFFER_SIZE];
511 wc1 = wc;
512 wc = fetch_wc(_("Unbalanced ["));
513
514 /* If pattern contains `[[:', `[[.', or `[[='. */
515 if (cur_mb_len == 1 && (wc == L':' || wc == L'.' || wc == L'='))
516 {
517 unsigned char c;
518 unsigned char delim = (unsigned char)wc;
519 int len = 0;
520 for (;;)
521 {
522 if (! lexleft)
523 dfaerror (_("Unbalanced ["));
524 c = (unsigned char) *lexptr++;
525 --lexleft;
526
527 if ((c == delim && *lexptr == ']') || lexleft == 0)
528 break;
529 if (len < BRACKET_BUFFER_SIZE)
530 str[len++] = c;
531 else
532 /* This is in any case an invalid class name. */
533 str[0] = '\0';
534 }
535 str[len] = '\0';
536
537 if (lexleft == 0)
538 {
539 REALLOC_IF_NECESSARY(work_mbc->chars, wchar_t, chars_al,
540 work_mbc->nchars + 2);
541 work_mbc->chars[work_mbc->nchars++] = L'[';
542 work_mbc->chars[work_mbc->nchars++] = delim;
543 break;
544 }
545
546 if (--lexleft, *lexptr++ != ']')
547 dfaerror (_("Unbalanced ["));
548 if (delim == ':')
549 /* build character class. */
550 {
551 wctype_t wt;
552 /* Query the character class as wctype_t. */
553 wt = wctype (str);
554
555 if (ch_classes_al == 0)
556 MALLOC(work_mbc->ch_classes, wchar_t, ++ch_classes_al);
557 REALLOC_IF_NECESSARY(work_mbc->ch_classes, wctype_t,
558 ch_classes_al,
559 work_mbc->nch_classes + 1);
560 work_mbc->ch_classes[work_mbc->nch_classes++] = wt;
561
562 }
563 else if (delim == '=' || delim == '.')
564 {
565 char *elem;
566 MALLOC(elem, char, len + 1);
567 strncpy(elem, str, len + 1);
568
569 if (delim == '=')
570 /* build equivalent class. */
571 {
572 if (equivs_al == 0)
573 MALLOC(work_mbc->equivs, char*, ++equivs_al);
574 REALLOC_IF_NECESSARY(work_mbc->equivs, char*,
575 equivs_al,
576 work_mbc->nequivs + 1);
577 work_mbc->equivs[work_mbc->nequivs++] = elem;
578 }
579
580 if (delim == '.')
581 /* build collating element. */
582 {
583 if (coll_elems_al == 0)
584 MALLOC(work_mbc->coll_elems, char*, ++coll_elems_al);
585 REALLOC_IF_NECESSARY(work_mbc->coll_elems, char*,
586 coll_elems_al,
587 work_mbc->ncoll_elems + 1);
588 work_mbc->coll_elems[work_mbc->ncoll_elems++] = elem;
589 }
590 }
591 wc = -1;
592 }
593 else
594 /* We treat '[' as a normal character here. */
595 {
596 wc2 = wc1; wc1 = wc; wc = wc2; /* swap */
597 }
598 }
599 else
600 {
601 if (wc == L'\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
602 wc = fetch_wc(("Unbalanced ["));
603 }
604
605 if (wc1 == -1)
606 wc1 = fetch_wc(_("Unbalanced ["));
607
608 if (wc1 == L'-')
609 /* build range characters. */
610 {
611 wc2 = fetch_wc(_("Unbalanced ["));
612 if (wc2 == L']')
613 {
614 /* In the case [x-], the - is an ordinary hyphen,
615 which is left in c1, the lookahead character. */
616 lexptr -= cur_mb_len;
617 lexleft += cur_mb_len;
618 wc2 = wc;
619 }
620 else
621 {
622 if (wc2 == L'\\'
623 && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
624 wc2 = fetch_wc(_("Unbalanced ["));
625 wc1 = fetch_wc(_("Unbalanced ["));
626 }
627
628 if (range_sts_al == 0)
629 {
630 MALLOC(work_mbc->range_sts, wchar_t, ++range_sts_al);
631 MALLOC(work_mbc->range_ends, wchar_t, ++range_ends_al);
632 }
633 REALLOC_IF_NECESSARY(work_mbc->range_sts, wchar_t,
634 range_sts_al, work_mbc->nranges + 1);
635 work_mbc->range_sts[work_mbc->nranges] = wc;
636 REALLOC_IF_NECESSARY(work_mbc->range_ends, wchar_t,
637 range_ends_al, work_mbc->nranges + 1);
638 work_mbc->range_ends[work_mbc->nranges++] = wc2;
639 }
640 else if (wc != -1)
641 /* build normal characters. */
642 {
643 REALLOC_IF_NECESSARY(work_mbc->chars, wchar_t, chars_al,
644 work_mbc->nchars + 1);
645 work_mbc->chars[work_mbc->nchars++] = wc;
646 }
647 }
648 while ((wc = wc1) != L']');
649}
650#endif /* MBS_SUPPORT */
651
365#ifdef __STDC__
366#define FUNC(F, P) static int F(int c) { return P(c); }
367#else
368#define FUNC(F, P) static int F(c) int c; { return P(c); }
369#endif
370
371FUNC(is_alpha, ISALPHA)
372FUNC(is_upper, ISUPPER)

--- 14 unchanged lines hidden (view full) ---

387}
388
389/* The following list maps the names of the Posix named character classes
390 to predicate functions that determine whether a given character is in
391 the class. The leading [ has already been eaten by the lexical analyzer. */
392static struct {
393 const char *name;
394 int (*pred) PARAMS ((int));
652#ifdef __STDC__
653#define FUNC(F, P) static int F(int c) { return P(c); }
654#else
655#define FUNC(F, P) static int F(c) int c; { return P(c); }
656#endif
657
658FUNC(is_alpha, ISALPHA)
659FUNC(is_upper, ISUPPER)

--- 14 unchanged lines hidden (view full) ---

674}
675
676/* The following list maps the names of the Posix named character classes
677 to predicate functions that determine whether a given character is in
678 the class. The leading [ has already been eaten by the lexical analyzer. */
679static struct {
680 const char *name;
681 int (*pred) PARAMS ((int));
395} prednames[] = {
682} const prednames[] = {
396 { ":alpha:]", is_alpha },
397 { ":upper:]", is_upper },
398 { ":lower:]", is_lower },
399 { ":digit:]", is_digit },
400 { ":xdigit:]", is_xdigit },
401 { ":space:]", is_space },
402 { ":punct:]", is_punct },
403 { ":alnum:]", is_alnum },

--- 16 unchanged lines hidden (view full) ---

420 if (lexleft < len)
421 return 0;
422 return strncmp(s, lexptr, len) == 0;
423}
424
425static token
426lex (void)
427{
683 { ":alpha:]", is_alpha },
684 { ":upper:]", is_upper },
685 { ":lower:]", is_lower },
686 { ":digit:]", is_digit },
687 { ":xdigit:]", is_xdigit },
688 { ":space:]", is_space },
689 { ":punct:]", is_punct },
690 { ":alnum:]", is_alnum },

--- 16 unchanged lines hidden (view full) ---

707 if (lexleft < len)
708 return 0;
709 return strncmp(s, lexptr, len) == 0;
710}
711
712static token
713lex (void)
714{
428 token c, c1, c2;
715 unsigned c, c1, c2;
429 int backslash = 0, invert;
430 charclass ccl;
431 int i;
716 int backslash = 0, invert;
717 charclass ccl;
718 int i;
432 char lo[2];
433 char hi[2];
434
435 /* Basic plan: We fetch a character. If it's a backslash,
436 we set the backslash flag and go through the loop again.
437 On the plus side, this avoids having a duplicate of the
438 main switch inside the backslash case. On the minus side,
439 it means that just about every case begins with
440 "if (backslash) ...". */
441 for (i = 0; i < 2; ++i)
442 {
443 FETCH(c, 0);
719
720 /* Basic plan: We fetch a character. If it's a backslash,
721 we set the backslash flag and go through the loop again.
722 On the plus side, this avoids having a duplicate of the
723 main switch inside the backslash case. On the minus side,
724 it means that just about every case begins with
725 "if (backslash) ...". */
726 for (i = 0; i < 2; ++i)
727 {
728 FETCH(c, 0);
729#ifdef MBS_SUPPORT
730 if (MB_CUR_MAX > 1 && cur_mb_index)
731 /* If this is a part of a multi-byte character, we must treat
732 this byte data as a normal character.
733 e.g. In case of SJIS encoding, some character contains '\',
734 but they must not be backslash. */
735 goto normal_char;
736#endif /* MBS_SUPPORT */
444 switch (c)
445 {
446 case '\\':
447 if (backslash)
448 goto normal_char;
449 if (lexleft == 0)
450 dfaerror(_("Unfinished \\ escape"));
451 backslash = 1;

--- 204 unchanged lines hidden (view full) ---

656 goto normal_char;
657 --parens;
658 laststart = 0;
659 return lasttok = RPAREN;
660
661 case '.':
662 if (backslash)
663 goto normal_char;
737 switch (c)
738 {
739 case '\\':
740 if (backslash)
741 goto normal_char;
742 if (lexleft == 0)
743 dfaerror(_("Unfinished \\ escape"));
744 backslash = 1;

--- 204 unchanged lines hidden (view full) ---

949 goto normal_char;
950 --parens;
951 laststart = 0;
952 return lasttok = RPAREN;
953
954 case '.':
955 if (backslash)
956 goto normal_char;
957#ifdef MBS_SUPPORT
958 if (MB_CUR_MAX > 1)
959 {
960 /* In multibyte environment period must match with a single
961 character not a byte. So we use ANYCHAR. */
962 laststart = 0;
963 return lasttok = ANYCHAR;
964 }
965#endif /* MBS_SUPPORT */
664 zeroset(ccl);
665 notset(ccl);
666 if (!(syntax_bits & RE_DOT_NEWLINE))
667 clrbit(eolbyte, ccl);
668 if (syntax_bits & RE_DOT_NOT_NULL)
669 clrbit('\0', ccl);
670 laststart = 0;
671 return lasttok = CSET + charclass_index(ccl);

--- 9 unchanged lines hidden (view full) ---

681 if (c == 'W')
682 notset(ccl);
683 laststart = 0;
684 return lasttok = CSET + charclass_index(ccl);
685
686 case '[':
687 if (backslash)
688 goto normal_char;
966 zeroset(ccl);
967 notset(ccl);
968 if (!(syntax_bits & RE_DOT_NEWLINE))
969 clrbit(eolbyte, ccl);
970 if (syntax_bits & RE_DOT_NOT_NULL)
971 clrbit('\0', ccl);
972 laststart = 0;
973 return lasttok = CSET + charclass_index(ccl);

--- 9 unchanged lines hidden (view full) ---

983 if (c == 'W')
984 notset(ccl);
985 laststart = 0;
986 return lasttok = CSET + charclass_index(ccl);
987
988 case '[':
989 if (backslash)
990 goto normal_char;
991 laststart = 0;
992#ifdef MBS_SUPPORT
993 if (MB_CUR_MAX > 1)
994 {
995 /* In multibyte environment a bracket expression may contain
996 multibyte characters, which must be treated as characters
997 (not bytes). So we parse it by parse_bracket_exp_mb(). */
998 parse_bracket_exp_mb();
999 return lasttok = MBCSET;
1000 }
1001#endif
689 zeroset(ccl);
690 FETCH(c, _("Unbalanced ["));
691 if (c == '^')
692 {
693 FETCH(c, _("Unbalanced ["));
694 invert = 1;
695 }
696 else

--- 5 unchanged lines hidden (view full) ---

702 construct, we just treat it as a bunch of ordinary
703 characters. We can do this because we assume
704 regex has checked for syntax errors before
705 dfa is ever called. */
706 if (c == '[' && (syntax_bits & RE_CHAR_CLASSES))
707 for (c1 = 0; prednames[c1].name; ++c1)
708 if (looking_at(prednames[c1].name))
709 {
1002 zeroset(ccl);
1003 FETCH(c, _("Unbalanced ["));
1004 if (c == '^')
1005 {
1006 FETCH(c, _("Unbalanced ["));
1007 invert = 1;
1008 }
1009 else

--- 5 unchanged lines hidden (view full) ---

1015 construct, we just treat it as a bunch of ordinary
1016 characters. We can do this because we assume
1017 regex has checked for syntax errors before
1018 dfa is ever called. */
1019 if (c == '[' && (syntax_bits & RE_CHAR_CLASSES))
1020 for (c1 = 0; prednames[c1].name; ++c1)
1021 if (looking_at(prednames[c1].name))
1022 {
710 int (*pred)() = prednames[c1].pred;
711 if (case_fold
712 && (pred == is_upper || pred == is_lower))
713 pred = is_alpha;
1023 int (*pred) PARAMS ((int)) = prednames[c1].pred;
714
715 for (c2 = 0; c2 < NOTCHAR; ++c2)
716 if ((*pred)(c2))
1024
1025 for (c2 = 0; c2 < NOTCHAR; ++c2)
1026 if ((*pred)(c2))
717 setbit(c2, ccl);
1027 setbit_case_fold (c2, ccl);
718 lexptr += strlen(prednames[c1].name);
719 lexleft -= strlen(prednames[c1].name);
720 FETCH(c1, _("Unbalanced ["));
721 goto skip;
722 }
723 if (c == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
724 FETCH(c, _("Unbalanced ["));
725 FETCH(c1, _("Unbalanced ["));
726 if (c1 == '-')
727 {
728 FETCH(c2, _("Unbalanced ["));
729 if (c2 == ']')
730 {
731 /* In the case [x-], the - is an ordinary hyphen,
732 which is left in c1, the lookahead character. */
733 --lexptr;
734 ++lexleft;
1028 lexptr += strlen(prednames[c1].name);
1029 lexleft -= strlen(prednames[c1].name);
1030 FETCH(c1, _("Unbalanced ["));
1031 goto skip;
1032 }
1033 if (c == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
1034 FETCH(c, _("Unbalanced ["));
1035 FETCH(c1, _("Unbalanced ["));
1036 if (c1 == '-')
1037 {
1038 FETCH(c2, _("Unbalanced ["));
1039 if (c2 == ']')
1040 {
1041 /* In the case [x-], the - is an ordinary hyphen,
1042 which is left in c1, the lookahead character. */
1043 --lexptr;
1044 ++lexleft;
735 c2 = c;
736 }
737 else
738 {
739 if (c2 == '\\'
740 && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
741 FETCH(c2, _("Unbalanced ["));
742 FETCH(c1, _("Unbalanced ["));
1045 }
1046 else
1047 {
1048 if (c2 == '\\'
1049 && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
1050 FETCH(c2, _("Unbalanced ["));
1051 FETCH(c1, _("Unbalanced ["));
743 }
744 }
745 else
746 c2 = c;
747
748 lo[0] = c; lo[1] = '\0';
749 hi[0] = c2; hi[1] = '\0';
750 for (c = 0; c < NOTCHAR; c++)
751 {
752 char ch[2];
753 ch[0] = c; ch[1] = '\0';
754 if (strcoll (lo, ch) <= 0 && strcoll (ch, hi) <= 0)
755 {
756 setbit (c, ccl);
757 if (case_fold)
758 {
759 if (ISUPPER (c))
760 setbit (tolower (c), ccl);
761 else if (ISLOWER (c))
762 setbit (toupper (c), ccl);
1052 if (!hard_LC_COLLATE) {
1053 for (; c <= c2; c++)
1054 setbit_case_fold (c, ccl);
1055 } else {
1056 /* POSIX locales are painful - leave the decision to libc */
1057 char expr[6] = { '[', c, '-', c2, ']', '\0' };
1058 regex_t re;
1059 if (regcomp (&re, expr, case_fold ? REG_ICASE : 0) == REG_NOERROR) {
1060 for (c = 0; c < NOTCHAR; ++c) {
1061 char buf[2] = { c, '\0' };
1062 regmatch_t mat;
1063 if (regexec (&re, buf, 1, &mat, 0) == REG_NOERROR
1064 && mat.rm_so == 0 && mat.rm_eo == 1)
1065 setbit_case_fold (c, ccl);
1066 }
1067 regfree (&re);
763 }
1068 }
1069 }
1070 continue;
764 }
765 }
766
1071 }
1072 }
1073
1074 setbit_case_fold (c, ccl);
1075
767 skip:
768 ;
769 }
770 while ((c = c1) != ']');
771 if (invert)
772 {
773 notset(ccl);
774 if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE)
775 clrbit(eolbyte, ccl);
776 }
1076 skip:
1077 ;
1078 }
1079 while ((c = c1) != ']');
1080 if (invert)
1081 {
1082 notset(ccl);
1083 if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE)
1084 clrbit(eolbyte, ccl);
1085 }
777 laststart = 0;
778 return lasttok = CSET + charclass_index(ccl);
779
780 default:
781 normal_char:
782 laststart = 0;
783 if (case_fold && ISALPHA(c))
784 {
785 zeroset(ccl);
1086 return lasttok = CSET + charclass_index(ccl);
1087
1088 default:
1089 normal_char:
1090 laststart = 0;
1091 if (case_fold && ISALPHA(c))
1092 {
1093 zeroset(ccl);
786 setbit(c, ccl);
787 if (isupper(c))
788 setbit(tolower(c), ccl);
789 else
790 setbit(toupper(c), ccl);
1094 setbit_case_fold (c, ccl);
791 return lasttok = CSET + charclass_index(ccl);
792 }
793 return c;
794 }
795 }
796
797 /* The above loop should consume at most a backslash
798 and some other character. */

--- 10 unchanged lines hidden (view full) ---

809 required of the real stack later on in
810 dfaanalyze(). */
811
812/* Add the given token to the parse tree, maintaining the depth count and
813 updating the maximum depth if necessary. */
814static void
815addtok (token t)
816{
1095 return lasttok = CSET + charclass_index(ccl);
1096 }
1097 return c;
1098 }
1099 }
1100
1101 /* The above loop should consume at most a backslash
1102 and some other character. */

--- 10 unchanged lines hidden (view full) ---

1113 required of the real stack later on in
1114 dfaanalyze(). */
1115
1116/* Add the given token to the parse tree, maintaining the depth count and
1117 updating the maximum depth if necessary. */
1118static void
1119addtok (token t)
1120{
1121#ifdef MBS_SUPPORT
1122 if (MB_CUR_MAX > 1)
1123 {
1124 REALLOC_IF_NECESSARY(dfa->multibyte_prop, int, dfa->nmultibyte_prop,
1125 dfa->tindex);
1126 /* Set dfa->multibyte_prop. See struct dfa in dfa.h. */
1127 if (t == MBCSET)
1128 dfa->multibyte_prop[dfa->tindex] = ((dfa->nmbcsets - 1) << 2) + 3;
1129 else if (t < NOTCHAR)
1130 dfa->multibyte_prop[dfa->tindex]
1131 = (cur_mb_len == 1)? 3 /* single-byte char */
1132 : (((cur_mb_index == 1)? 1 : 0) /* 1st-byte of multibyte char */
1133 + ((cur_mb_index == cur_mb_len)? 2 : 0)); /* last-byte */
1134 else
1135 /* It may be unnecesssary, but it is safer to treat other
1136 symbols as singlebyte characters. */
1137 dfa->multibyte_prop[dfa->tindex] = 3;
1138 }
1139#endif
1140
817 REALLOC_IF_NECESSARY(dfa->tokens, token, dfa->talloc, dfa->tindex);
818 dfa->tokens[dfa->tindex++] = t;
819
820 switch (t)
821 {
822 case QMARK:
823 case STAR:
824 case PLUS:

--- 24 unchanged lines hidden (view full) ---

849 branch:
850 branch closure
851 closure
852
853 closure:
854 closure QMARK
855 closure STAR
856 closure PLUS
1141 REALLOC_IF_NECESSARY(dfa->tokens, token, dfa->talloc, dfa->tindex);
1142 dfa->tokens[dfa->tindex++] = t;
1143
1144 switch (t)
1145 {
1146 case QMARK:
1147 case STAR:
1148 case PLUS:

--- 24 unchanged lines hidden (view full) ---

1173 branch:
1174 branch closure
1175 closure
1176
1177 closure:
1178 closure QMARK
1179 closure STAR
1180 closure PLUS
1181 closure REPMN
857 atom
858
859 atom:
860 <normal character>
1182 atom
1183
1184 atom:
1185 <normal character>
1186 <multibyte character>
1187 ANYCHAR
1188 MBCSET
861 CSET
862 BACKREF
863 BEGLINE
864 ENDLINE
865 BEGWORD
866 ENDWORD
867 LIMWORD
868 NOTLIMWORD
1189 CSET
1190 BACKREF
1191 BEGLINE
1192 ENDLINE
1193 BEGWORD
1194 ENDWORD
1195 LIMWORD
1196 NOTLIMWORD
1197 CRANGE
1198 LPAREN regexp RPAREN
869 <empty>
870
871 The parser builds a parse tree in postfix form in an array of tokens. */
872
873static void
874atom (void)
875{
876 if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF
877 || tok == BEGLINE || tok == ENDLINE || tok == BEGWORD
1199 <empty>
1200
1201 The parser builds a parse tree in postfix form in an array of tokens. */
1202
1203static void
1204atom (void)
1205{
1206 if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF
1207 || tok == BEGLINE || tok == ENDLINE || tok == BEGWORD
1208#ifdef MBS_SUPPORT
1209 || tok == ANYCHAR || tok == MBCSET /* MB_CUR_MAX > 1 */
1210#endif /* MBS_SUPPORT */
878 || tok == ENDWORD || tok == LIMWORD || tok == NOTLIMWORD)
879 {
880 addtok(tok);
881 tok = lex();
1211 || tok == ENDWORD || tok == LIMWORD || tok == NOTLIMWORD)
1212 {
1213 addtok(tok);
1214 tok = lex();
1215#ifdef MBS_SUPPORT
1216 /* We treat a multibyte character as a single atom, so that DFA
1217 can treat a multibyte character as a single expression.
1218
1219 e.g. We construct following tree from "<mb1><mb2>".
1220 <mb1(1st-byte)><mb1(2nd-byte)><CAT><mb1(3rd-byte)><CAT>
1221 <mb2(1st-byte)><mb2(2nd-byte)><CAT><mb2(3rd-byte)><CAT><CAT>
1222 */
1223 if (MB_CUR_MAX > 1)
1224 {
1225 while (cur_mb_index > 1 && tok >= 0 && tok < NOTCHAR)
1226 {
1227 addtok(tok);
1228 addtok(CAT);
1229 tok = lex();
1230 }
1231 }
1232#endif /* MBS_SUPPORT */
882 }
1233 }
1234 else if (tok == CRANGE)
1235 {
1236 /* A character range like "[a-z]" in a locale other than "C" or
1237 "POSIX". This range might any sequence of one or more
1238 characters. Unfortunately the POSIX locale primitives give
1239 us no practical way to find what character sequences might be
1240 matched. Treat this approximately like "(.\1)" -- i.e. match
1241 one character, and then punt to the full matcher. */
1242 charclass ccl;
1243 zeroset (ccl);
1244 notset (ccl);
1245 addtok (CSET + charclass_index (ccl));
1246 addtok (BACKREF);
1247 addtok (CAT);
1248 tok = lex ();
1249 }
883 else if (tok == LPAREN)
884 {
885 tok = lex();
886 regexp(0);
887 if (tok != RPAREN)
888 dfaerror(_("Unbalanced ("));
889 tok = lex();
890 }

--- 93 unchanged lines hidden (view full) ---

984 addtok(OR);
985 }
986}
987
988/* Main entry point for the parser. S is a string to be parsed, len is the
989 length of the string, so s can include NUL characters. D is a pointer to
990 the struct dfa to parse into. */
991void
1250 else if (tok == LPAREN)
1251 {
1252 tok = lex();
1253 regexp(0);
1254 if (tok != RPAREN)
1255 dfaerror(_("Unbalanced ("));
1256 tok = lex();
1257 }

--- 93 unchanged lines hidden (view full) ---

1351 addtok(OR);
1352 }
1353}
1354
1355/* Main entry point for the parser. S is a string to be parsed, len is the
1356 length of the string, so s can include NUL characters. D is a pointer to
1357 the struct dfa to parse into. */
1358void
992dfaparse (char *s, size_t len, struct dfa *d)
1359dfaparse (char const *s, size_t len, struct dfa *d)
993{
994 dfa = d;
995 lexstart = lexptr = s;
996 lexleft = len;
997 lasttok = END;
998 laststart = 1;
999 parens = 0;
1360{
1361 dfa = d;
1362 lexstart = lexptr = s;
1363 lexleft = len;
1364 lasttok = END;
1365 laststart = 1;
1366 parens = 0;
1367#if ENABLE_NLS
1368 hard_LC_COLLATE = hard_locale (LC_COLLATE);
1369#endif
1370#ifdef MBS_SUPPORT
1371 if (MB_CUR_MAX > 1)
1372 {
1373 cur_mb_index = 0;
1374 cur_mb_len = 0;
1375 memset(&mbs, 0, sizeof(mbstate_t));
1376 }
1377#endif /* MBS_SUPPORT */
1000
1001 if (! syntax_bits_set)
1002 dfaerror(_("No syntax specified"));
1003
1004 tok = lex();
1005 depth = d->depth;
1006
1007 regexp(1);

--- 9 unchanged lines hidden (view full) ---

1017
1018 ++d->nregexps;
1019}
1020
1021/* Some primitives for operating on sets of positions. */
1022
1023/* Copy one set to another; the destination must be large enough. */
1024static void
1378
1379 if (! syntax_bits_set)
1380 dfaerror(_("No syntax specified"));
1381
1382 tok = lex();
1383 depth = d->depth;
1384
1385 regexp(1);

--- 9 unchanged lines hidden (view full) ---

1395
1396 ++d->nregexps;
1397}
1398
1399/* Some primitives for operating on sets of positions. */
1400
1401/* Copy one set to another; the destination must be large enough. */
1402static void
1025copy (position_set *src, position_set *dst)
1403copy (position_set const *src, position_set *dst)
1026{
1027 int i;
1028
1029 for (i = 0; i < src->nelem; ++i)
1030 dst->elems[i] = src->elems[i];
1031 dst->nelem = src->nelem;
1032}
1033

--- 22 unchanged lines hidden (view full) ---

1056 t1 = t2;
1057 }
1058 }
1059}
1060
1061/* Merge two sets of positions into a third. The result is exactly as if
1062 the positions of both sets were inserted into an initially empty set. */
1063static void
1404{
1405 int i;
1406
1407 for (i = 0; i < src->nelem; ++i)
1408 dst->elems[i] = src->elems[i];
1409 dst->nelem = src->nelem;
1410}
1411

--- 22 unchanged lines hidden (view full) ---

1434 t1 = t2;
1435 }
1436 }
1437}
1438
1439/* Merge two sets of positions into a third. The result is exactly as if
1440 the positions of both sets were inserted into an initially empty set. */
1441static void
1064merge (position_set *s1, position_set *s2, position_set *m)
1442merge (position_set const *s1, position_set const *s2, position_set *m)
1065{
1066 int i = 0, j = 0;
1067
1068 m->nelem = 0;
1069 while (i < s1->nelem && j < s2->nelem)
1070 if (s1->elems[i].index > s2->elems[j].index)
1071 m->elems[m->nelem++] = s1->elems[i++];
1072 else if (s1->elems[i].index < s2->elems[j].index)

--- 23 unchanged lines hidden (view full) ---

1096 s->elems[i] = s->elems[i + 1];
1097}
1098
1099/* Find the index of the state corresponding to the given position set with
1100 the given preceding context, or create a new state if there is no such
1101 state. Newline and letter tell whether we got here on a newline or
1102 letter, respectively. */
1103static int
1443{
1444 int i = 0, j = 0;
1445
1446 m->nelem = 0;
1447 while (i < s1->nelem && j < s2->nelem)
1448 if (s1->elems[i].index > s2->elems[j].index)
1449 m->elems[m->nelem++] = s1->elems[i++];
1450 else if (s1->elems[i].index < s2->elems[j].index)

--- 23 unchanged lines hidden (view full) ---

1474 s->elems[i] = s->elems[i + 1];
1475}
1476
1477/* Find the index of the state corresponding to the given position set with
1478 the given preceding context, or create a new state if there is no such
1479 state. Newline and letter tell whether we got here on a newline or
1480 letter, respectively. */
1481static int
1104state_index (struct dfa *d, position_set *s, int newline, int letter)
1482state_index (struct dfa *d, position_set const *s, int newline, int letter)
1105{
1106 int hash = 0;
1107 int constraint;
1108 int i, j;
1109
1110 newline = newline ? 1 : 0;
1111 letter = letter ? 1 : 0;
1112

--- 20 unchanged lines hidden (view full) ---

1133 d->states[i].hash = hash;
1134 MALLOC(d->states[i].elems.elems, position, s->nelem);
1135 copy(s, &d->states[i].elems);
1136 d->states[i].newline = newline;
1137 d->states[i].letter = letter;
1138 d->states[i].backref = 0;
1139 d->states[i].constraint = 0;
1140 d->states[i].first_end = 0;
1483{
1484 int hash = 0;
1485 int constraint;
1486 int i, j;
1487
1488 newline = newline ? 1 : 0;
1489 letter = letter ? 1 : 0;
1490

--- 20 unchanged lines hidden (view full) ---

1511 d->states[i].hash = hash;
1512 MALLOC(d->states[i].elems.elems, position, s->nelem);
1513 copy(s, &d->states[i].elems);
1514 d->states[i].newline = newline;
1515 d->states[i].letter = letter;
1516 d->states[i].backref = 0;
1517 d->states[i].constraint = 0;
1518 d->states[i].first_end = 0;
1519#ifdef MBS_SUPPORT
1520 if (MB_CUR_MAX > 1)
1521 d->states[i].mbps.nelem = 0;
1522#endif
1141 for (j = 0; j < s->nelem; ++j)
1142 if (d->tokens[s->elems[j].index] < 0)
1143 {
1144 constraint = s->elems[j].constraint;
1145 if (SUCCEEDS_IN_CONTEXT(constraint, newline, 0, letter, 0)
1146 || SUCCEEDS_IN_CONTEXT(constraint, newline, 0, letter, 1)
1147 || SUCCEEDS_IN_CONTEXT(constraint, newline, 1, letter, 0)
1148 || SUCCEEDS_IN_CONTEXT(constraint, newline, 1, letter, 1))

--- 13 unchanged lines hidden (view full) ---

1162}
1163
1164/* Find the epsilon closure of a set of positions. If any position of the set
1165 contains a symbol that matches the empty string in some context, replace
1166 that position with the elements of its follow labeled with an appropriate
1167 constraint. Repeat exhaustively until no funny positions are left.
1168 S->elems must be large enough to hold the result. */
1169static void
1523 for (j = 0; j < s->nelem; ++j)
1524 if (d->tokens[s->elems[j].index] < 0)
1525 {
1526 constraint = s->elems[j].constraint;
1527 if (SUCCEEDS_IN_CONTEXT(constraint, newline, 0, letter, 0)
1528 || SUCCEEDS_IN_CONTEXT(constraint, newline, 0, letter, 1)
1529 || SUCCEEDS_IN_CONTEXT(constraint, newline, 1, letter, 0)
1530 || SUCCEEDS_IN_CONTEXT(constraint, newline, 1, letter, 1))

--- 13 unchanged lines hidden (view full) ---

1544}
1545
1546/* Find the epsilon closure of a set of positions. If any position of the set
1547 contains a symbol that matches the empty string in some context, replace
1548 that position with the elements of its follow labeled with an appropriate
1549 constraint. Repeat exhaustively until no funny positions are left.
1550 S->elems must be large enough to hold the result. */
1551static void
1170epsclosure (position_set *s, struct dfa *d)
1552epsclosure (position_set *s, struct dfa const *d)
1171{
1172 int i, j;
1173 int *visited;
1174 position p, old;
1175
1176 MALLOC(visited, int, d->tindex);
1177 for (i = 0; i < d->tindex; ++i)
1178 visited[i] = 0;
1179
1180 for (i = 0; i < s->nelem; ++i)
1181 if (d->tokens[s->elems[i].index] >= NOTCHAR
1182 && d->tokens[s->elems[i].index] != BACKREF
1553{
1554 int i, j;
1555 int *visited;
1556 position p, old;
1557
1558 MALLOC(visited, int, d->tindex);
1559 for (i = 0; i < d->tindex; ++i)
1560 visited[i] = 0;
1561
1562 for (i = 0; i < s->nelem; ++i)
1563 if (d->tokens[s->elems[i].index] >= NOTCHAR
1564 && d->tokens[s->elems[i].index] != BACKREF
1565#ifdef MBS_SUPPORT
1566 && d->tokens[s->elems[i].index] != ANYCHAR
1567 && d->tokens[s->elems[i].index] != MBCSET
1568#endif
1183 && d->tokens[s->elems[i].index] < CSET)
1184 {
1185 old = s->elems[i];
1186 p.constraint = old.constraint;
1187 delete(s->elems[i], s);
1188 if (visited[old.index])
1189 {
1190 --i;

--- 265 unchanged lines hidden (view full) ---

1456 putc('\n', stderr);
1457 }
1458#endif
1459
1460 /* For each follow set that is the follow set of a real position, replace
1461 it with its epsilon closure. */
1462 for (i = 0; i < d->tindex; ++i)
1463 if (d->tokens[i] < NOTCHAR || d->tokens[i] == BACKREF
1569 && d->tokens[s->elems[i].index] < CSET)
1570 {
1571 old = s->elems[i];
1572 p.constraint = old.constraint;
1573 delete(s->elems[i], s);
1574 if (visited[old.index])
1575 {
1576 --i;

--- 265 unchanged lines hidden (view full) ---

1842 putc('\n', stderr);
1843 }
1844#endif
1845
1846 /* For each follow set that is the follow set of a real position, replace
1847 it with its epsilon closure. */
1848 for (i = 0; i < d->tindex; ++i)
1849 if (d->tokens[i] < NOTCHAR || d->tokens[i] == BACKREF
1850#ifdef MBS_SUPPORT
1851 || d->tokens[i] == ANYCHAR
1852 || d->tokens[i] == MBCSET
1853#endif
1464 || d->tokens[i] >= CSET)
1465 {
1466#ifdef DEBUG
1467 fprintf(stderr, "follows(%d:", i);
1468 prtok(d->tokens[i]);
1469 fprintf(stderr, "):");
1470 for (j = d->follows[i].nelem - 1; j >= 0; --j)
1471 {

--- 85 unchanged lines hidden (view full) ---

1557 position_set follows; /* Union of the follows of some group. */
1558 position_set tmp; /* Temporary space for merging sets. */
1559 int state; /* New state. */
1560 int wants_newline; /* New state wants to know newline context. */
1561 int state_newline; /* New state on a newline transition. */
1562 int wants_letter; /* New state wants to know letter context. */
1563 int state_letter; /* New state on a letter transition. */
1564 static int initialized; /* Flag for static initialization. */
1854 || d->tokens[i] >= CSET)
1855 {
1856#ifdef DEBUG
1857 fprintf(stderr, "follows(%d:", i);
1858 prtok(d->tokens[i]);
1859 fprintf(stderr, "):");
1860 for (j = d->follows[i].nelem - 1; j >= 0; --j)
1861 {

--- 85 unchanged lines hidden (view full) ---

1947 position_set follows; /* Union of the follows of some group. */
1948 position_set tmp; /* Temporary space for merging sets. */
1949 int state; /* New state. */
1950 int wants_newline; /* New state wants to know newline context. */
1951 int state_newline; /* New state on a newline transition. */
1952 int wants_letter; /* New state wants to know letter context. */
1953 int state_letter; /* New state on a letter transition. */
1954 static int initialized; /* Flag for static initialization. */
1955#ifdef MBS_SUPPORT
1956 int next_isnt_1st_byte = 0; /* Flag If we can't add state0. */
1957#endif
1565 int i, j, k;
1566
1567 /* Initialize the set of letters, if necessary. */
1568 if (! initialized)
1569 {
1570 initialized = 1;
1571 for (i = 0; i < NOTCHAR; ++i)
1572 if (IS_WORD_CONSTITUENT(i))

--- 5 unchanged lines hidden (view full) ---

1578
1579 for (i = 0; i < d->states[s].elems.nelem; ++i)
1580 {
1581 pos = d->states[s].elems.elems[i];
1582 if (d->tokens[pos.index] >= 0 && d->tokens[pos.index] < NOTCHAR)
1583 setbit(d->tokens[pos.index], matches);
1584 else if (d->tokens[pos.index] >= CSET)
1585 copyset(d->charclasses[d->tokens[pos.index] - CSET], matches);
1958 int i, j, k;
1959
1960 /* Initialize the set of letters, if necessary. */
1961 if (! initialized)
1962 {
1963 initialized = 1;
1964 for (i = 0; i < NOTCHAR; ++i)
1965 if (IS_WORD_CONSTITUENT(i))

--- 5 unchanged lines hidden (view full) ---

1971
1972 for (i = 0; i < d->states[s].elems.nelem; ++i)
1973 {
1974 pos = d->states[s].elems.elems[i];
1975 if (d->tokens[pos.index] >= 0 && d->tokens[pos.index] < NOTCHAR)
1976 setbit(d->tokens[pos.index], matches);
1977 else if (d->tokens[pos.index] >= CSET)
1978 copyset(d->charclasses[d->tokens[pos.index] - CSET], matches);
1979#ifdef MBS_SUPPORT
1980 else if (d->tokens[pos.index] == ANYCHAR
1981 || d->tokens[pos.index] == MBCSET)
1982 /* MB_CUR_MAX > 1 */
1983 {
1984 /* ANYCHAR and MBCSET must match with a single character, so we
1985 must put it to d->states[s].mbps, which contains the positions
1986 which can match with a single character not a byte. */
1987 if (d->states[s].mbps.nelem == 0)
1988 {
1989 MALLOC(d->states[s].mbps.elems, position,
1990 d->states[s].elems.nelem);
1991 }
1992 insert(pos, &(d->states[s].mbps));
1993 continue;
1994 }
1995#endif /* MBS_SUPPORT */
1586 else
1587 continue;
1588
1589 /* Some characters may need to be eliminated from matches because
1590 they fail in the current context. */
1591 if (pos.constraint != 0xFF)
1592 {
1593 if (! MATCHES_NEWLINE_CONTEXT(pos.constraint,

--- 120 unchanged lines hidden (view full) ---

1714 follows.nelem = 0;
1715
1716 /* Find the union of the follows of the positions of the group.
1717 This is a hideously inefficient loop. Fix it someday. */
1718 for (j = 0; j < grps[i].nelem; ++j)
1719 for (k = 0; k < d->follows[grps[i].elems[j].index].nelem; ++k)
1720 insert(d->follows[grps[i].elems[j].index].elems[k], &follows);
1721
1996 else
1997 continue;
1998
1999 /* Some characters may need to be eliminated from matches because
2000 they fail in the current context. */
2001 if (pos.constraint != 0xFF)
2002 {
2003 if (! MATCHES_NEWLINE_CONTEXT(pos.constraint,

--- 120 unchanged lines hidden (view full) ---

2124 follows.nelem = 0;
2125
2126 /* Find the union of the follows of the positions of the group.
2127 This is a hideously inefficient loop. Fix it someday. */
2128 for (j = 0; j < grps[i].nelem; ++j)
2129 for (k = 0; k < d->follows[grps[i].elems[j].index].nelem; ++k)
2130 insert(d->follows[grps[i].elems[j].index].elems[k], &follows);
2131
2132#ifdef MBS_SUPPORT
2133 if (MB_CUR_MAX > 1)
2134 {
2135 /* If a token in follows.elems is not 1st byte of a multibyte
2136 character, or the states of follows must accept the bytes
2137 which are not 1st byte of the multibyte character.
2138 Then, if a state of follows encounter a byte, it must not be
2139 a 1st byte of a multibyte character nor singlebyte character.
2140 We cansel to add state[0].follows to next state, because
2141 state[0] must accept 1st-byte
2142
2143 For example, we assume <sb a> is a certain singlebyte
2144 character, <mb A> is a certain multibyte character, and the
2145 codepoint of <sb a> equals the 2nd byte of the codepoint of
2146 <mb A>.
2147 When state[0] accepts <sb a>, state[i] transit to state[i+1]
2148 by accepting accepts 1st byte of <mb A>, and state[i+1]
2149 accepts 2nd byte of <mb A>, if state[i+1] encounter the
2150 codepoint of <sb a>, it must not be <sb a> but 2nd byte of
2151 <mb A>, so we can not add state[0]. */
2152
2153 next_isnt_1st_byte = 0;
2154 for (j = 0; j < follows.nelem; ++j)
2155 {
2156 if (!(d->multibyte_prop[follows.elems[j].index] & 1))
2157 {
2158 next_isnt_1st_byte = 1;
2159 break;
2160 }
2161 }
2162 }
2163#endif
2164
1722 /* If we are building a searching matcher, throw in the positions
1723 of state 0 as well. */
2165 /* If we are building a searching matcher, throw in the positions
2166 of state 0 as well. */
2167#ifdef MBS_SUPPORT
2168 if (d->searchflag && (MB_CUR_MAX == 1 || !next_isnt_1st_byte))
2169#else
1724 if (d->searchflag)
2170 if (d->searchflag)
2171#endif
1725 for (j = 0; j < d->states[0].elems.nelem; ++j)
1726 insert(d->states[0].elems.elems[j], &follows);
1727
1728 /* Find out if the new state will want any context information. */
1729 wants_newline = 0;
1730 if (tstbit(eolbyte, labels[i]))
1731 for (j = 0; j < follows.nelem; ++j)
1732 if (PREV_NEWLINE_DEPENDENT(follows.elems[j].constraint))

--- 100 unchanged lines hidden (view full) ---

1833 int oldalloc = d->tralloc;
1834
1835 while (trans[i] >= d->tralloc)
1836 d->tralloc *= 2;
1837 REALLOC(d->realtrans, int *, d->tralloc + 1);
1838 d->trans = d->realtrans + 1;
1839 REALLOC(d->fails, int *, d->tralloc);
1840 REALLOC(d->success, int, d->tralloc);
2172 for (j = 0; j < d->states[0].elems.nelem; ++j)
2173 insert(d->states[0].elems.elems[j], &follows);
2174
2175 /* Find out if the new state will want any context information. */
2176 wants_newline = 0;
2177 if (tstbit(eolbyte, labels[i]))
2178 for (j = 0; j < follows.nelem; ++j)
2179 if (PREV_NEWLINE_DEPENDENT(follows.elems[j].constraint))

--- 100 unchanged lines hidden (view full) ---

2280 int oldalloc = d->tralloc;
2281
2282 while (trans[i] >= d->tralloc)
2283 d->tralloc *= 2;
2284 REALLOC(d->realtrans, int *, d->tralloc + 1);
2285 d->trans = d->realtrans + 1;
2286 REALLOC(d->fails, int *, d->tralloc);
2287 REALLOC(d->success, int, d->tralloc);
1841 REALLOC(d->newlines, int, d->tralloc);
1842 while (oldalloc < d->tralloc)
1843 {
1844 d->trans[oldalloc] = NULL;
1845 d->fails[oldalloc++] = NULL;
1846 }
1847 }
1848
2288 while (oldalloc < d->tralloc)
2289 {
2290 d->trans[oldalloc] = NULL;
2291 d->fails[oldalloc++] = NULL;
2292 }
2293 }
2294
1849 /* Keep the newline transition in a special place so we can use it as
1850 a sentinel. */
1851 d->newlines[s] = trans[eolbyte];
2295 /* Newline is a sentinel. */
1852 trans[eolbyte] = -1;
1853
1854 if (ACCEPTING(s, *d))
1855 d->fails[s] = trans;
1856 else
1857 d->trans[s] = trans;
1858}
1859
1860static void
1861build_state_zero (struct dfa *d)
1862{
1863 d->tralloc = 1;
1864 d->trcount = 0;
1865 CALLOC(d->realtrans, int *, d->tralloc + 1);
1866 d->trans = d->realtrans + 1;
1867 CALLOC(d->fails, int *, d->tralloc);
1868 MALLOC(d->success, int, d->tralloc);
2296 trans[eolbyte] = -1;
2297
2298 if (ACCEPTING(s, *d))
2299 d->fails[s] = trans;
2300 else
2301 d->trans[s] = trans;
2302}
2303
2304static void
2305build_state_zero (struct dfa *d)
2306{
2307 d->tralloc = 1;
2308 d->trcount = 0;
2309 CALLOC(d->realtrans, int *, d->tralloc + 1);
2310 d->trans = d->realtrans + 1;
2311 CALLOC(d->fails, int *, d->tralloc);
2312 MALLOC(d->success, int, d->tralloc);
1869 MALLOC(d->newlines, int, d->tralloc);
1870 build_state(0, d);
1871}
1872
2313 build_state(0, d);
2314}
2315
2316#ifdef MBS_SUPPORT
2317/* Multibyte character handling sub-routins for dfaexec. */
2318
2319/* Initial state may encounter the byte which is not a singlebyte character
2320 nor 1st byte of a multibyte character. But it is incorrect for initial
2321 state to accept such a byte.
2322 For example, in sjis encoding the regular expression like "\\" accepts
2323 the codepoint 0x5c, but should not accept the 2nd byte of the codepoint
2324 0x815c. Then Initial state must skip the bytes which are not a singlebyte
2325 character nor 1st byte of a multibyte character. */
2326#define SKIP_REMAINS_MB_IF_INITIAL_STATE(s, p) \
2327 if (s == 0) \
2328 { \
2329 while (inputwcs[p - buf_begin] == 0 \
2330 && mblen_buf[p - buf_begin] > 0 \
2331 && p < buf_end) \
2332 ++p; \
2333 if (p >= end) \
2334 { \
2335 free(mblen_buf); \
2336 free(inputwcs); \
2337 return (size_t) -1; \
2338 } \
2339 }
2340
2341static void
2342realloc_trans_if_necessary(struct dfa *d, int new_state)
2343{
2344 /* Make sure that the trans and fail arrays are allocated large enough
2345 to hold a pointer for the new state. */
2346 if (new_state >= d->tralloc)
2347 {
2348 int oldalloc = d->tralloc;
2349
2350 while (new_state >= d->tralloc)
2351 d->tralloc *= 2;
2352 REALLOC(d->realtrans, int *, d->tralloc + 1);
2353 d->trans = d->realtrans + 1;
2354 REALLOC(d->fails, int *, d->tralloc);
2355 REALLOC(d->success, int, d->tralloc);
2356 while (oldalloc < d->tralloc)
2357 {
2358 d->trans[oldalloc] = NULL;
2359 d->fails[oldalloc++] = NULL;
2360 }
2361 }
2362}
2363
2364/* Return values of transit_state_singlebyte(), and
2365 transit_state_consume_1char. */
2366typedef enum
2367{
2368 TRANSIT_STATE_IN_PROGRESS, /* State transition has not finished. */
2369 TRANSIT_STATE_DONE, /* State transition has finished. */
2370 TRANSIT_STATE_END_BUFFER /* Reach the end of the buffer. */
2371} status_transit_state;
2372
2373/* Consume a single byte and transit state from 's' to '*next_state'.
2374 This function is almost same as the state transition routin in dfaexec().
2375 But state transition is done just once, otherwise matching succeed or
2376 reach the end of the buffer. */
2377static status_transit_state
2378transit_state_singlebyte (struct dfa *d, int s, unsigned char const *p,
2379 int *next_state)
2380{
2381 int *t;
2382 int works = s;
2383
2384 status_transit_state rval = TRANSIT_STATE_IN_PROGRESS;
2385
2386 while (rval == TRANSIT_STATE_IN_PROGRESS)
2387 {
2388 if ((t = d->trans[works]) != NULL)
2389 {
2390 works = t[*p];
2391 rval = TRANSIT_STATE_DONE;
2392 if (works < 0)
2393 works = 0;
2394 }
2395 else if (works < 0)
2396 {
2397 if (p == buf_end)
2398 /* At the moment, it must not happen. */
2399 return TRANSIT_STATE_END_BUFFER;
2400 works = 0;
2401 }
2402 else if (d->fails[works])
2403 {
2404 works = d->fails[works][*p];
2405 rval = TRANSIT_STATE_DONE;
2406 }
2407 else
2408 {
2409 build_state(works, d);
2410 }
2411 }
2412 *next_state = works;
2413 return rval;
2414}
2415
2416/* Check whether period can match or not in the current context. If it can,
2417 return the amount of the bytes with which period can match, otherwise
2418 return 0.
2419 `pos' is the position of the period. `index' is the index from the
2420 buf_begin, and it is the current position in the buffer. */
2421static int
2422match_anychar (struct dfa *d, int s, position pos, int index)
2423{
2424 int newline = 0;
2425 int letter = 0;
2426 wchar_t wc;
2427 int mbclen;
2428
2429 wc = inputwcs[index];
2430 mbclen = (mblen_buf[index] == 0)? 1 : mblen_buf[index];
2431
2432 /* Check context. */
2433 if (wc == (wchar_t)eolbyte)
2434 {
2435 if (!(syntax_bits & RE_DOT_NEWLINE))
2436 return 0;
2437 newline = 1;
2438 }
2439 else if (wc == (wchar_t)'\0')
2440 {
2441 if (syntax_bits & RE_DOT_NOT_NULL)
2442 return 0;
2443 newline = 1;
2444 }
2445
2446 if (iswalnum(wc) || wc == L'_')
2447 letter = 1;
2448
2449 if (!SUCCEEDS_IN_CONTEXT(pos.constraint, d->states[s].newline,
2450 newline, d->states[s].letter, letter))
2451 return 0;
2452
2453 return mbclen;
2454}
2455
2456/* Check whether bracket expression can match or not in the current context.
2457 If it can, return the amount of the bytes with which expression can match,
2458 otherwise return 0.
2459 `pos' is the position of the bracket expression. `index' is the index
2460 from the buf_begin, and it is the current position in the buffer. */
2461int
2462match_mb_charset (struct dfa *d, int s, position pos, int index)
2463{
2464 int i;
2465 int match; /* Flag which represent that matching succeed. */
2466 int match_len; /* Length of the character (or collating element)
2467 with which this operator match. */
2468 int op_len; /* Length of the operator. */
2469 char buffer[128];
2470 wchar_t wcbuf[6];
2471
2472 /* Pointer to the structure to which we are currently reffering. */
2473 struct mb_char_classes *work_mbc;
2474
2475 int newline = 0;
2476 int letter = 0;
2477 wchar_t wc; /* Current reffering character. */
2478
2479 wc = inputwcs[index];
2480
2481 /* Check context. */
2482 if (wc == (wchar_t)eolbyte)
2483 {
2484 if (!(syntax_bits & RE_DOT_NEWLINE))
2485 return 0;
2486 newline = 1;
2487 }
2488 else if (wc == (wchar_t)'\0')
2489 {
2490 if (syntax_bits & RE_DOT_NOT_NULL)
2491 return 0;
2492 newline = 1;
2493 }
2494 if (iswalnum(wc) || wc == L'_')
2495 letter = 1;
2496 if (!SUCCEEDS_IN_CONTEXT(pos.constraint, d->states[s].newline,
2497 newline, d->states[s].letter, letter))
2498 return 0;
2499
2500 /* Assign the current reffering operator to work_mbc. */
2501 work_mbc = &(d->mbcsets[(d->multibyte_prop[pos.index]) >> 2]);
2502 match = !work_mbc->invert;
2503 match_len = (mblen_buf[index] == 0)? 1 : mblen_buf[index];
2504
2505 /* match with a character class? */
2506 for (i = 0; i<work_mbc->nch_classes; i++)
2507 {
2508 if (iswctype((wint_t)wc, work_mbc->ch_classes[i]))
2509 goto charset_matched;
2510 }
2511
2512 strncpy(buffer, buf_begin + index, match_len);
2513 buffer[match_len] = '\0';
2514
2515 /* match with an equivalent class? */
2516 for (i = 0; i<work_mbc->nequivs; i++)
2517 {
2518 op_len = strlen(work_mbc->equivs[i]);
2519 strncpy(buffer, buf_begin + index, op_len);
2520 buffer[op_len] = '\0';
2521 if (strcoll(work_mbc->equivs[i], buffer) == 0)
2522 {
2523 match_len = op_len;
2524 goto charset_matched;
2525 }
2526 }
2527
2528 /* match with a collating element? */
2529 for (i = 0; i<work_mbc->ncoll_elems; i++)
2530 {
2531 op_len = strlen(work_mbc->coll_elems[i]);
2532 strncpy(buffer, buf_begin + index, op_len);
2533 buffer[op_len] = '\0';
2534
2535 if (strcoll(work_mbc->coll_elems[i], buffer) == 0)
2536 {
2537 match_len = op_len;
2538 goto charset_matched;
2539 }
2540 }
2541
2542 wcbuf[0] = wc;
2543 wcbuf[1] = wcbuf[3] = wcbuf[5] = '\0';
2544
2545 /* match with a range? */
2546 for (i = 0; i<work_mbc->nranges; i++)
2547 {
2548 wcbuf[2] = work_mbc->range_sts[i];
2549 wcbuf[4] = work_mbc->range_ends[i];
2550
2551 if (wcscoll(wcbuf, wcbuf+2) >= 0 &&
2552 wcscoll(wcbuf+4, wcbuf) >= 0)
2553 goto charset_matched;
2554 }
2555
2556 /* match with a character? */
2557 for (i = 0; i<work_mbc->nchars; i++)
2558 {
2559 if (wc == work_mbc->chars[i])
2560 goto charset_matched;
2561 }
2562
2563 match = !match;
2564
2565 charset_matched:
2566 return match ? match_len : 0;
2567}
2568
2569/* Check each of `d->states[s].mbps.elem' can match or not. Then return the
2570 array which corresponds to `d->states[s].mbps.elem' and each element of
2571 the array contains the amount of the bytes with which the element can
2572 match.
2573 `index' is the index from the buf_begin, and it is the current position
2574 in the buffer.
2575 Caller MUST free the array which this function return. */
2576static int*
2577check_matching_with_multibyte_ops (struct dfa *d, int s, int index)
2578{
2579 int i;
2580 int* rarray;
2581
2582 MALLOC(rarray, int, d->states[s].mbps.nelem);
2583 for (i = 0; i < d->states[s].mbps.nelem; ++i)
2584 {
2585 position pos = d->states[s].mbps.elems[i];
2586 switch(d->tokens[pos.index])
2587 {
2588 case ANYCHAR:
2589 rarray[i] = match_anychar(d, s, pos, index);
2590 break;
2591 case MBCSET:
2592 rarray[i] = match_mb_charset(d, s, pos, index);
2593 break;
2594 default:
2595 break; /* can not happen. */
2596 }
2597 }
2598 return rarray;
2599}
2600
2601/* Consume a single character and enumerate all of the positions which can
2602 be next position from the state `s'.
2603 `match_lens' is the input. It can be NULL, but it can also be the output
2604 of check_matching_with_multibyte_ops() for optimization.
2605 `mbclen' and `pps' are the output. `mbclen' is the length of the
2606 character consumed, and `pps' is the set this function enumerate. */
2607static status_transit_state
2608transit_state_consume_1char (struct dfa *d, int s, unsigned char const **pp,
2609 int *match_lens, int *mbclen, position_set *pps)
2610{
2611 int i, j;
2612 int s1, s2;
2613 int* work_mbls;
2614 status_transit_state rs = TRANSIT_STATE_DONE;
2615
2616 /* Calculate the length of the (single/multi byte) character
2617 to which p points. */
2618 *mbclen = (mblen_buf[*pp - buf_begin] == 0)? 1
2619 : mblen_buf[*pp - buf_begin];
2620
2621 /* Calculate the state which can be reached from the state `s' by
2622 consuming `*mbclen' single bytes from the buffer. */
2623 s1 = s;
2624 for (i = 0; i < *mbclen; i++)
2625 {
2626 s2 = s1;
2627 rs = transit_state_singlebyte(d, s2, (*pp)++, &s1);
2628 }
2629 /* Copy the positions contained by `s1' to the set `pps'. */
2630 copy(&(d->states[s1].elems), pps);
2631
2632 /* Check (inputed)match_lens, and initialize if it is NULL. */
2633 if (match_lens == NULL && d->states[s].mbps.nelem != 0)
2634 work_mbls = check_matching_with_multibyte_ops(d, s, *pp - buf_begin);
2635 else
2636 work_mbls = match_lens;
2637
2638 /* Add all of the positions which can be reached from `s' by consuming
2639 a single character. */
2640 for (i = 0; i < d->states[s].mbps.nelem ; i++)
2641 {
2642 if (work_mbls[i] == *mbclen)
2643 for (j = 0; j < d->follows[d->states[s].mbps.elems[i].index].nelem;
2644 j++)
2645 insert(d->follows[d->states[s].mbps.elems[i].index].elems[j],
2646 pps);
2647 }
2648
2649 if (match_lens == NULL && work_mbls != NULL)
2650 free(work_mbls);
2651 return rs;
2652}
2653
2654/* Transit state from s, then return new state and update the pointer of the
2655 buffer. This function is for some operator which can match with a multi-
2656 byte character or a collating element(which may be multi characters). */
2657static int
2658transit_state (struct dfa *d, int s, unsigned char const **pp)
2659{
2660 int s1;
2661 int mbclen; /* The length of current input multibyte character. */
2662 int maxlen = 0;
2663 int i, j;
2664 int *match_lens = NULL;
2665 int nelem = d->states[s].mbps.nelem; /* Just a alias. */
2666 position_set follows;
2667 unsigned char const *p1 = *pp;
2668 status_transit_state rs;
2669 wchar_t wc;
2670
2671 if (nelem > 0)
2672 /* This state has (a) multibyte operator(s).
2673 We check whether each of them can match or not. */
2674 {
2675 /* Note: caller must free the return value of this function. */
2676 match_lens = check_matching_with_multibyte_ops(d, s, *pp - buf_begin);
2677
2678 for (i = 0; i < nelem; i++)
2679 /* Search the operator which match the longest string,
2680 in this state. */
2681 {
2682 if (match_lens[i] > maxlen)
2683 maxlen = match_lens[i];
2684 }
2685 }
2686
2687 if (nelem == 0 || maxlen == 0)
2688 /* This state has no multibyte operator which can match.
2689 We need to check only one singlebyte character. */
2690 {
2691 status_transit_state rs;
2692 rs = transit_state_singlebyte(d, s, *pp, &s1);
2693
2694 /* We must update the pointer if state transition succeeded. */
2695 if (rs == TRANSIT_STATE_DONE)
2696 ++*pp;
2697
2698 if (match_lens != NULL)
2699 free(match_lens);
2700 return s1;
2701 }
2702
2703 /* This state has some operators which can match a multibyte character. */
2704 follows.nelem = 0;
2705 MALLOC(follows.elems, position, d->nleaves);
2706
2707 /* `maxlen' may be longer than the length of a character, because it may
2708 not be a character but a (multi character) collating element.
2709 We enumerate all of the positions which `s' can reach by consuming
2710 `maxlen' bytes. */
2711 rs = transit_state_consume_1char(d, s, pp, match_lens, &mbclen, &follows);
2712
2713 wc = inputwcs[*pp - mbclen - buf_begin];
2714 s1 = state_index(d, &follows, wc == L'\n', iswalnum(wc));
2715 realloc_trans_if_necessary(d, s1);
2716
2717 while (*pp - p1 < maxlen)
2718 {
2719 follows.nelem = 0;
2720 rs = transit_state_consume_1char(d, s1, pp, NULL, &mbclen, &follows);
2721
2722 for (i = 0; i < nelem ; i++)
2723 {
2724 if (match_lens[i] == *pp - p1)
2725 for (j = 0;
2726 j < d->follows[d->states[s1].mbps.elems[i].index].nelem; j++)
2727 insert(d->follows[d->states[s1].mbps.elems[i].index].elems[j],
2728 &follows);
2729 }
2730
2731 wc = inputwcs[*pp - mbclen - buf_begin];
2732 s1 = state_index(d, &follows, wc == L'\n', iswalnum(wc));
2733 realloc_trans_if_necessary(d, s1);
2734 }
2735 free(match_lens);
2736 free(follows.elems);
2737 return s1;
2738}
2739
2740#endif
2741
1873/* Search through a buffer looking for a match to the given struct dfa.
1874 Find the first occurrence of a string matching the regexp in the buffer,
2742/* Search through a buffer looking for a match to the given struct dfa.
2743 Find the first occurrence of a string matching the regexp in the buffer,
1875 and the shortest possible version thereof. Return a pointer to the first
1876 character after the match, or NULL if none is found. Begin points to
1877 the beginning of the buffer, and end points to the first character after
1878 its end. We store a newline in *end to act as a sentinel, so end had
1879 better point somewhere valid. Newline is a flag indicating whether to
1880 allow newlines to be in the matching string. If count is non-
1881 NULL it points to a place we're supposed to increment every time we
1882 see a newline. Finally, if backref is non-NULL it points to a place
2744 and the shortest possible version thereof. Return the offset of the first
2745 character after the match, or (size_t) -1 if none is found. BEGIN points to
2746 the beginning of the buffer, and SIZE is the size of the buffer. If SIZE
2747 is nonzero, BEGIN[SIZE - 1] must be a newline. BACKREF points to a place
1883 where we're supposed to store a 1 if backreferencing happened and the
1884 match needs to be verified by a backtracking matcher. Otherwise
1885 we store a 0 in *backref. */
2748 where we're supposed to store a 1 if backreferencing happened and the
2749 match needs to be verified by a backtracking matcher. Otherwise
2750 we store a 0 in *backref. */
1886char *
1887dfaexec (struct dfa *d, char *begin, char *end,
1888 int newline, int *count, int *backref)
2751size_t
2752dfaexec (struct dfa *d, char const *begin, size_t size, int *backref)
1889{
2753{
1890 register int s, s1, tmp; /* Current state. */
1891 register unsigned char *p; /* Current input character. */
2754 register int s; /* Current state. */
2755 register unsigned char const *p; /* Current input character. */
2756 register unsigned char const *end; /* One past the last input character. */
1892 register int **trans, *t; /* Copy of d->trans so it can be optimized
1893 into a register. */
1894 register unsigned char eol = eolbyte; /* Likewise for eolbyte. */
1895 static int sbit[NOTCHAR]; /* Table for anding with d->success. */
1896 static int sbit_init;
1897
1898 if (! sbit_init)
1899 {
1900 int i;
1901
1902 sbit_init = 1;
1903 for (i = 0; i < NOTCHAR; ++i)
1904 sbit[i] = (IS_WORD_CONSTITUENT(i)) ? 2 : 1;
1905 sbit[eol] = 4;
1906 }
1907
1908 if (! d->tralloc)
1909 build_state_zero(d);
1910
2757 register int **trans, *t; /* Copy of d->trans so it can be optimized
2758 into a register. */
2759 register unsigned char eol = eolbyte; /* Likewise for eolbyte. */
2760 static int sbit[NOTCHAR]; /* Table for anding with d->success. */
2761 static int sbit_init;
2762
2763 if (! sbit_init)
2764 {
2765 int i;
2766
2767 sbit_init = 1;
2768 for (i = 0; i < NOTCHAR; ++i)
2769 sbit[i] = (IS_WORD_CONSTITUENT(i)) ? 2 : 1;
2770 sbit[eol] = 4;
2771 }
2772
2773 if (! d->tralloc)
2774 build_state_zero(d);
2775
1911 s = s1 = 0;
1912 p = (unsigned char *) begin;
2776 s = 0;
2777 p = (unsigned char const *) begin;
2778 end = p + size;
1913 trans = d->trans;
2779 trans = d->trans;
1914 *end = eol;
1915
2780
2781#ifdef MBS_SUPPORT
2782 if (MB_CUR_MAX > 1)
2783 {
2784 int remain_bytes, i;
2785 buf_begin = begin;
2786 buf_end = end;
2787
2788 /* initialize mblen_buf, and inputwcs. */
2789 MALLOC(mblen_buf, unsigned char, end - (unsigned char const *)begin + 2);
2790 MALLOC(inputwcs, wchar_t, end - (unsigned char const *)begin + 2);
2791 memset(&mbs, 0, sizeof(mbstate_t));
2792 remain_bytes = 0;
2793 for (i = 0; i < end - (unsigned char const *)begin + 1; i++)
2794 {
2795 if (remain_bytes == 0)
2796 {
2797 remain_bytes
2798 = mbrtowc(inputwcs + i, begin + i,
2799 end - (unsigned char const *)begin - i + 1, &mbs);
2800 if (remain_bytes <= 1)
2801 {
2802 remain_bytes = 0;
2803 inputwcs[i] = (wchar_t)begin[i];
2804 mblen_buf[i] = 0;
2805 }
2806 else
2807 {
2808 mblen_buf[i] = remain_bytes;
2809 remain_bytes--;
2810 }
2811 }
2812 else
2813 {
2814 mblen_buf[i] = remain_bytes;
2815 inputwcs[i] = 0;
2816 remain_bytes--;
2817 }
2818 }
2819 mblen_buf[i] = 0;
2820 inputwcs[i] = 0; /* sentinel */
2821 }
2822#endif /* MBS_SUPPORT */
2823
1916 for (;;)
1917 {
2824 for (;;)
2825 {
1918 while ((t = trans[s]) != 0) { /* hand-optimized loop */
1919 s1 = t[*p++];
1920 if ((t = trans[s1]) == 0) {
1921 tmp = s ; s = s1 ; s1 = tmp ; /* swap */
1922 break;
1923 }
1924 s = t[*p++];
1925 }
2826#ifdef MBS_SUPPORT
2827 if (MB_CUR_MAX > 1)
2828 while ((t = trans[s]))
2829 {
2830 if (d->states[s].mbps.nelem != 0)
2831 {
2832 /* Can match with a multibyte character( and multi character
2833 collating element). */
2834 unsigned char const *nextp;
1926
2835
1927 if (s >= 0 && p <= (unsigned char *) end && d->fails[s])
2836 SKIP_REMAINS_MB_IF_INITIAL_STATE(s, p);
2837
2838 nextp = p;
2839 s = transit_state(d, s, &nextp);
2840 p = nextp;
2841
2842 /* Trans table might be updated. */
2843 trans = d->trans;
2844 }
2845 else
2846 {
2847 SKIP_REMAINS_MB_IF_INITIAL_STATE(s, p);
2848 s = t[*p++];
2849 }
2850 }
2851 else
2852#endif /* MBS_SUPPORT */
2853 while ((t = trans[s]))
2854 s = t[*p++];
2855
2856 if (s < 0)
1928 {
2857 {
2858 if (p == end)
2859 {
2860#ifdef MBS_SUPPORT
2861 if (MB_CUR_MAX > 1)
2862 {
2863 free(mblen_buf);
2864 free(inputwcs);
2865 }
2866#endif /* MBS_SUPPORT */
2867 return (size_t) -1;
2868 }
2869 s = 0;
2870 }
2871 else if ((t = d->fails[s]))
2872 {
1929 if (d->success[s] & sbit[*p])
1930 {
1931 if (backref)
1932 *backref = (d->states[s].backref != 0);
2873 if (d->success[s] & sbit[*p])
2874 {
2875 if (backref)
2876 *backref = (d->states[s].backref != 0);
1933 return (char *) p;
2877#ifdef MBS_SUPPORT
2878 if (MB_CUR_MAX > 1)
2879 {
2880 free(mblen_buf);
2881 free(inputwcs);
2882 }
2883#endif /* MBS_SUPPORT */
2884 return (char const *) p - begin;
1934 }
1935
2885 }
2886
1936 s1 = s;
1937 s = d->fails[s][*p++];
1938 continue;
1939 }
2887#ifdef MBS_SUPPORT
2888 if (MB_CUR_MAX > 1)
2889 {
2890 SKIP_REMAINS_MB_IF_INITIAL_STATE(s, p);
2891 if (d->states[s].mbps.nelem != 0)
2892 {
2893 /* Can match with a multibyte character( and multi
2894 character collating element). */
2895 unsigned char const *nextp;
2896 nextp = p;
2897 s = transit_state(d, s, &nextp);
2898 p = nextp;
1940
2899
1941 /* If the previous character was a newline, count it. */
1942 if (count && (char *) p <= end && p[-1] == eol)
1943 ++*count;
1944
1945 /* Check if we've run off the end of the buffer. */
1946 if ((char *) p > end)
1947 return NULL;
1948
1949 if (s >= 0)
2900 /* Trans table might be updated. */
2901 trans = d->trans;
2902 }
2903 else
2904 s = t[*p++];
2905 }
2906 else
2907#endif /* MBS_SUPPORT */
2908 s = t[*p++];
2909 }
2910 else
1950 {
1951 build_state(s, d);
1952 trans = d->trans;
2911 {
2912 build_state(s, d);
2913 trans = d->trans;
1953 continue;
1954 }
2914 }
1955
1956 if (p[-1] == eol && newline)
1957 {
1958 s = d->newlines[s1];
1959 continue;
1960 }
1961
1962 s = 0;
1963 }
1964}
1965
1966/* Initialize the components of a dfa that the other routines don't
1967 initialize for themselves. */
1968void
1969dfainit (struct dfa *d)
1970{
1971 d->calloc = 1;
1972 MALLOC(d->charclasses, charclass, d->calloc);
1973 d->cindex = 0;
1974
1975 d->talloc = 1;
1976 MALLOC(d->tokens, token, d->talloc);
1977 d->tindex = d->depth = d->nleaves = d->nregexps = 0;
2915 }
2916}
2917
2918/* Initialize the components of a dfa that the other routines don't
2919 initialize for themselves. */
2920void
2921dfainit (struct dfa *d)
2922{
2923 d->calloc = 1;
2924 MALLOC(d->charclasses, charclass, d->calloc);
2925 d->cindex = 0;
2926
2927 d->talloc = 1;
2928 MALLOC(d->tokens, token, d->talloc);
2929 d->tindex = d->depth = d->nleaves = d->nregexps = 0;
2930#ifdef MBS_SUPPORT
2931 if (MB_CUR_MAX > 1)
2932 {
2933 d->nmultibyte_prop = 1;
2934 MALLOC(d->multibyte_prop, int, d->nmultibyte_prop);
2935 d->nmbcsets = 0;
2936 d->mbcsets_alloc = 1;
2937 MALLOC(d->mbcsets, struct mb_char_classes, d->mbcsets_alloc);
2938 }
2939#endif
1978
1979 d->searchflag = 0;
1980 d->tralloc = 0;
1981
1982 d->musts = 0;
1983}
1984
1985/* Parse and analyze a single string of the given length. */
1986void
2940
2941 d->searchflag = 0;
2942 d->tralloc = 0;
2943
2944 d->musts = 0;
2945}
2946
2947/* Parse and analyze a single string of the given length. */
2948void
1987dfacomp (char *s, size_t len, struct dfa *d, int searchflag)
2949dfacomp (char const *s, size_t len, struct dfa *d, int searchflag)
1988{
1989 if (case_fold) /* dummy folding in service of dfamust() */
1990 {
1991 char *lcopy;
1992 int i;
1993
1994 lcopy = malloc(len);
1995 if (!lcopy)

--- 29 unchanged lines hidden (view full) ---

2025void
2026dfafree (struct dfa *d)
2027{
2028 int i;
2029 struct dfamust *dm, *ndm;
2030
2031 free((ptr_t) d->charclasses);
2032 free((ptr_t) d->tokens);
2950{
2951 if (case_fold) /* dummy folding in service of dfamust() */
2952 {
2953 char *lcopy;
2954 int i;
2955
2956 lcopy = malloc(len);
2957 if (!lcopy)

--- 29 unchanged lines hidden (view full) ---

2987void
2988dfafree (struct dfa *d)
2989{
2990 int i;
2991 struct dfamust *dm, *ndm;
2992
2993 free((ptr_t) d->charclasses);
2994 free((ptr_t) d->tokens);
2995
2996#ifdef MBS_SUPPORT
2997 if (MB_CUR_MAX > 1)
2998 {
2999 free((ptr_t) d->multibyte_prop);
3000 for (i = 0; i < d->nmbcsets; ++i)
3001 {
3002 int j;
3003 struct mb_char_classes *p = &(d->mbcsets[i]);
3004 if (p->chars != NULL)
3005 free(p->chars);
3006 if (p->ch_classes != NULL)
3007 free(p->ch_classes);
3008 if (p->range_sts != NULL)
3009 free(p->range_sts);
3010 if (p->range_ends != NULL)
3011 free(p->range_ends);
3012
3013 for (j = 0; j < p->nequivs; ++j)
3014 free(p->equivs[j]);
3015 if (p->equivs != NULL)
3016 free(p->equivs);
3017
3018 for (j = 0; j < p->ncoll_elems; ++j)
3019 free(p->coll_elems[j]);
3020 if (p->coll_elems != NULL)
3021 free(p->coll_elems);
3022 }
3023 free((ptr_t) d->mbcsets);
3024 }
3025#endif /* MBS_SUPPORT */
3026
2033 for (i = 0; i < d->sindex; ++i)
2034 free((ptr_t) d->states[i].elems.elems);
2035 free((ptr_t) d->states);
2036 for (i = 0; i < d->tindex; ++i)
2037 if (d->follows[i].elems)
2038 free((ptr_t) d->follows[i].elems);
2039 free((ptr_t) d->follows);
2040 for (i = 0; i < d->tralloc; ++i)
2041 if (d->trans[i])
2042 free((ptr_t) d->trans[i]);
2043 else if (d->fails[i])
2044 free((ptr_t) d->fails[i]);
2045 if (d->realtrans) free((ptr_t) d->realtrans);
2046 if (d->fails) free((ptr_t) d->fails);
3027 for (i = 0; i < d->sindex; ++i)
3028 free((ptr_t) d->states[i].elems.elems);
3029 free((ptr_t) d->states);
3030 for (i = 0; i < d->tindex; ++i)
3031 if (d->follows[i].elems)
3032 free((ptr_t) d->follows[i].elems);
3033 free((ptr_t) d->follows);
3034 for (i = 0; i < d->tralloc; ++i)
3035 if (d->trans[i])
3036 free((ptr_t) d->trans[i]);
3037 else if (d->fails[i])
3038 free((ptr_t) d->fails[i]);
3039 if (d->realtrans) free((ptr_t) d->realtrans);
3040 if (d->fails) free((ptr_t) d->fails);
2047 if (d->newlines) free((ptr_t) d->newlines);
2048 if (d->success) free((ptr_t) d->success);
2049 for (dm = d->musts; dm; dm = ndm)
2050 {
2051 ndm = dm->next;
2052 free(dm->must);
2053 free((ptr_t) dm);
2054 }
2055}

--- 24 unchanged lines hidden (view full) ---

2080 operators.
2081
2082 "ZERO" means "a zero-length sequence" below.
2083
2084 Type left right is in
2085 ---- ---- ----- -- --
2086 char c # c # c # c # c
2087
3041 if (d->success) free((ptr_t) d->success);
3042 for (dm = d->musts; dm; dm = ndm)
3043 {
3044 ndm = dm->next;
3045 free(dm->must);
3046 free((ptr_t) dm);
3047 }
3048}

--- 24 unchanged lines hidden (view full) ---

3073 operators.
3074
3075 "ZERO" means "a zero-length sequence" below.
3076
3077 Type left right is in
3078 ---- ---- ----- -- --
3079 char c # c # c # c # c
3080
3081 ANYCHAR ZERO ZERO ZERO ZERO
3082
3083 MBCSET ZERO ZERO ZERO ZERO
3084
2088 CSET ZERO ZERO ZERO ZERO
2089
2090 STAR ZERO ZERO ZERO ZERO
2091
2092 QMARK ZERO ZERO ZERO ZERO
2093
2094 PLUS p->left p->right ZERO p->in
2095

--- 156 unchanged lines hidden (view full) ---

2252 return NULL;
2253 cpp = (char **) malloc(sizeof *cpp);
2254 if (cpp == NULL)
2255 return NULL;
2256 cpp[0] = NULL;
2257 for (lcp = left; *lcp != '\0'; ++lcp)
2258 {
2259 len = 0;
3085 CSET ZERO ZERO ZERO ZERO
3086
3087 STAR ZERO ZERO ZERO ZERO
3088
3089 QMARK ZERO ZERO ZERO ZERO
3090
3091 PLUS p->left p->right ZERO p->in
3092

--- 156 unchanged lines hidden (view full) ---

3249 return NULL;
3250 cpp = (char **) malloc(sizeof *cpp);
3251 if (cpp == NULL)
3252 return NULL;
3253 cpp[0] = NULL;
3254 for (lcp = left; *lcp != '\0'; ++lcp)
3255 {
3256 len = 0;
2260 rcp = index(right, *lcp);
3257 rcp = strchr (right, *lcp);
2261 while (rcp != NULL)
2262 {
2263 for (i = 1; lcp[i] != '\0' && lcp[i] == rcp[i]; ++i)
2264 continue;
2265 if (i > len)
2266 len = i;
3258 while (rcp != NULL)
3259 {
3260 for (i = 1; lcp[i] != '\0' && lcp[i] == rcp[i]; ++i)
3261 continue;
3262 if (i > len)
3263 len = i;
2267 rcp = index(rcp + 1, *lcp);
3264 rcp = strchr (rcp + 1, *lcp);
2268 }
2269 if (len == 0)
2270 continue;
2271 if ((cpp = enlist(cpp, lcp, len)) == NULL)
2272 break;
2273 }
2274 return cpp;
2275}

--- 249 unchanged lines hidden (view full) ---

2525 /* "cannot happen" */
2526 goto done;
2527 }
2528 else if (t == '\0')
2529 {
2530 /* not on *my* shift */
2531 goto done;
2532 }
3265 }
3266 if (len == 0)
3267 continue;
3268 if ((cpp = enlist(cpp, lcp, len)) == NULL)
3269 break;
3270 }
3271 return cpp;
3272}

--- 249 unchanged lines hidden (view full) ---

3522 /* "cannot happen" */
3523 goto done;
3524 }
3525 else if (t == '\0')
3526 {
3527 /* not on *my* shift */
3528 goto done;
3529 }
2533 else if (t >= CSET)
3530 else if (t >= CSET
3531#ifdef MBS_SUPPORT
3532 || t == ANYCHAR
3533 || t == MBCSET
3534#endif /* MBS_SUPPORT */
3535 )
2534 {
2535 /* easy enough */
2536 resetmust(mp);
2537 }
2538 else
2539 {
2540 /* plain character */
2541 resetmust(mp);

--- 33 unchanged lines hidden (view full) ---

2575 freelist(mp[i].in);
2576 ifree((char *) mp[i].in);
2577 ifree(mp[i].left);
2578 ifree(mp[i].right);
2579 ifree(mp[i].is);
2580 }
2581 free((char *) mp);
2582}
3536 {
3537 /* easy enough */
3538 resetmust(mp);
3539 }
3540 else
3541 {
3542 /* plain character */
3543 resetmust(mp);

--- 33 unchanged lines hidden (view full) ---

3577 freelist(mp[i].in);
3578 ifree((char *) mp[i].in);
3579 ifree(mp[i].left);
3580 ifree(mp[i].right);
3581 ifree(mp[i].is);
3582 }
3583 free((char *) mp);
3584}
3585/* vim:set shiftwidth=2: */