1/* Extended regular expression matching and search library,
2   version 0.12.
3   (Implements POSIX draft P10003.2/D11.2, except for
4   internationalization features.)
5
6   Copyright (C) 1993 Free Software Foundation, Inc.
7
8   This program is free software; you can redistribute it and/or modify
9   it under the terms of the GNU General Public License as published by
10   the Free Software Foundation; either version 2, or (at your option)
11   any later version.
12
13   This program is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16   GNU General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with this program; if not, write to the Free Software
20   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
21
22/* AIX requires this to be the first thing in the file. */
23#if defined (_AIX) && !defined (REGEX_MALLOC)
24  #pragma alloca
25#endif
26
27#define _GNU_SOURCE
28
29/* We need this for `regex.h', and perhaps for the Emacs include files.  */
30#include <sys/types.h>
31
32#ifdef HAVE_CONFIG_H
33#include "config.h"
34#endif
35
36/* The `emacs' switch turns on certain matching commands
37   that make sense only in Emacs. */
38#ifdef emacs
39
40#include "lisp.h"
41#include "buffer.h"
42#include "syntax.h"
43
44/* Emacs uses `NULL' as a predicate.  */
45#undef NULL
46
47#else  /* not emacs */
48
49/* We used to test for `BSTRING' here, but only GCC and Emacs define
50   `BSTRING', as far as I know, and neither of them use this code.  */
51#if HAVE_STRING_H || STDC_HEADERS
52#include <string.h>
53#ifndef bcmp
54#define bcmp(s1, s2, n)	memcmp ((s1), (s2), (n))
55#endif
56#ifndef bcopy
57#define bcopy(s, d, n)	memcpy ((d), (s), (n))
58#endif
59#ifndef bzero
60#define bzero(s, n)	memset ((s), 0, (n))
61#endif
62#else
63#include <strings.h>
64#endif
65
66#include <stdlib.h>
67
68/* Define the syntax stuff for \<, \>, etc.  */
69
70/* This must be nonzero for the wordchar and notwordchar pattern
71   commands in re_match_2.  */
72#ifndef Sword
73#define Sword 1
74#endif
75
76#ifdef SYNTAX_TABLE
77
78extern char *re_syntax_table;
79
80#else /* not SYNTAX_TABLE */
81
82/* How many characters in the character set.  */
83#define CHAR_SET_SIZE 256
84
85static char re_syntax_table[CHAR_SET_SIZE];
86
87static void
88init_syntax_once ()
89{
90   register int c;
91   static int done = 0;
92
93   if (done)
94     return;
95
96   bzero (re_syntax_table, sizeof re_syntax_table);
97
98   for (c = 'a'; c <= 'z'; c++)
99     re_syntax_table[c] = Sword;
100
101   for (c = 'A'; c <= 'Z'; c++)
102     re_syntax_table[c] = Sword;
103
104   for (c = '0'; c <= '9'; c++)
105     re_syntax_table[c] = Sword;
106
107   re_syntax_table['_'] = Sword;
108
109   done = 1;
110}
111
112#endif /* not SYNTAX_TABLE */
113
114#define SYNTAX(c) re_syntax_table[c]
115
116#endif /* not emacs */
117
118/* Get the interface, including the syntax bits.  */
119#include "regex.h"
120
121/* isalpha etc. are used for the character classes.  */
122#include <ctype.h>
123
124#ifndef isascii
125#define isascii(c) 1
126#endif
127
128#ifdef isblank
129#define ISBLANK(c) (isascii (c) && isblank (c))
130#else
131#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
132#endif
133#ifdef isgraph
134#define ISGRAPH(c) (isascii (c) && isgraph (c))
135#else
136#define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
137#endif
138
139#define ISPRINT(c) (isascii (c) && isprint (c))
140#define ISDIGIT(c) (isascii (c) && isdigit (c))
141#define ISALNUM(c) (isascii (c) && isalnum (c))
142#define ISALPHA(c) (isascii (c) && isalpha (c))
143#define ISCNTRL(c) (isascii (c) && iscntrl (c))
144#define ISLOWER(c) (isascii (c) && islower (c))
145#define ISPUNCT(c) (isascii (c) && ispunct (c))
146#define ISSPACE(c) (isascii (c) && isspace (c))
147#define ISUPPER(c) (isascii (c) && isupper (c))
148#define ISXDIGIT(c) (isascii (c) && isxdigit (c))
149
150#ifndef NULL
151#define NULL 0
152#endif
153
154/* We remove any previous definition of `SIGN_EXTEND_CHAR',
155   since ours (we hope) works properly with all combinations of
156   machines, compilers, `char' and `unsigned char' argument types.
157   (Per Bothner suggested the basic approach.)  */
158#undef SIGN_EXTEND_CHAR
159#if __STDC__
160#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
161#else  /* not __STDC__ */
162/* As in Harbison and Steele.  */
163#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
164#endif
165
166/* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
167   use `alloca' instead of `malloc'.  This is because using malloc in
168   re_search* or re_match* could cause memory leaks when C-g is used in
169   Emacs; also, malloc is slower and causes storage fragmentation.  On
170   the other hand, malloc is more portable, and easier to debug.
171
172   Because we sometimes use alloca, some routines have to be macros,
173   not functions -- `alloca'-allocated space disappears at the end of the
174   function it is called in.  */
175
176#ifdef REGEX_MALLOC
177
178#define REGEX_ALLOCATE malloc
179#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
180
181#else /* not REGEX_MALLOC  */
182
183/* Emacs already defines alloca, sometimes.  */
184#ifndef alloca
185
186/* Make alloca work the best possible way.  */
187#ifdef __GNUC__
188#define alloca __builtin_alloca
189#else /* not __GNUC__ */
190#if HAVE_ALLOCA_H
191#include <alloca.h>
192#else /* not __GNUC__ or HAVE_ALLOCA_H */
193#ifndef _AIX /* Already did AIX, up at the top.  */
194char *alloca ();
195#endif /* not _AIX */
196#endif /* not HAVE_ALLOCA_H */
197#endif /* not __GNUC__ */
198
199#endif /* not alloca */
200
201#define REGEX_ALLOCATE alloca
202
203/* Assumes a `char *destination' variable.  */
204#define REGEX_REALLOCATE(source, osize, nsize)				\
205  (destination = (char *) alloca (nsize),				\
206   bcopy (source, destination, osize),					\
207   destination)
208
209#endif /* not REGEX_MALLOC */
210
211
212/* True if `size1' is non-NULL and PTR is pointing anywhere inside
213   `string1' or just past its end.  This works if PTR is NULL, which is
214   a good thing.  */
215#define FIRST_STRING_P(ptr) 					\
216  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
217
218/* (Re)Allocate N items of type T using malloc, or fail.  */
219#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
220#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
221#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
222
223#define BYTEWIDTH 8 /* In bits.  */
224
225#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
226
227#define MAX(a, b) ((a) > (b) ? (a) : (b))
228#define MIN(a, b) ((a) < (b) ? (a) : (b))
229
230typedef char boolean;
231#define false 0
232#define true 1
233
234/* These are the command codes that appear in compiled regular
235   expressions.  Some opcodes are followed by argument bytes.  A
236   command code can specify any interpretation whatsoever for its
237   arguments.  Zero bytes may appear in the compiled regular expression.
238
239   The value of `exactn' is needed in search.c (search_buffer) in Emacs.
240   So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
241   `exactn' we use here must also be 1.  */
242
243typedef enum
244{
245  no_op = 0,
246
247        /* Followed by one byte giving n, then by n literal bytes.  */
248  exactn = 1,
249
250        /* Matches any (more or less) character.  */
251  anychar,
252
253        /* Matches any one char belonging to specified set.  First
254           following byte is number of bitmap bytes.  Then come bytes
255           for a bitmap saying which chars are in.  Bits in each byte
256           are ordered low-bit-first.  A character is in the set if its
257           bit is 1.  A character too large to have a bit in the map is
258           automatically not in the set.  */
259  charset,
260
261        /* Same parameters as charset, but match any character that is
262           not one of those specified.  */
263  charset_not,
264
265        /* Start remembering the text that is matched, for storing in a
266           register.  Followed by one byte with the register number, in
267           the range 0 to one less than the pattern buffer's re_nsub
268           field.  Then followed by one byte with the number of groups
269           inner to this one.  (This last has to be part of the
270           start_memory only because we need it in the on_failure_jump
271           of re_match_2.)  */
272  start_memory,
273
274        /* Stop remembering the text that is matched and store it in a
275           memory register.  Followed by one byte with the register
276           number, in the range 0 to one less than `re_nsub' in the
277           pattern buffer, and one byte with the number of inner groups,
278           just like `start_memory'.  (We need the number of inner
279           groups here because we don't have any easy way of finding the
280           corresponding start_memory when we're at a stop_memory.)  */
281  stop_memory,
282
283        /* Match a duplicate of something remembered. Followed by one
284           byte containing the register number.  */
285  duplicate,
286
287        /* Fail unless at beginning of line.  */
288  begline,
289
290        /* Fail unless at end of line.  */
291  endline,
292
293        /* Succeeds if at beginning of buffer (if emacs) or at beginning
294           of string to be matched (if not).  */
295  begbuf,
296
297        /* Analogously, for end of buffer/string.  */
298  endbuf,
299
300        /* Followed by two byte relative address to which to jump.  */
301  jump,
302
303	/* Same as jump, but marks the end of an alternative.  */
304  jump_past_alt,
305
306        /* Followed by two-byte relative address of place to resume at
307           in case of failure.  */
308  on_failure_jump,
309
310        /* Like on_failure_jump, but pushes a placeholder instead of the
311           current string position when executed.  */
312  on_failure_keep_string_jump,
313
314        /* Throw away latest failure point and then jump to following
315           two-byte relative address.  */
316  pop_failure_jump,
317
318        /* Change to pop_failure_jump if know won't have to backtrack to
319           match; otherwise change to jump.  This is used to jump
320           back to the beginning of a repeat.  If what follows this jump
321           clearly won't match what the repeat does, such that we can be
322           sure that there is no use backtracking out of repetitions
323           already matched, then we change it to a pop_failure_jump.
324           Followed by two-byte address.  */
325  maybe_pop_jump,
326
327        /* Jump to following two-byte address, and push a dummy failure
328           point. This failure point will be thrown away if an attempt
329           is made to use it for a failure.  A `+' construct makes this
330           before the first repeat.  Also used as an intermediary kind
331           of jump when compiling an alternative.  */
332  dummy_failure_jump,
333
334	/* Push a dummy failure point and continue.  Used at the end of
335	   alternatives.  */
336  push_dummy_failure,
337
338        /* Followed by two-byte relative address and two-byte number n.
339           After matching N times, jump to the address upon failure.  */
340  succeed_n,
341
342        /* Followed by two-byte relative address, and two-byte number n.
343           Jump to the address N times, then fail.  */
344  jump_n,
345
346        /* Set the following two-byte relative address to the
347           subsequent two-byte number.  The address *includes* the two
348           bytes of number.  */
349  set_number_at,
350
351  wordchar,	/* Matches any word-constituent character.  */
352  notwordchar,	/* Matches any char that is not a word-constituent.  */
353
354  wordbeg,	/* Succeeds if at word beginning.  */
355  wordend,	/* Succeeds if at word end.  */
356
357  wordbound,	/* Succeeds if at a word boundary.  */
358  notwordbound	/* Succeeds if not at a word boundary.  */
359
360#ifdef emacs
361  ,before_dot,	/* Succeeds if before point.  */
362  at_dot,	/* Succeeds if at point.  */
363  after_dot,	/* Succeeds if after point.  */
364
365	/* Matches any character whose syntax is specified.  Followed by
366           a byte which contains a syntax code, e.g., Sword.  */
367  syntaxspec,
368
369	/* Matches any character whose syntax is not that specified.  */
370  notsyntaxspec
371#endif /* emacs */
372} re_opcode_t;
373
374/* Common operations on the compiled pattern.  */
375
376/* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
377
378#define STORE_NUMBER(destination, number)				\
379  do {									\
380    (destination)[0] = (number) & 0377;					\
381    (destination)[1] = (number) >> 8;					\
382  } while (0)
383
384/* Same as STORE_NUMBER, except increment DESTINATION to
385   the byte after where the number is stored.  Therefore, DESTINATION
386   must be an lvalue.  */
387
388#define STORE_NUMBER_AND_INCR(destination, number)			\
389  do {									\
390    STORE_NUMBER (destination, number);					\
391    (destination) += 2;							\
392  } while (0)
393
394/* Put into DESTINATION a number stored in two contiguous bytes starting
395   at SOURCE.  */
396
397#define EXTRACT_NUMBER(destination, source)				\
398  do {									\
399    (destination) = *(source) & 0377;					\
400    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
401  } while (0)
402
403#ifdef DEBUG
404static void
405extract_number (dest, source)
406    int *dest;
407    unsigned char *source;
408{
409  int temp = SIGN_EXTEND_CHAR (*(source + 1));
410  *dest = *source & 0377;
411  *dest += temp << 8;
412}
413
414#ifndef EXTRACT_MACROS /* To debug the macros.  */
415#undef EXTRACT_NUMBER
416#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
417#endif /* not EXTRACT_MACROS */
418
419#endif /* DEBUG */
420
421/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
422   SOURCE must be an lvalue.  */
423
424#define EXTRACT_NUMBER_AND_INCR(destination, source)			\
425  do {									\
426    EXTRACT_NUMBER (destination, source);				\
427    (source) += 2; 							\
428  } while (0)
429
430#ifdef DEBUG
431static void
432extract_number_and_incr (destination, source)
433    int *destination;
434    unsigned char **source;
435{
436  extract_number (destination, *source);
437  *source += 2;
438}
439
440#ifndef EXTRACT_MACROS
441#undef EXTRACT_NUMBER_AND_INCR
442#define EXTRACT_NUMBER_AND_INCR(dest, src) \
443  extract_number_and_incr (&dest, &src)
444#endif /* not EXTRACT_MACROS */
445
446#endif /* DEBUG */
447
448/* If DEBUG is defined, Regex prints many voluminous messages about what
449   it is doing (if the variable `debug' is nonzero).  If linked with the
450   main program in `iregex.c', you can enter patterns and strings
451   interactively.  And if linked with the main program in `main.c' and
452   the other test files, you can run the already-written tests.  */
453
454#ifdef DEBUG
455
456/* We use standard I/O for debugging.  */
457#include <stdio.h>
458
459/* It is useful to test things that ``must'' be true when debugging.  */
460#include <assert.h>
461
462static int debug = 0;
463
464#define DEBUG_STATEMENT(e) e
465#define DEBUG_PRINT1(x) if (debug) printf (x)
466#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
467#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
468#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
469#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 				\
470  if (debug) print_partial_compiled_pattern (s, e)
471#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
472  if (debug) print_double_string (w, s1, sz1, s2, sz2)
473
474
475extern void printchar ();
476
477/* Print the fastmap in human-readable form.  */
478
479void
480print_fastmap (fastmap)
481    char *fastmap;
482{
483  unsigned was_a_range = 0;
484  unsigned i = 0;
485
486  while (i < (1 << BYTEWIDTH))
487    {
488      if (fastmap[i++])
489	{
490	  was_a_range = 0;
491          printchar (i - 1);
492          while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
493            {
494              was_a_range = 1;
495              i++;
496            }
497	  if (was_a_range)
498            {
499              printf ("-");
500              printchar (i - 1);
501            }
502        }
503    }
504  putchar ('\n');
505}
506
507
508/* Print a compiled pattern string in human-readable form, starting at
509   the START pointer into it and ending just before the pointer END.  */
510
511void
512print_partial_compiled_pattern (start, end)
513    unsigned char *start;
514    unsigned char *end;
515{
516  int mcnt, mcnt2;
517  unsigned char *p = start;
518  unsigned char *pend = end;
519
520  if (start == NULL)
521    {
522      printf ("(null)\n");
523      return;
524    }
525
526  /* Loop over pattern commands.  */
527  while (p < pend)
528    {
529      switch ((re_opcode_t) *p++)
530	{
531        case no_op:
532          printf ("/no_op");
533          break;
534
535	case exactn:
536	  mcnt = *p++;
537          printf ("/exactn/%d", mcnt);
538          do
539	    {
540              putchar ('/');
541	      printchar (*p++);
542            }
543          while (--mcnt);
544          break;
545
546	case start_memory:
547          mcnt = *p++;
548          printf ("/start_memory/%d/%d", mcnt, *p++);
549          break;
550
551	case stop_memory:
552          mcnt = *p++;
553	  printf ("/stop_memory/%d/%d", mcnt, *p++);
554          break;
555
556	case duplicate:
557	  printf ("/duplicate/%d", *p++);
558	  break;
559
560	case anychar:
561	  printf ("/anychar");
562	  break;
563
564	case charset:
565        case charset_not:
566          {
567            register int c;
568
569            printf ("/charset%s",
570	            (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
571
572            assert (p + *p < pend);
573
574            for (c = 0; c < *p; c++)
575              {
576                unsigned bit;
577                unsigned char map_byte = p[1 + c];
578
579                putchar ('/');
580
581		for (bit = 0; bit < BYTEWIDTH; bit++)
582                  if (map_byte & (1 << bit))
583                    printchar (c * BYTEWIDTH + bit);
584              }
585	    p += 1 + *p;
586	    break;
587	  }
588
589	case begline:
590	  printf ("/begline");
591          break;
592
593	case endline:
594          printf ("/endline");
595          break;
596
597	case on_failure_jump:
598          extract_number_and_incr (&mcnt, &p);
599  	  printf ("/on_failure_jump/0/%d", mcnt);
600          break;
601
602	case on_failure_keep_string_jump:
603          extract_number_and_incr (&mcnt, &p);
604  	  printf ("/on_failure_keep_string_jump/0/%d", mcnt);
605          break;
606
607	case dummy_failure_jump:
608          extract_number_and_incr (&mcnt, &p);
609  	  printf ("/dummy_failure_jump/0/%d", mcnt);
610          break;
611
612	case push_dummy_failure:
613          printf ("/push_dummy_failure");
614          break;
615
616        case maybe_pop_jump:
617          extract_number_and_incr (&mcnt, &p);
618  	  printf ("/maybe_pop_jump/0/%d", mcnt);
619	  break;
620
621        case pop_failure_jump:
622	  extract_number_and_incr (&mcnt, &p);
623  	  printf ("/pop_failure_jump/0/%d", mcnt);
624	  break;
625
626        case jump_past_alt:
627	  extract_number_and_incr (&mcnt, &p);
628  	  printf ("/jump_past_alt/0/%d", mcnt);
629	  break;
630
631        case jump:
632	  extract_number_and_incr (&mcnt, &p);
633  	  printf ("/jump/0/%d", mcnt);
634	  break;
635
636        case succeed_n:
637          extract_number_and_incr (&mcnt, &p);
638          extract_number_and_incr (&mcnt2, &p);
639 	  printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2);
640          break;
641
642        case jump_n:
643          extract_number_and_incr (&mcnt, &p);
644          extract_number_and_incr (&mcnt2, &p);
645 	  printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2);
646          break;
647
648        case set_number_at:
649          extract_number_and_incr (&mcnt, &p);
650          extract_number_and_incr (&mcnt2, &p);
651 	  printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2);
652          break;
653
654        case wordbound:
655	  printf ("/wordbound");
656	  break;
657
658	case notwordbound:
659	  printf ("/notwordbound");
660          break;
661
662	case wordbeg:
663	  printf ("/wordbeg");
664	  break;
665
666	case wordend:
667	  printf ("/wordend");
668
669#ifdef emacs
670	case before_dot:
671	  printf ("/before_dot");
672          break;
673
674	case at_dot:
675	  printf ("/at_dot");
676          break;
677
678	case after_dot:
679	  printf ("/after_dot");
680          break;
681
682	case syntaxspec:
683          printf ("/syntaxspec");
684	  mcnt = *p++;
685	  printf ("/%d", mcnt);
686          break;
687
688	case notsyntaxspec:
689          printf ("/notsyntaxspec");
690	  mcnt = *p++;
691	  printf ("/%d", mcnt);
692	  break;
693#endif /* emacs */
694
695	case wordchar:
696	  printf ("/wordchar");
697          break;
698
699	case notwordchar:
700	  printf ("/notwordchar");
701          break;
702
703	case begbuf:
704	  printf ("/begbuf");
705          break;
706
707	case endbuf:
708	  printf ("/endbuf");
709          break;
710
711        default:
712          printf ("?%d", *(p-1));
713	}
714    }
715  printf ("/\n");
716}
717
718
719void
720print_compiled_pattern (bufp)
721    struct re_pattern_buffer *bufp;
722{
723  unsigned char *buffer = bufp->buffer;
724
725  print_partial_compiled_pattern (buffer, buffer + bufp->used);
726  printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
727
728  if (bufp->fastmap_accurate && bufp->fastmap)
729    {
730      printf ("fastmap: ");
731      print_fastmap (bufp->fastmap);
732    }
733
734  printf ("re_nsub: %d\t", bufp->re_nsub);
735  printf ("regs_alloc: %d\t", bufp->regs_allocated);
736  printf ("can_be_null: %d\t", bufp->can_be_null);
737  printf ("newline_anchor: %d\n", bufp->newline_anchor);
738  printf ("no_sub: %d\t", bufp->no_sub);
739  printf ("not_bol: %d\t", bufp->not_bol);
740  printf ("not_eol: %d\t", bufp->not_eol);
741  printf ("syntax: %d\n", bufp->syntax);
742  /* Perhaps we should print the translate table?  */
743}
744
745
746void
747print_double_string (where, string1, size1, string2, size2)
748    const char *where;
749    const char *string1;
750    const char *string2;
751    int size1;
752    int size2;
753{
754  unsigned this_char;
755
756  if (where == NULL)
757    printf ("(null)");
758  else
759    {
760      if (FIRST_STRING_P (where))
761        {
762          for (this_char = where - string1; this_char < size1; this_char++)
763            printchar (string1[this_char]);
764
765          where = string2;
766        }
767
768      for (this_char = where - string2; this_char < size2; this_char++)
769        printchar (string2[this_char]);
770    }
771}
772
773#else /* not DEBUG */
774
775#undef assert
776#define assert(e)
777
778#define DEBUG_STATEMENT(e)
779#define DEBUG_PRINT1(x)
780#define DEBUG_PRINT2(x1, x2)
781#define DEBUG_PRINT3(x1, x2, x3)
782#define DEBUG_PRINT4(x1, x2, x3, x4)
783#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
784#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
785
786#endif /* not DEBUG */
787
788/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
789   also be assigned to arbitrarily: each pattern buffer stores its own
790   syntax, so it can be changed between regex compilations.  */
791reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
792
793
794/* Specify the precise syntax of regexps for compilation.  This provides
795   for compatibility for various utilities which historically have
796   different, incompatible syntaxes.
797
798   The argument SYNTAX is a bit mask comprised of the various bits
799   defined in regex.h.  We return the old syntax.  */
800
801reg_syntax_t
802re_set_syntax (syntax)
803    reg_syntax_t syntax;
804{
805  reg_syntax_t ret = re_syntax_options;
806
807  re_syntax_options = syntax;
808  return ret;
809}
810
811/* This table gives an error message for each of the error codes listed
812   in regex.h.  Obviously the order here has to be same as there.  */
813
814static const char *re_error_msg[] =
815  { NULL,					/* REG_NOERROR */
816    "No match",					/* REG_NOMATCH */
817    "Invalid regular expression",		/* REG_BADPAT */
818    "Invalid collation character",		/* REG_ECOLLATE */
819    "Invalid character class name",		/* REG_ECTYPE */
820    "Trailing backslash",			/* REG_EESCAPE */
821    "Invalid back reference",			/* REG_ESUBREG */
822    "Unmatched [ or [^",			/* REG_EBRACK */
823    "Unmatched ( or \\(",			/* REG_EPAREN */
824    "Unmatched \\{",				/* REG_EBRACE */
825    "Invalid content of \\{\\}",		/* REG_BADBR */
826    "Invalid range end",			/* REG_ERANGE */
827    "Memory exhausted",				/* REG_ESPACE */
828    "Invalid preceding regular expression",	/* REG_BADRPT */
829    "Premature end of regular expression",	/* REG_EEND */
830    "Regular expression too big",		/* REG_ESIZE */
831    "Unmatched ) or \\)",			/* REG_ERPAREN */
832  };
833
834/* Subroutine declarations and macros for regex_compile.  */
835
836static void store_op1 (), store_op2 ();
837static void insert_op1 (), insert_op2 ();
838static boolean at_begline_loc_p (), at_endline_loc_p ();
839static boolean group_in_compile_stack ();
840static reg_errcode_t compile_range ();
841
842/* Fetch the next character in the uncompiled pattern---translating it
843   if necessary.  Also cast from a signed character in the constant
844   string passed to us by the user to an unsigned char that we can use
845   as an array index (in, e.g., `translate').  */
846#define PATFETCH(c)							\
847  do {if (p == pend) return REG_EEND;					\
848    c = (unsigned char) *p++;						\
849    if (translate) c = translate[c]; 					\
850  } while (0)
851
852/* Fetch the next character in the uncompiled pattern, with no
853   translation.  */
854#define PATFETCH_RAW(c)							\
855  do {if (p == pend) return REG_EEND;					\
856    c = (unsigned char) *p++; 						\
857  } while (0)
858
859/* Go backwards one character in the pattern.  */
860#define PATUNFETCH p--
861
862
863/* If `translate' is non-null, return translate[D], else just D.  We
864   cast the subscript to translate because some data is declared as
865   `char *', to avoid warnings when a string constant is passed.  But
866   when we use a character as a subscript we must make it unsigned.  */
867#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
868
869
870/* Macros for outputting the compiled pattern into `buffer'.  */
871
872/* If the buffer isn't allocated when it comes in, use this.  */
873#define INIT_BUF_SIZE  32
874
875/* Make sure we have at least N more bytes of space in buffer.  */
876#define GET_BUFFER_SPACE(n)						\
877    while (b - bufp->buffer + (n) > bufp->allocated)			\
878      EXTEND_BUFFER ()
879
880/* Make sure we have one more byte of buffer space and then add C to it.  */
881#define BUF_PUSH(c)							\
882  do {									\
883    GET_BUFFER_SPACE (1);						\
884    *b++ = (unsigned char) (c);						\
885  } while (0)
886
887
888/* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
889#define BUF_PUSH_2(c1, c2)						\
890  do {									\
891    GET_BUFFER_SPACE (2);						\
892    *b++ = (unsigned char) (c1);					\
893    *b++ = (unsigned char) (c2);					\
894  } while (0)
895
896
897/* As with BUF_PUSH_2, except for three bytes.  */
898#define BUF_PUSH_3(c1, c2, c3)						\
899  do {									\
900    GET_BUFFER_SPACE (3);						\
901    *b++ = (unsigned char) (c1);					\
902    *b++ = (unsigned char) (c2);					\
903    *b++ = (unsigned char) (c3);					\
904  } while (0)
905
906
907/* Store a jump with opcode OP at LOC to location TO.  We store a
908   relative address offset by the three bytes the jump itself occupies.  */
909#define STORE_JUMP(op, loc, to) \
910  store_op1 (op, loc, (to) - (loc) - 3)
911
912/* Likewise, for a two-argument jump.  */
913#define STORE_JUMP2(op, loc, to, arg) \
914  store_op2 (op, loc, (to) - (loc) - 3, arg)
915
916/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
917#define INSERT_JUMP(op, loc, to) \
918  insert_op1 (op, loc, (to) - (loc) - 3, b)
919
920/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
921#define INSERT_JUMP2(op, loc, to, arg) \
922  insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
923
924
925/* This is not an arbitrary limit: the arguments which represent offsets
926   into the pattern are two bytes long.  So if 2^16 bytes turns out to
927   be too small, many things would have to change.  */
928#define MAX_BUF_SIZE (1L << 16)
929
930
931/* Extend the buffer by twice its current size via realloc and
932   reset the pointers that pointed into the old block to point to the
933   correct places in the new one.  If extending the buffer results in it
934   being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
935#define EXTEND_BUFFER()							\
936  do { 									\
937    unsigned char *old_buffer = bufp->buffer;				\
938    if (bufp->allocated == MAX_BUF_SIZE) 				\
939      return REG_ESIZE;							\
940    bufp->allocated <<= 1;						\
941    if (bufp->allocated > MAX_BUF_SIZE)					\
942      bufp->allocated = MAX_BUF_SIZE; 					\
943    bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
944    if (bufp->buffer == NULL)						\
945      return REG_ESPACE;						\
946    /* If the buffer moved, move all the pointers into it.  */		\
947    if (old_buffer != bufp->buffer)					\
948      {									\
949        b = (b - old_buffer) + bufp->buffer;				\
950        begalt = (begalt - old_buffer) + bufp->buffer;			\
951        if (fixup_alt_jump)						\
952          fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
953        if (laststart)							\
954          laststart = (laststart - old_buffer) + bufp->buffer;		\
955        if (pending_exact)						\
956          pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\
957      }									\
958  } while (0)
959
960
961/* Since we have one byte reserved for the register number argument to
962   {start,stop}_memory, the maximum number of groups we can report
963   things about is what fits in that byte.  */
964#define MAX_REGNUM 255
965
966/* But patterns can have more than `MAX_REGNUM' registers.  We just
967   ignore the excess.  */
968typedef unsigned regnum_t;
969
970
971/* Macros for the compile stack.  */
972
973/* Since offsets can go either forwards or backwards, this type needs to
974   be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
975typedef int pattern_offset_t;
976
977typedef struct
978{
979  pattern_offset_t begalt_offset;
980  pattern_offset_t fixup_alt_jump;
981  pattern_offset_t inner_group_offset;
982  pattern_offset_t laststart_offset;
983  regnum_t regnum;
984} compile_stack_elt_t;
985
986
987typedef struct
988{
989  compile_stack_elt_t *stack;
990  unsigned size;
991  unsigned avail;			/* Offset of next open position.  */
992} compile_stack_type;
993
994
995#define INIT_COMPILE_STACK_SIZE 32
996
997#define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
998#define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
999
1000/* The next available element.  */
1001#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1002
1003
1004/* Set the bit for character C in a list.  */
1005#define SET_LIST_BIT(c)                               \
1006  (b[((unsigned char) (c)) / BYTEWIDTH]               \
1007   |= 1 << (((unsigned char) c) % BYTEWIDTH))
1008
1009
1010/* Get the next unsigned number in the uncompiled pattern.  */
1011#define GET_UNSIGNED_NUMBER(num) 					\
1012  { if (p != pend)							\
1013     {									\
1014       PATFETCH (c); 							\
1015       while (ISDIGIT (c)) 						\
1016         { 								\
1017           if (num < 0)							\
1018              num = 0;							\
1019           num = num * 10 + c - '0'; 					\
1020           if (p == pend) 						\
1021              break; 							\
1022           PATFETCH (c);						\
1023         } 								\
1024       } 								\
1025    }
1026
1027#define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
1028
1029#define IS_CHAR_CLASS(string)						\
1030   (STREQ (string, "alpha") || STREQ (string, "upper")			\
1031    || STREQ (string, "lower") || STREQ (string, "digit")		\
1032    || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
1033    || STREQ (string, "space") || STREQ (string, "print")		\
1034    || STREQ (string, "punct") || STREQ (string, "graph")		\
1035    || STREQ (string, "cntrl") || STREQ (string, "blank"))
1036
1037/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1038   Returns one of error codes defined in `regex.h', or zero for success.
1039
1040   Assumes the `allocated' (and perhaps `buffer') and `translate'
1041   fields are set in BUFP on entry.
1042
1043   If it succeeds, results are put in BUFP (if it returns an error, the
1044   contents of BUFP are undefined):
1045     `buffer' is the compiled pattern;
1046     `syntax' is set to SYNTAX;
1047     `used' is set to the length of the compiled pattern;
1048     `fastmap_accurate' is zero;
1049     `re_nsub' is the number of subexpressions in PATTERN;
1050     `not_bol' and `not_eol' are zero;
1051
1052   The `fastmap' and `newline_anchor' fields are neither
1053   examined nor set.  */
1054
1055static reg_errcode_t
1056regex_compile (pattern, size, syntax, bufp)
1057     const char *pattern;
1058     int size;
1059     reg_syntax_t syntax;
1060     struct re_pattern_buffer *bufp;
1061{
1062  /* We fetch characters from PATTERN here.  Even though PATTERN is
1063     `char *' (i.e., signed), we declare these variables as unsigned, so
1064     they can be reliably used as array indices.  */
1065  register unsigned char c, c1;
1066
1067  /* A random tempory spot in PATTERN.  */
1068  const char *p1;
1069
1070  /* Points to the end of the buffer, where we should append.  */
1071  register unsigned char *b;
1072
1073  /* Keeps track of unclosed groups.  */
1074  compile_stack_type compile_stack;
1075
1076  /* Points to the current (ending) position in the pattern.  */
1077  const char *p = pattern;
1078  const char *pend = pattern + size;
1079
1080  /* How to translate the characters in the pattern.  */
1081  char *translate = bufp->translate;
1082
1083  /* Address of the count-byte of the most recently inserted `exactn'
1084     command.  This makes it possible to tell if a new exact-match
1085     character can be added to that command or if the character requires
1086     a new `exactn' command.  */
1087  unsigned char *pending_exact = 0;
1088
1089  /* Address of start of the most recently finished expression.
1090     This tells, e.g., postfix * where to find the start of its
1091     operand.  Reset at the beginning of groups and alternatives.  */
1092  unsigned char *laststart = 0;
1093
1094  /* Address of beginning of regexp, or inside of last group.  */
1095  unsigned char *begalt;
1096
1097  /* Place in the uncompiled pattern (i.e., the {) to
1098     which to go back if the interval is invalid.  */
1099  const char *beg_interval;
1100
1101  /* Address of the place where a forward jump should go to the end of
1102     the containing expression.  Each alternative of an `or' -- except the
1103     last -- ends with a forward jump of this sort.  */
1104  unsigned char *fixup_alt_jump = 0;
1105
1106  /* Counts open-groups as they are encountered.  Remembered for the
1107     matching close-group on the compile stack, so the same register
1108     number is put in the stop_memory as the start_memory.  */
1109  regnum_t regnum = 0;
1110
1111#ifdef DEBUG
1112  DEBUG_PRINT1 ("\nCompiling pattern: ");
1113  if (debug)
1114    {
1115      unsigned debug_count;
1116
1117      for (debug_count = 0; debug_count < size; debug_count++)
1118        printchar (pattern[debug_count]);
1119      putchar ('\n');
1120    }
1121#endif /* DEBUG */
1122
1123  /* Initialize the compile stack.  */
1124  compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1125  if (compile_stack.stack == NULL)
1126    return REG_ESPACE;
1127
1128  compile_stack.size = INIT_COMPILE_STACK_SIZE;
1129  compile_stack.avail = 0;
1130
1131  /* Initialize the pattern buffer.  */
1132  bufp->syntax = syntax;
1133  bufp->fastmap_accurate = 0;
1134  bufp->not_bol = bufp->not_eol = 0;
1135
1136  /* Set `used' to zero, so that if we return an error, the pattern
1137     printer (for debugging) will think there's no pattern.  We reset it
1138     at the end.  */
1139  bufp->used = 0;
1140
1141  /* Always count groups, whether or not bufp->no_sub is set.  */
1142  bufp->re_nsub = 0;
1143
1144#if !defined (emacs) && !defined (SYNTAX_TABLE)
1145  /* Initialize the syntax table.  */
1146   init_syntax_once ();
1147#endif
1148
1149  if (bufp->allocated == 0)
1150    {
1151      if (bufp->buffer)
1152	{ /* If zero allocated, but buffer is non-null, try to realloc
1153             enough space.  This loses if buffer's address is bogus, but
1154             that is the user's responsibility.  */
1155          RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1156        }
1157      else
1158        { /* Caller did not allocate a buffer.  Do it for them.  */
1159          bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1160        }
1161      if (!bufp->buffer) return REG_ESPACE;
1162
1163      bufp->allocated = INIT_BUF_SIZE;
1164    }
1165
1166  begalt = b = bufp->buffer;
1167
1168  /* Loop through the uncompiled pattern until we're at the end.  */
1169  while (p != pend)
1170    {
1171      PATFETCH (c);
1172
1173      switch (c)
1174        {
1175        case '^':
1176          {
1177            if (   /* If at start of pattern, it's an operator.  */
1178                   p == pattern + 1
1179                   /* If context independent, it's an operator.  */
1180                || syntax & RE_CONTEXT_INDEP_ANCHORS
1181                   /* Otherwise, depends on what's come before.  */
1182                || at_begline_loc_p (pattern, p, syntax))
1183              BUF_PUSH (begline);
1184            else
1185              goto normal_char;
1186          }
1187          break;
1188
1189
1190        case '$':
1191          {
1192            if (   /* If at end of pattern, it's an operator.  */
1193                   p == pend
1194                   /* If context independent, it's an operator.  */
1195                || syntax & RE_CONTEXT_INDEP_ANCHORS
1196                   /* Otherwise, depends on what's next.  */
1197                || at_endline_loc_p (p, pend, syntax))
1198               BUF_PUSH (endline);
1199             else
1200               goto normal_char;
1201           }
1202           break;
1203
1204
1205	case '+':
1206        case '?':
1207          if ((syntax & RE_BK_PLUS_QM)
1208              || (syntax & RE_LIMITED_OPS))
1209            goto normal_char;
1210        handle_plus:
1211        case '*':
1212          /* If there is no previous pattern... */
1213          if (!laststart)
1214            {
1215              if (syntax & RE_CONTEXT_INVALID_OPS)
1216                return REG_BADRPT;
1217              else if (!(syntax & RE_CONTEXT_INDEP_OPS))
1218                goto normal_char;
1219            }
1220
1221          {
1222            /* Are we optimizing this jump?  */
1223            boolean keep_string_p = false;
1224
1225            /* 1 means zero (many) matches is allowed.  */
1226            char zero_times_ok = 0, many_times_ok = 0;
1227
1228            /* If there is a sequence of repetition chars, collapse it
1229               down to just one (the right one).  We can't combine
1230               interval operators with these because of, e.g., `a{2}*',
1231               which should only match an even number of `a's.  */
1232
1233            for (;;)
1234              {
1235                zero_times_ok |= c != '+';
1236                many_times_ok |= c != '?';
1237
1238                if (p == pend)
1239                  break;
1240
1241                PATFETCH (c);
1242
1243                if (c == '*'
1244                    || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
1245                  ;
1246
1247                else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
1248                  {
1249                    if (p == pend) return REG_EESCAPE;
1250
1251                    PATFETCH (c1);
1252                    if (!(c1 == '+' || c1 == '?'))
1253                      {
1254                        PATUNFETCH;
1255                        PATUNFETCH;
1256                        break;
1257                      }
1258
1259                    c = c1;
1260                  }
1261                else
1262                  {
1263                    PATUNFETCH;
1264                    break;
1265                  }
1266
1267                /* If we get here, we found another repeat character.  */
1268               }
1269
1270            /* Star, etc. applied to an empty pattern is equivalent
1271               to an empty pattern.  */
1272            if (!laststart)
1273              break;
1274
1275            /* Now we know whether or not zero matches is allowed
1276               and also whether or not two or more matches is allowed.  */
1277            if (many_times_ok)
1278              { /* More than one repetition is allowed, so put in at the
1279                   end a backward relative jump from `b' to before the next
1280                   jump we're going to put in below (which jumps from
1281                   laststart to after this jump).
1282
1283                   But if we are at the `*' in the exact sequence `.*\n',
1284                   insert an unconditional jump backwards to the .,
1285                   instead of the beginning of the loop.  This way we only
1286                   push a failure point once, instead of every time
1287                   through the loop.  */
1288                assert (p - 1 > pattern);
1289
1290                /* Allocate the space for the jump.  */
1291                GET_BUFFER_SPACE (3);
1292
1293                /* We know we are not at the first character of the pattern,
1294                   because laststart was nonzero.  And we've already
1295                   incremented `p', by the way, to be the character after
1296                   the `*'.  Do we have to do something analogous here
1297                   for null bytes, because of RE_DOT_NOT_NULL?  */
1298                if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
1299		    && zero_times_ok
1300                    && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
1301                    && !(syntax & RE_DOT_NEWLINE))
1302                  { /* We have .*\n.  */
1303                    STORE_JUMP (jump, b, laststart);
1304                    keep_string_p = true;
1305                  }
1306                else
1307                  /* Anything else.  */
1308                  STORE_JUMP (maybe_pop_jump, b, laststart - 3);
1309
1310                /* We've added more stuff to the buffer.  */
1311                b += 3;
1312              }
1313
1314            /* On failure, jump from laststart to b + 3, which will be the
1315               end of the buffer after this jump is inserted.  */
1316            GET_BUFFER_SPACE (3);
1317            INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
1318                                       : on_failure_jump,
1319                         laststart, b + 3);
1320            pending_exact = 0;
1321            b += 3;
1322
1323            if (!zero_times_ok)
1324              {
1325                /* At least one repetition is required, so insert a
1326                   `dummy_failure_jump' before the initial
1327                   `on_failure_jump' instruction of the loop. This
1328                   effects a skip over that instruction the first time
1329                   we hit that loop.  */
1330                GET_BUFFER_SPACE (3);
1331                INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
1332                b += 3;
1333              }
1334            }
1335	  break;
1336
1337
1338	case '.':
1339          laststart = b;
1340          BUF_PUSH (anychar);
1341          break;
1342
1343
1344        case '[':
1345          {
1346            boolean had_char_class = false;
1347
1348            if (p == pend) return REG_EBRACK;
1349
1350            /* Ensure that we have enough space to push a charset: the
1351               opcode, the length count, and the bitset; 34 bytes in all.  */
1352	    GET_BUFFER_SPACE (34);
1353
1354            laststart = b;
1355
1356            /* We test `*p == '^' twice, instead of using an if
1357               statement, so we only need one BUF_PUSH.  */
1358            BUF_PUSH (*p == '^' ? charset_not : charset);
1359            if (*p == '^')
1360              p++;
1361
1362            /* Remember the first position in the bracket expression.  */
1363            p1 = p;
1364
1365            /* Push the number of bytes in the bitmap.  */
1366            BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
1367
1368            /* Clear the whole map.  */
1369            bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
1370
1371            /* charset_not matches newline according to a syntax bit.  */
1372            if ((re_opcode_t) b[-2] == charset_not
1373                && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
1374              SET_LIST_BIT ('\n');
1375
1376            /* Read in characters and ranges, setting map bits.  */
1377            for (;;)
1378              {
1379                if (p == pend) return REG_EBRACK;
1380
1381                PATFETCH (c);
1382
1383                /* \ might escape characters inside [...] and [^...].  */
1384                if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
1385                  {
1386                    if (p == pend) return REG_EESCAPE;
1387
1388                    PATFETCH (c1);
1389                    SET_LIST_BIT (c1);
1390                    continue;
1391                  }
1392
1393                /* Could be the end of the bracket expression.  If it's
1394                   not (i.e., when the bracket expression is `[]' so
1395                   far), the ']' character bit gets set way below.  */
1396                if (c == ']' && p != p1 + 1)
1397                  break;
1398
1399                /* Look ahead to see if it's a range when the last thing
1400                   was a character class.  */
1401                if (had_char_class && c == '-' && *p != ']')
1402                  return REG_ERANGE;
1403
1404                /* Look ahead to see if it's a range when the last thing
1405                   was a character: if this is a hyphen not at the
1406                   beginning or the end of a list, then it's the range
1407                   operator.  */
1408                if (c == '-'
1409                    && !(p - 2 >= pattern && p[-2] == '[')
1410                    && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
1411                    && *p != ']')
1412                  {
1413                    reg_errcode_t ret
1414                      = compile_range (&p, pend, translate, syntax, b);
1415                    if (ret != REG_NOERROR) return ret;
1416                  }
1417
1418                else if (p[0] == '-' && p[1] != ']')
1419                  { /* This handles ranges made up of characters only.  */
1420                    reg_errcode_t ret;
1421
1422		    /* Move past the `-'.  */
1423                    PATFETCH (c1);
1424
1425                    ret = compile_range (&p, pend, translate, syntax, b);
1426                    if (ret != REG_NOERROR) return ret;
1427                  }
1428
1429                /* See if we're at the beginning of a possible character
1430                   class.  */
1431
1432                else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
1433                  { /* Leave room for the null.  */
1434                    char str[CHAR_CLASS_MAX_LENGTH + 1];
1435
1436                    PATFETCH (c);
1437                    c1 = 0;
1438
1439                    /* If pattern is `[[:'.  */
1440                    if (p == pend) return REG_EBRACK;
1441
1442                    for (;;)
1443                      {
1444                        PATFETCH (c);
1445                        if (c == ':' || c == ']' || p == pend
1446                            || c1 == CHAR_CLASS_MAX_LENGTH)
1447                          break;
1448                        str[c1++] = c;
1449                      }
1450                    str[c1] = '\0';
1451
1452                    /* If isn't a word bracketed by `[:' and:`]':
1453                       undo the ending character, the letters, and leave
1454                       the leading `:' and `[' (but set bits for them).  */
1455                    if (c == ':' && *p == ']')
1456                      {
1457                        int ch;
1458                        boolean is_alnum = STREQ (str, "alnum");
1459                        boolean is_alpha = STREQ (str, "alpha");
1460                        boolean is_blank = STREQ (str, "blank");
1461                        boolean is_cntrl = STREQ (str, "cntrl");
1462                        boolean is_digit = STREQ (str, "digit");
1463                        boolean is_graph = STREQ (str, "graph");
1464                        boolean is_lower = STREQ (str, "lower");
1465                        boolean is_print = STREQ (str, "print");
1466                        boolean is_punct = STREQ (str, "punct");
1467                        boolean is_space = STREQ (str, "space");
1468                        boolean is_upper = STREQ (str, "upper");
1469                        boolean is_xdigit = STREQ (str, "xdigit");
1470
1471                        if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
1472
1473                        /* Throw away the ] at the end of the character
1474                           class.  */
1475                        PATFETCH (c);
1476
1477                        if (p == pend) return REG_EBRACK;
1478
1479                        for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
1480                          {
1481                            if (   (is_alnum  && ISALNUM (ch))
1482                                || (is_alpha  && ISALPHA (ch))
1483                                || (is_blank  && ISBLANK (ch))
1484                                || (is_cntrl  && ISCNTRL (ch))
1485                                || (is_digit  && ISDIGIT (ch))
1486                                || (is_graph  && ISGRAPH (ch))
1487                                || (is_lower  && ISLOWER (ch))
1488                                || (is_print  && ISPRINT (ch))
1489                                || (is_punct  && ISPUNCT (ch))
1490                                || (is_space  && ISSPACE (ch))
1491                                || (is_upper  && ISUPPER (ch))
1492                                || (is_xdigit && ISXDIGIT (ch)))
1493                            SET_LIST_BIT (ch);
1494                          }
1495                        had_char_class = true;
1496                      }
1497                    else
1498                      {
1499                        c1++;
1500                        while (c1--)
1501                          PATUNFETCH;
1502                        SET_LIST_BIT ('[');
1503                        SET_LIST_BIT (':');
1504                        had_char_class = false;
1505                      }
1506                  }
1507                else
1508                  {
1509                    had_char_class = false;
1510                    SET_LIST_BIT (c);
1511                  }
1512              }
1513
1514            /* Discard any (non)matching list bytes that are all 0 at the
1515               end of the map.  Decrease the map-length byte too.  */
1516            while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
1517              b[-1]--;
1518            b += b[-1];
1519          }
1520          break;
1521
1522
1523	case '(':
1524          if (syntax & RE_NO_BK_PARENS)
1525            goto handle_open;
1526          else
1527            goto normal_char;
1528
1529
1530        case ')':
1531          if (syntax & RE_NO_BK_PARENS)
1532            goto handle_close;
1533          else
1534            goto normal_char;
1535
1536
1537        case '\n':
1538          if (syntax & RE_NEWLINE_ALT)
1539            goto handle_alt;
1540          else
1541            goto normal_char;
1542
1543
1544	case '|':
1545          if (syntax & RE_NO_BK_VBAR)
1546            goto handle_alt;
1547          else
1548            goto normal_char;
1549
1550
1551        case '{':
1552           if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
1553             goto handle_interval;
1554           else
1555             goto normal_char;
1556
1557
1558        case '\\':
1559          if (p == pend) return REG_EESCAPE;
1560
1561          /* Do not translate the character after the \, so that we can
1562             distinguish, e.g., \B from \b, even if we normally would
1563             translate, e.g., B to b.  */
1564          PATFETCH_RAW (c);
1565
1566          switch (c)
1567            {
1568            case '(':
1569              if (syntax & RE_NO_BK_PARENS)
1570                goto normal_backslash;
1571
1572            handle_open:
1573              bufp->re_nsub++;
1574              regnum++;
1575
1576              if (COMPILE_STACK_FULL)
1577                {
1578                  RETALLOC (compile_stack.stack, compile_stack.size << 1,
1579                            compile_stack_elt_t);
1580                  if (compile_stack.stack == NULL) return REG_ESPACE;
1581
1582                  compile_stack.size <<= 1;
1583                }
1584
1585              /* These are the values to restore when we hit end of this
1586                 group.  They are all relative offsets, so that if the
1587                 whole pattern moves because of realloc, they will still
1588                 be valid.  */
1589              COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
1590              COMPILE_STACK_TOP.fixup_alt_jump
1591                = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
1592              COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
1593              COMPILE_STACK_TOP.regnum = regnum;
1594
1595              /* We will eventually replace the 0 with the number of
1596                 groups inner to this one.  But do not push a
1597                 start_memory for groups beyond the last one we can
1598                 represent in the compiled pattern.  */
1599              if (regnum <= MAX_REGNUM)
1600                {
1601                  COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
1602                  BUF_PUSH_3 (start_memory, regnum, 0);
1603                }
1604
1605              compile_stack.avail++;
1606
1607              fixup_alt_jump = 0;
1608              laststart = 0;
1609              begalt = b;
1610	      /* If we've reached MAX_REGNUM groups, then this open
1611		 won't actually generate any code, so we'll have to
1612		 clear pending_exact explicitly.  */
1613	      pending_exact = 0;
1614              break;
1615
1616
1617            case ')':
1618              if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
1619
1620              if (COMPILE_STACK_EMPTY)
1621                if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1622                  goto normal_backslash;
1623                else
1624                  return REG_ERPAREN;
1625
1626            handle_close:
1627              if (fixup_alt_jump)
1628                { /* Push a dummy failure point at the end of the
1629                     alternative for a possible future
1630                     `pop_failure_jump' to pop.  See comments at
1631                     `push_dummy_failure' in `re_match_2'.  */
1632                  BUF_PUSH (push_dummy_failure);
1633
1634                  /* We allocated space for this jump when we assigned
1635                     to `fixup_alt_jump', in the `handle_alt' case below.  */
1636                  STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
1637                }
1638
1639              /* See similar code for backslashed left paren above.  */
1640              if (COMPILE_STACK_EMPTY)
1641                if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1642                  goto normal_char;
1643                else
1644                  return REG_ERPAREN;
1645
1646              /* Since we just checked for an empty stack above, this
1647                 ``can't happen''.  */
1648              assert (compile_stack.avail != 0);
1649              {
1650                /* We don't just want to restore into `regnum', because
1651                   later groups should continue to be numbered higher,
1652                   as in `(ab)c(de)' -- the second group is #2.  */
1653                regnum_t this_group_regnum;
1654
1655                compile_stack.avail--;
1656                begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
1657                fixup_alt_jump
1658                  = COMPILE_STACK_TOP.fixup_alt_jump
1659                    ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
1660                    : 0;
1661                laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
1662                this_group_regnum = COMPILE_STACK_TOP.regnum;
1663		/* If we've reached MAX_REGNUM groups, then this open
1664		   won't actually generate any code, so we'll have to
1665		   clear pending_exact explicitly.  */
1666		pending_exact = 0;
1667
1668                /* We're at the end of the group, so now we know how many
1669                   groups were inside this one.  */
1670                if (this_group_regnum <= MAX_REGNUM)
1671                  {
1672                    unsigned char *inner_group_loc
1673                      = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
1674
1675                    *inner_group_loc = regnum - this_group_regnum;
1676                    BUF_PUSH_3 (stop_memory, this_group_regnum,
1677                                regnum - this_group_regnum);
1678                  }
1679              }
1680              break;
1681
1682
1683            case '|':					/* `\|'.  */
1684              if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
1685                goto normal_backslash;
1686            handle_alt:
1687              if (syntax & RE_LIMITED_OPS)
1688                goto normal_char;
1689
1690              /* Insert before the previous alternative a jump which
1691                 jumps to this alternative if the former fails.  */
1692              GET_BUFFER_SPACE (3);
1693              INSERT_JUMP (on_failure_jump, begalt, b + 6);
1694              pending_exact = 0;
1695              b += 3;
1696
1697              /* The alternative before this one has a jump after it
1698                 which gets executed if it gets matched.  Adjust that
1699                 jump so it will jump to this alternative's analogous
1700                 jump (put in below, which in turn will jump to the next
1701                 (if any) alternative's such jump, etc.).  The last such
1702                 jump jumps to the correct final destination.  A picture:
1703                          _____ _____
1704                          |   | |   |
1705                          |   v |   v
1706                         a | b   | c
1707
1708                 If we are at `b', then fixup_alt_jump right now points to a
1709                 three-byte space after `a'.  We'll put in the jump, set
1710                 fixup_alt_jump to right after `b', and leave behind three
1711                 bytes which we'll fill in when we get to after `c'.  */
1712
1713              if (fixup_alt_jump)
1714                STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
1715
1716              /* Mark and leave space for a jump after this alternative,
1717                 to be filled in later either by next alternative or
1718                 when know we're at the end of a series of alternatives.  */
1719              fixup_alt_jump = b;
1720              GET_BUFFER_SPACE (3);
1721              b += 3;
1722
1723              laststart = 0;
1724              begalt = b;
1725              break;
1726
1727
1728            case '{':
1729              /* If \{ is a literal.  */
1730              if (!(syntax & RE_INTERVALS)
1731                     /* If we're at `\{' and it's not the open-interval
1732                        operator.  */
1733                  || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1734                  || (p - 2 == pattern  &&  p == pend))
1735                goto normal_backslash;
1736
1737            handle_interval:
1738              {
1739                /* If got here, then the syntax allows intervals.  */
1740
1741                /* At least (most) this many matches must be made.  */
1742                int lower_bound = -1, upper_bound = -1;
1743
1744                beg_interval = p - 1;
1745
1746                if (p == pend)
1747                  {
1748                    if (syntax & RE_NO_BK_BRACES)
1749                      goto unfetch_interval;
1750                    else
1751                      return REG_EBRACE;
1752                  }
1753
1754                GET_UNSIGNED_NUMBER (lower_bound);
1755
1756                if (c == ',')
1757                  {
1758                    GET_UNSIGNED_NUMBER (upper_bound);
1759                    if (upper_bound < 0) upper_bound = RE_DUP_MAX;
1760                  }
1761                else
1762                  /* Interval such as `{1}' => match exactly once. */
1763                  upper_bound = lower_bound;
1764
1765                if (lower_bound < 0 || upper_bound > RE_DUP_MAX
1766                    || lower_bound > upper_bound)
1767                  {
1768                    if (syntax & RE_NO_BK_BRACES)
1769                      goto unfetch_interval;
1770                    else
1771                      return REG_BADBR;
1772                  }
1773
1774                if (!(syntax & RE_NO_BK_BRACES))
1775                  {
1776                    if (c != '\\') return REG_EBRACE;
1777
1778                    PATFETCH (c);
1779                  }
1780
1781                if (c != '}')
1782                  {
1783                    if (syntax & RE_NO_BK_BRACES)
1784                      goto unfetch_interval;
1785                    else
1786                      return REG_BADBR;
1787                  }
1788
1789                /* We just parsed a valid interval.  */
1790
1791                /* If it's invalid to have no preceding re.  */
1792                if (!laststart)
1793                  {
1794                    if (syntax & RE_CONTEXT_INVALID_OPS)
1795                      return REG_BADRPT;
1796                    else if (syntax & RE_CONTEXT_INDEP_OPS)
1797                      laststart = b;
1798                    else
1799                      goto unfetch_interval;
1800                  }
1801
1802                /* If the upper bound is zero, don't want to succeed at
1803                   all; jump from `laststart' to `b + 3', which will be
1804                   the end of the buffer after we insert the jump.  */
1805                 if (upper_bound == 0)
1806                   {
1807                     GET_BUFFER_SPACE (3);
1808                     INSERT_JUMP (jump, laststart, b + 3);
1809                     b += 3;
1810                   }
1811
1812                 /* Otherwise, we have a nontrivial interval.  When
1813                    we're all done, the pattern will look like:
1814                      set_number_at <jump count> <upper bound>
1815                      set_number_at <succeed_n count> <lower bound>
1816                      succeed_n <after jump addr> <succed_n count>
1817                      <body of loop>
1818                      jump_n <succeed_n addr> <jump count>
1819                    (The upper bound and `jump_n' are omitted if
1820                    `upper_bound' is 1, though.)  */
1821                 else
1822                   { /* If the upper bound is > 1, we need to insert
1823                        more at the end of the loop.  */
1824                     unsigned nbytes = 10 + (upper_bound > 1) * 10;
1825
1826                     GET_BUFFER_SPACE (nbytes);
1827
1828                     /* Initialize lower bound of the `succeed_n', even
1829                        though it will be set during matching by its
1830                        attendant `set_number_at' (inserted next),
1831                        because `re_compile_fastmap' needs to know.
1832                        Jump to the `jump_n' we might insert below.  */
1833                     INSERT_JUMP2 (succeed_n, laststart,
1834                                   b + 5 + (upper_bound > 1) * 5,
1835                                   lower_bound);
1836                     b += 5;
1837
1838                     /* Code to initialize the lower bound.  Insert
1839                        before the `succeed_n'.  The `5' is the last two
1840                        bytes of this `set_number_at', plus 3 bytes of
1841                        the following `succeed_n'.  */
1842                     insert_op2 (set_number_at, laststart, 5, lower_bound, b);
1843                     b += 5;
1844
1845                     if (upper_bound > 1)
1846                       { /* More than one repetition is allowed, so
1847                            append a backward jump to the `succeed_n'
1848                            that starts this interval.
1849
1850                            When we've reached this during matching,
1851                            we'll have matched the interval once, so
1852                            jump back only `upper_bound - 1' times.  */
1853                         STORE_JUMP2 (jump_n, b, laststart + 5,
1854                                      upper_bound - 1);
1855                         b += 5;
1856
1857                         /* The location we want to set is the second
1858                            parameter of the `jump_n'; that is `b-2' as
1859                            an absolute address.  `laststart' will be
1860                            the `set_number_at' we're about to insert;
1861                            `laststart+3' the number to set, the source
1862                            for the relative address.  But we are
1863                            inserting into the middle of the pattern --
1864                            so everything is getting moved up by 5.
1865                            Conclusion: (b - 2) - (laststart + 3) + 5,
1866                            i.e., b - laststart.
1867
1868                            We insert this at the beginning of the loop
1869                            so that if we fail during matching, we'll
1870                            reinitialize the bounds.  */
1871                         insert_op2 (set_number_at, laststart, b - laststart,
1872                                     upper_bound - 1, b);
1873                         b += 5;
1874                       }
1875                   }
1876                pending_exact = 0;
1877                beg_interval = NULL;
1878              }
1879              break;
1880
1881            unfetch_interval:
1882              /* If an invalid interval, match the characters as literals.  */
1883               assert (beg_interval);
1884               p = beg_interval;
1885               beg_interval = NULL;
1886
1887               /* normal_char and normal_backslash need `c'.  */
1888               PATFETCH (c);
1889
1890               if (!(syntax & RE_NO_BK_BRACES))
1891                 {
1892                   if (p > pattern  &&  p[-1] == '\\')
1893                     goto normal_backslash;
1894                 }
1895               goto normal_char;
1896
1897#ifdef emacs
1898            /* There is no way to specify the before_dot and after_dot
1899               operators.  rms says this is ok.  --karl  */
1900            case '=':
1901              BUF_PUSH (at_dot);
1902              break;
1903
1904            case 's':
1905              laststart = b;
1906              PATFETCH (c);
1907              BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
1908              break;
1909
1910            case 'S':
1911              laststart = b;
1912              PATFETCH (c);
1913              BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
1914              break;
1915#endif /* emacs */
1916
1917
1918            case 'w':
1919              laststart = b;
1920              BUF_PUSH (wordchar);
1921              break;
1922
1923
1924            case 'W':
1925              laststart = b;
1926              BUF_PUSH (notwordchar);
1927              break;
1928
1929
1930            case '<':
1931              BUF_PUSH (wordbeg);
1932              break;
1933
1934            case '>':
1935              BUF_PUSH (wordend);
1936              break;
1937
1938            case 'b':
1939              BUF_PUSH (wordbound);
1940              break;
1941
1942            case 'B':
1943              BUF_PUSH (notwordbound);
1944              break;
1945
1946            case '`':
1947              BUF_PUSH (begbuf);
1948              break;
1949
1950            case '\'':
1951              BUF_PUSH (endbuf);
1952              break;
1953
1954            case '1': case '2': case '3': case '4': case '5':
1955            case '6': case '7': case '8': case '9':
1956              if (syntax & RE_NO_BK_REFS)
1957                goto normal_char;
1958
1959              c1 = c - '0';
1960
1961              if (c1 > regnum)
1962                return REG_ESUBREG;
1963
1964              /* Can't back reference to a subexpression if inside of it.  */
1965              if (group_in_compile_stack (compile_stack, c1))
1966                goto normal_char;
1967
1968              laststart = b;
1969              BUF_PUSH_2 (duplicate, c1);
1970              break;
1971
1972
1973            case '+':
1974            case '?':
1975              if (syntax & RE_BK_PLUS_QM)
1976                goto handle_plus;
1977              else
1978                goto normal_backslash;
1979
1980            default:
1981            normal_backslash:
1982              /* You might think it would be useful for \ to mean
1983                 not to translate; but if we don't translate it
1984                 it will never match anything.  */
1985              c = TRANSLATE (c);
1986              goto normal_char;
1987            }
1988          break;
1989
1990
1991	default:
1992        /* Expects the character in `c'.  */
1993	normal_char:
1994	      /* If no exactn currently being built.  */
1995          if (!pending_exact
1996
1997              /* If last exactn not at current position.  */
1998              || pending_exact + *pending_exact + 1 != b
1999
2000              /* We have only one byte following the exactn for the count.  */
2001	      || *pending_exact == (1 << BYTEWIDTH) - 1
2002
2003              /* If followed by a repetition operator.  */
2004              || *p == '*' || *p == '^'
2005	      || ((syntax & RE_BK_PLUS_QM)
2006		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
2007		  : (*p == '+' || *p == '?'))
2008	      || ((syntax & RE_INTERVALS)
2009                  && ((syntax & RE_NO_BK_BRACES)
2010		      ? *p == '{'
2011                      : (p[0] == '\\' && p[1] == '{'))))
2012	    {
2013	      /* Start building a new exactn.  */
2014
2015              laststart = b;
2016
2017	      BUF_PUSH_2 (exactn, 0);
2018	      pending_exact = b - 1;
2019            }
2020
2021	  BUF_PUSH (c);
2022          (*pending_exact)++;
2023	  break;
2024        } /* switch (c) */
2025    } /* while p != pend */
2026
2027
2028  /* Through the pattern now.  */
2029
2030  if (fixup_alt_jump)
2031    STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2032
2033  if (!COMPILE_STACK_EMPTY)
2034    return REG_EPAREN;
2035
2036  free (compile_stack.stack);
2037
2038  /* We have succeeded; set the length of the buffer.  */
2039  bufp->used = b - bufp->buffer;
2040
2041#ifdef DEBUG
2042  if (debug)
2043    {
2044      DEBUG_PRINT1 ("\nCompiled pattern: ");
2045      print_compiled_pattern (bufp);
2046    }
2047#endif /* DEBUG */
2048
2049  return REG_NOERROR;
2050} /* regex_compile */
2051
2052/* Subroutines for `regex_compile'.  */
2053
2054/* Store OP at LOC followed by two-byte integer parameter ARG.  */
2055
2056static void
2057store_op1 (op, loc, arg)
2058    re_opcode_t op;
2059    unsigned char *loc;
2060    int arg;
2061{
2062  *loc = (unsigned char) op;
2063  STORE_NUMBER (loc + 1, arg);
2064}
2065
2066
2067/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
2068
2069static void
2070store_op2 (op, loc, arg1, arg2)
2071    re_opcode_t op;
2072    unsigned char *loc;
2073    int arg1, arg2;
2074{
2075  *loc = (unsigned char) op;
2076  STORE_NUMBER (loc + 1, arg1);
2077  STORE_NUMBER (loc + 3, arg2);
2078}
2079
2080
2081/* Copy the bytes from LOC to END to open up three bytes of space at LOC
2082   for OP followed by two-byte integer parameter ARG.  */
2083
2084static void
2085insert_op1 (op, loc, arg, end)
2086    re_opcode_t op;
2087    unsigned char *loc;
2088    int arg;
2089    unsigned char *end;
2090{
2091  register unsigned char *pfrom = end;
2092  register unsigned char *pto = end + 3;
2093
2094  while (pfrom != loc)
2095    *--pto = *--pfrom;
2096
2097  store_op1 (op, loc, arg);
2098}
2099
2100
2101/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
2102
2103static void
2104insert_op2 (op, loc, arg1, arg2, end)
2105    re_opcode_t op;
2106    unsigned char *loc;
2107    int arg1, arg2;
2108    unsigned char *end;
2109{
2110  register unsigned char *pfrom = end;
2111  register unsigned char *pto = end + 5;
2112
2113  while (pfrom != loc)
2114    *--pto = *--pfrom;
2115
2116  store_op2 (op, loc, arg1, arg2);
2117}
2118
2119
2120/* P points to just after a ^ in PATTERN.  Return true if that ^ comes
2121   after an alternative or a begin-subexpression.  We assume there is at
2122   least one character before the ^.  */
2123
2124static boolean
2125at_begline_loc_p (pattern, p, syntax)
2126    const char *pattern, *p;
2127    reg_syntax_t syntax;
2128{
2129  const char *prev = p - 2;
2130  boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
2131
2132  return
2133       /* After a subexpression?  */
2134       (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
2135       /* After an alternative?  */
2136    || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
2137}
2138
2139
2140/* The dual of at_begline_loc_p.  This one is for $.  We assume there is
2141   at least one character after the $, i.e., `P < PEND'.  */
2142
2143static boolean
2144at_endline_loc_p (p, pend, syntax)
2145    const char *p, *pend;
2146    int syntax;
2147{
2148  const char *next = p;
2149  boolean next_backslash = *next == '\\';
2150  const char *next_next = p + 1 < pend ? p + 1 : NULL;
2151
2152  return
2153       /* Before a subexpression?  */
2154       (syntax & RE_NO_BK_PARENS ? *next == ')'
2155        : next_backslash && next_next && *next_next == ')')
2156       /* Before an alternative?  */
2157    || (syntax & RE_NO_BK_VBAR ? *next == '|'
2158        : next_backslash && next_next && *next_next == '|');
2159}
2160
2161
2162/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
2163   false if it's not.  */
2164
2165static boolean
2166group_in_compile_stack (compile_stack, regnum)
2167    compile_stack_type compile_stack;
2168    regnum_t regnum;
2169{
2170  int this_element;
2171
2172  for (this_element = compile_stack.avail - 1;
2173       this_element >= 0;
2174       this_element--)
2175    if (compile_stack.stack[this_element].regnum == regnum)
2176      return true;
2177
2178  return false;
2179}
2180
2181
2182/* Read the ending character of a range (in a bracket expression) from the
2183   uncompiled pattern *P_PTR (which ends at PEND).  We assume the
2184   starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
2185   Then we set the translation of all bits between the starting and
2186   ending characters (inclusive) in the compiled pattern B.
2187
2188   Return an error code.
2189
2190   We use these short variable names so we can use the same macros as
2191   `regex_compile' itself.  */
2192
2193static reg_errcode_t
2194compile_range (p_ptr, pend, translate, syntax, b)
2195    const char **p_ptr, *pend;
2196    char *translate;
2197    reg_syntax_t syntax;
2198    unsigned char *b;
2199{
2200  unsigned this_char;
2201
2202  const char *p = *p_ptr;
2203  int range_start, range_end;
2204
2205  if (p == pend)
2206    return REG_ERANGE;
2207
2208  /* Even though the pattern is a signed `char *', we need to fetch
2209     with unsigned char *'s; if the high bit of the pattern character
2210     is set, the range endpoints will be negative if we fetch using a
2211     signed char *.
2212
2213     We also want to fetch the endpoints without translating them; the
2214     appropriate translation is done in the bit-setting loop below.  */
2215  range_start = ((unsigned char *) p)[-2];
2216  range_end   = ((unsigned char *) p)[0];
2217
2218  /* Have to increment the pointer into the pattern string, so the
2219     caller isn't still at the ending character.  */
2220  (*p_ptr)++;
2221
2222  /* If the start is after the end, the range is empty.  */
2223  if (range_start > range_end)
2224    return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
2225
2226  /* Here we see why `this_char' has to be larger than an `unsigned
2227     char' -- the range is inclusive, so if `range_end' == 0xff
2228     (assuming 8-bit characters), we would otherwise go into an infinite
2229     loop, since all characters <= 0xff.  */
2230  for (this_char = range_start; this_char <= range_end; this_char++)
2231    {
2232      SET_LIST_BIT (TRANSLATE (this_char));
2233    }
2234
2235  return REG_NOERROR;
2236}
2237
2238/* Failure stack declarations and macros; both re_compile_fastmap and
2239   re_match_2 use a failure stack.  These have to be macros because of
2240   REGEX_ALLOCATE.  */
2241
2242
2243/* Number of failure points for which to initially allocate space
2244   when matching.  If this number is exceeded, we allocate more
2245   space, so it is not a hard limit.  */
2246#ifndef INIT_FAILURE_ALLOC
2247#define INIT_FAILURE_ALLOC 5
2248#endif
2249
2250/* Roughly the maximum number of failure points on the stack.  Would be
2251   exactly that if always used MAX_FAILURE_SPACE each time we failed.
2252   This is a variable only so users of regex can assign to it; we never
2253   change it ourselves.  */
2254int re_max_failures = 2000;
2255
2256typedef const unsigned char *fail_stack_elt_t;
2257
2258typedef struct
2259{
2260  fail_stack_elt_t *stack;
2261  unsigned size;
2262  unsigned avail;			/* Offset of next open position.  */
2263} fail_stack_type;
2264
2265#define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
2266#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
2267#define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
2268#define FAIL_STACK_TOP()       (fail_stack.stack[fail_stack.avail])
2269
2270
2271/* Initialize `fail_stack'.  Do `return -2' if the alloc fails.  */
2272
2273#define INIT_FAIL_STACK()						\
2274  do {									\
2275    fail_stack.stack = (fail_stack_elt_t *)				\
2276      REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t));	\
2277									\
2278    if (fail_stack.stack == NULL)					\
2279      return -2;							\
2280									\
2281    fail_stack.size = INIT_FAILURE_ALLOC;				\
2282    fail_stack.avail = 0;						\
2283  } while (0)
2284
2285
2286/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
2287
2288   Return 1 if succeeds, and 0 if either ran out of memory
2289   allocating space for it or it was already too large.
2290
2291   REGEX_REALLOCATE requires `destination' be declared.   */
2292
2293#define DOUBLE_FAIL_STACK(fail_stack)					\
2294  ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS		\
2295   ? 0									\
2296   : ((fail_stack).stack = (fail_stack_elt_t *)				\
2297        REGEX_REALLOCATE ((fail_stack).stack, 				\
2298          (fail_stack).size * sizeof (fail_stack_elt_t),		\
2299          ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),	\
2300									\
2301      (fail_stack).stack == NULL					\
2302      ? 0								\
2303      : ((fail_stack).size <<= 1, 					\
2304         1)))
2305
2306
2307/* Push PATTERN_OP on FAIL_STACK.
2308
2309   Return 1 if was able to do so and 0 if ran out of memory allocating
2310   space to do so.  */
2311#define PUSH_PATTERN_OP(pattern_op, fail_stack)				\
2312  ((FAIL_STACK_FULL ()							\
2313    && !DOUBLE_FAIL_STACK (fail_stack))					\
2314    ? 0									\
2315    : ((fail_stack).stack[(fail_stack).avail++] = pattern_op,		\
2316       1))
2317
2318/* This pushes an item onto the failure stack.  Must be a four-byte
2319   value.  Assumes the variable `fail_stack'.  Probably should only
2320   be called from within `PUSH_FAILURE_POINT'.  */
2321#define PUSH_FAILURE_ITEM(item)						\
2322  fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item
2323
2324/* The complement operation.  Assumes `fail_stack' is nonempty.  */
2325#define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]
2326
2327/* Used to omit pushing failure point id's when we're not debugging.  */
2328#ifdef DEBUG
2329#define DEBUG_PUSH PUSH_FAILURE_ITEM
2330#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
2331#else
2332#define DEBUG_PUSH(item)
2333#define DEBUG_POP(item_addr)
2334#endif
2335
2336
2337/* Push the information about the state we will need
2338   if we ever fail back to it.
2339
2340   Requires variables fail_stack, regstart, regend, reg_info, and
2341   num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
2342   declared.
2343
2344   Does `return FAILURE_CODE' if runs out of memory.  */
2345
2346#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
2347  do {									\
2348    char *destination;							\
2349    /* Must be int, so when we don't save any registers, the arithmetic	\
2350       of 0 + -1 isn't done as unsigned.  */				\
2351    int this_reg;							\
2352    									\
2353    DEBUG_STATEMENT (failure_id++);					\
2354    DEBUG_STATEMENT (nfailure_points_pushed++);				\
2355    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
2356    DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
2357    DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
2358									\
2359    DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);		\
2360    DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
2361									\
2362    /* Ensure we have enough space allocated for what we will push.  */	\
2363    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
2364      {									\
2365        if (!DOUBLE_FAIL_STACK (fail_stack))			\
2366          return failure_code;						\
2367									\
2368        DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
2369		       (fail_stack).size);				\
2370        DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
2371      }									\
2372									\
2373    /* Push the info, starting with the registers.  */			\
2374    DEBUG_PRINT1 ("\n");						\
2375									\
2376    for (this_reg = lowest_active_reg; this_reg <= highest_active_reg;	\
2377         this_reg++)							\
2378      {									\
2379	DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);			\
2380        DEBUG_STATEMENT (num_regs_pushed++);				\
2381									\
2382	DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);		\
2383        PUSH_FAILURE_ITEM (regstart[this_reg]);				\
2384                                                                        \
2385	DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);		\
2386        PUSH_FAILURE_ITEM (regend[this_reg]);				\
2387									\
2388	DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);	\
2389        DEBUG_PRINT2 (" match_null=%d",					\
2390                      REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
2391        DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
2392        DEBUG_PRINT2 (" matched_something=%d",				\
2393                      MATCHED_SOMETHING (reg_info[this_reg]));		\
2394        DEBUG_PRINT2 (" ever_matched=%d",				\
2395                      EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
2396	DEBUG_PRINT1 ("\n");						\
2397        PUSH_FAILURE_ITEM (reg_info[this_reg].word);			\
2398      }									\
2399									\
2400    DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
2401    PUSH_FAILURE_ITEM (lowest_active_reg);				\
2402									\
2403    DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
2404    PUSH_FAILURE_ITEM (highest_active_reg);				\
2405									\
2406    DEBUG_PRINT2 ("  Pushing pattern 0x%x: ", pattern_place);		\
2407    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
2408    PUSH_FAILURE_ITEM (pattern_place);					\
2409									\
2410    DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);		\
2411    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
2412				 size2);				\
2413    DEBUG_PRINT1 ("'\n");						\
2414    PUSH_FAILURE_ITEM (string_place);					\
2415									\
2416    DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
2417    DEBUG_PUSH (failure_id);						\
2418  } while (0)
2419
2420/* This is the number of items that are pushed and popped on the stack
2421   for each register.  */
2422#define NUM_REG_ITEMS  3
2423
2424/* Individual items aside from the registers.  */
2425#ifdef DEBUG
2426#define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
2427#else
2428#define NUM_NONREG_ITEMS 4
2429#endif
2430
2431/* We push at most this many items on the stack.  */
2432#define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
2433
2434/* We actually push this many items.  */
2435#define NUM_FAILURE_ITEMS						\
2436  ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS 	\
2437    + NUM_NONREG_ITEMS)
2438
2439/* How many items can still be added to the stack without overflowing it.  */
2440#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
2441
2442
2443/* Pops what PUSH_FAIL_STACK pushes.
2444
2445   We restore into the parameters, all of which should be lvalues:
2446     STR -- the saved data position.
2447     PAT -- the saved pattern position.
2448     LOW_REG, HIGH_REG -- the highest and lowest active registers.
2449     REGSTART, REGEND -- arrays of string positions.
2450     REG_INFO -- array of information about each subexpression.
2451
2452   Also assumes the variables `fail_stack' and (if debugging), `bufp',
2453   `pend', `string1', `size1', `string2', and `size2'.  */
2454
2455#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
2456{									\
2457  DEBUG_STATEMENT (fail_stack_elt_t failure_id;)			\
2458  int this_reg;								\
2459  const unsigned char *string_temp;					\
2460									\
2461  assert (!FAIL_STACK_EMPTY ());					\
2462									\
2463  /* Remove failure points and point to how many regs pushed.  */	\
2464  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
2465  DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
2466  DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
2467									\
2468  assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
2469									\
2470  DEBUG_POP (&failure_id);						\
2471  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
2472									\
2473  /* If the saved string location is NULL, it came from an		\
2474     on_failure_keep_string_jump opcode, and we want to throw away the	\
2475     saved NULL, thus retaining our current position in the string.  */	\
2476  string_temp = POP_FAILURE_ITEM ();					\
2477  if (string_temp != NULL)						\
2478    str = (const char *) string_temp;					\
2479									\
2480  DEBUG_PRINT2 ("  Popping string 0x%x: `", str);			\
2481  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
2482  DEBUG_PRINT1 ("'\n");							\
2483									\
2484  pat = (unsigned char *) POP_FAILURE_ITEM ();				\
2485  DEBUG_PRINT2 ("  Popping pattern 0x%x: ", pat);			\
2486  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
2487									\
2488  /* Restore register info.  */						\
2489  high_reg = (unsigned) POP_FAILURE_ITEM ();				\
2490  DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);		\
2491									\
2492  low_reg = (unsigned) POP_FAILURE_ITEM ();				\
2493  DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);		\
2494									\
2495  for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
2496    {									\
2497      DEBUG_PRINT2 ("    Popping reg: %d\n", this_reg);			\
2498									\
2499      reg_info[this_reg].word = POP_FAILURE_ITEM ();			\
2500      DEBUG_PRINT2 ("      info: 0x%x\n", reg_info[this_reg]);		\
2501									\
2502      regend[this_reg] = (const char *) POP_FAILURE_ITEM ();		\
2503      DEBUG_PRINT2 ("      end: 0x%x\n", regend[this_reg]);		\
2504									\
2505      regstart[this_reg] = (const char *) POP_FAILURE_ITEM ();		\
2506      DEBUG_PRINT2 ("      start: 0x%x\n", regstart[this_reg]);		\
2507    }									\
2508									\
2509  DEBUG_STATEMENT (nfailure_points_popped++);				\
2510} /* POP_FAILURE_POINT */
2511
2512/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
2513   BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
2514   characters can start a string that matches the pattern.  This fastmap
2515   is used by re_search to skip quickly over impossible starting points.
2516
2517   The caller must supply the address of a (1 << BYTEWIDTH)-byte data
2518   area as BUFP->fastmap.
2519
2520   We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
2521   the pattern buffer.
2522
2523   Returns 0 if we succeed, -2 if an internal error.   */
2524
2525int
2526re_compile_fastmap (bufp)
2527     struct re_pattern_buffer *bufp;
2528{
2529  int j, k;
2530  fail_stack_type fail_stack;
2531#ifndef REGEX_MALLOC
2532  char *destination;
2533#endif
2534  /* We don't push any register information onto the failure stack.  */
2535  unsigned num_regs = 0;
2536
2537  register char *fastmap = bufp->fastmap;
2538  unsigned char *pattern = bufp->buffer;
2539  unsigned long size = bufp->used;
2540  const unsigned char *p = pattern;
2541  register unsigned char *pend = pattern + size;
2542
2543  /* Assume that each path through the pattern can be null until
2544     proven otherwise.  We set this false at the bottom of switch
2545     statement, to which we get only if a particular path doesn't
2546     match the empty string.  */
2547  boolean path_can_be_null = true;
2548
2549  /* We aren't doing a `succeed_n' to begin with.  */
2550  boolean succeed_n_p = false;
2551
2552  assert (fastmap != NULL && p != NULL);
2553
2554  INIT_FAIL_STACK ();
2555  bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
2556  bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
2557  bufp->can_be_null = 0;
2558
2559  while (p != pend || !FAIL_STACK_EMPTY ())
2560    {
2561      if (p == pend)
2562        {
2563          bufp->can_be_null |= path_can_be_null;
2564
2565          /* Reset for next path.  */
2566          path_can_be_null = true;
2567
2568          p = fail_stack.stack[--fail_stack.avail];
2569	}
2570
2571      /* We should never be about to go beyond the end of the pattern.  */
2572      assert (p < pend);
2573
2574#ifdef SWITCH_ENUM_BUG
2575      switch ((int) ((re_opcode_t) *p++))
2576#else
2577      switch ((re_opcode_t) *p++)
2578#endif
2579	{
2580
2581        /* I guess the idea here is to simply not bother with a fastmap
2582           if a backreference is used, since it's too hard to figure out
2583           the fastmap for the corresponding group.  Setting
2584           `can_be_null' stops `re_search_2' from using the fastmap, so
2585           that is all we do.  */
2586	case duplicate:
2587	  bufp->can_be_null = 1;
2588          return 0;
2589
2590
2591      /* Following are the cases which match a character.  These end
2592         with `break'.  */
2593
2594	case exactn:
2595          fastmap[p[1]] = 1;
2596	  break;
2597
2598
2599        case charset:
2600          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2601	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
2602              fastmap[j] = 1;
2603	  break;
2604
2605
2606	case charset_not:
2607	  /* Chars beyond end of map must be allowed.  */
2608	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
2609            fastmap[j] = 1;
2610
2611	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2612	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
2613              fastmap[j] = 1;
2614          break;
2615
2616
2617	case wordchar:
2618	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2619	    if (SYNTAX (j) == Sword)
2620	      fastmap[j] = 1;
2621	  break;
2622
2623
2624	case notwordchar:
2625	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2626	    if (SYNTAX (j) != Sword)
2627	      fastmap[j] = 1;
2628	  break;
2629
2630
2631        case anychar:
2632          /* `.' matches anything ...  */
2633	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2634            fastmap[j] = 1;
2635
2636          /* ... except perhaps newline.  */
2637          if (!(bufp->syntax & RE_DOT_NEWLINE))
2638            fastmap['\n'] = 0;
2639
2640          /* Return if we have already set `can_be_null'; if we have,
2641             then the fastmap is irrelevant.  Something's wrong here.  */
2642	  else if (bufp->can_be_null)
2643	    return 0;
2644
2645          /* Otherwise, have to check alternative paths.  */
2646	  break;
2647
2648
2649#ifdef emacs
2650        case syntaxspec:
2651	  k = *p++;
2652	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2653	    if (SYNTAX (j) == (enum syntaxcode) k)
2654	      fastmap[j] = 1;
2655	  break;
2656
2657
2658	case notsyntaxspec:
2659	  k = *p++;
2660	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2661	    if (SYNTAX (j) != (enum syntaxcode) k)
2662	      fastmap[j] = 1;
2663	  break;
2664
2665
2666      /* All cases after this match the empty string.  These end with
2667         `continue'.  */
2668
2669
2670	case before_dot:
2671	case at_dot:
2672	case after_dot:
2673          continue;
2674#endif /* not emacs */
2675
2676
2677        case no_op:
2678        case begline:
2679        case endline:
2680	case begbuf:
2681	case endbuf:
2682	case wordbound:
2683	case notwordbound:
2684	case wordbeg:
2685	case wordend:
2686        case push_dummy_failure:
2687          continue;
2688
2689
2690	case jump_n:
2691        case pop_failure_jump:
2692	case maybe_pop_jump:
2693	case jump:
2694        case jump_past_alt:
2695	case dummy_failure_jump:
2696          EXTRACT_NUMBER_AND_INCR (j, p);
2697	  p += j;
2698	  if (j > 0)
2699	    continue;
2700
2701          /* Jump backward implies we just went through the body of a
2702             loop and matched nothing.  Opcode jumped to should be
2703             `on_failure_jump' or `succeed_n'.  Just treat it like an
2704             ordinary jump.  For a * loop, it has pushed its failure
2705             point already; if so, discard that as redundant.  */
2706          if ((re_opcode_t) *p != on_failure_jump
2707	      && (re_opcode_t) *p != succeed_n)
2708	    continue;
2709
2710          p++;
2711          EXTRACT_NUMBER_AND_INCR (j, p);
2712          p += j;
2713
2714          /* If what's on the stack is where we are now, pop it.  */
2715          if (!FAIL_STACK_EMPTY ()
2716	      && fail_stack.stack[fail_stack.avail - 1] == p)
2717            fail_stack.avail--;
2718
2719          continue;
2720
2721
2722        case on_failure_jump:
2723        case on_failure_keep_string_jump:
2724	handle_on_failure_jump:
2725          EXTRACT_NUMBER_AND_INCR (j, p);
2726
2727          /* For some patterns, e.g., `(a?)?', `p+j' here points to the
2728             end of the pattern.  We don't want to push such a point,
2729             since when we restore it above, entering the switch will
2730             increment `p' past the end of the pattern.  We don't need
2731             to push such a point since we obviously won't find any more
2732             fastmap entries beyond `pend'.  Such a pattern can match
2733             the null string, though.  */
2734          if (p + j < pend)
2735            {
2736              if (!PUSH_PATTERN_OP (p + j, fail_stack))
2737                return -2;
2738            }
2739          else
2740            bufp->can_be_null = 1;
2741
2742          if (succeed_n_p)
2743            {
2744              EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
2745              succeed_n_p = false;
2746	    }
2747
2748          continue;
2749
2750
2751	case succeed_n:
2752          /* Get to the number of times to succeed.  */
2753          p += 2;
2754
2755          /* Increment p past the n for when k != 0.  */
2756          EXTRACT_NUMBER_AND_INCR (k, p);
2757          if (k == 0)
2758	    {
2759              p -= 4;
2760  	      succeed_n_p = true;  /* Spaghetti code alert.  */
2761              goto handle_on_failure_jump;
2762            }
2763          continue;
2764
2765
2766	case set_number_at:
2767          p += 4;
2768          continue;
2769
2770
2771	case start_memory:
2772        case stop_memory:
2773	  p += 2;
2774	  continue;
2775
2776
2777	default:
2778          abort (); /* We have listed all the cases.  */
2779        } /* switch *p++ */
2780
2781      /* Getting here means we have found the possible starting
2782         characters for one path of the pattern -- and that the empty
2783         string does not match.  We need not follow this path further.
2784         Instead, look at the next alternative (remembered on the
2785         stack), or quit if no more.  The test at the top of the loop
2786         does these things.  */
2787      path_can_be_null = false;
2788      p = pend;
2789    } /* while p */
2790
2791  /* Set `can_be_null' for the last path (also the first path, if the
2792     pattern is empty).  */
2793  bufp->can_be_null |= path_can_be_null;
2794  return 0;
2795} /* re_compile_fastmap */
2796
2797/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
2798   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
2799   this memory for recording register information.  STARTS and ENDS
2800   must be allocated using the malloc library routine, and must each
2801   be at least NUM_REGS * sizeof (regoff_t) bytes long.
2802
2803   If NUM_REGS == 0, then subsequent matches should allocate their own
2804   register data.
2805
2806   Unless this function is called, the first search or match using
2807   PATTERN_BUFFER will allocate its own register data, without
2808   freeing the old data.  */
2809
2810void
2811re_set_registers (bufp, regs, num_regs, starts, ends)
2812    struct re_pattern_buffer *bufp;
2813    struct re_registers *regs;
2814    unsigned num_regs;
2815    regoff_t *starts, *ends;
2816{
2817  if (num_regs)
2818    {
2819      bufp->regs_allocated = REGS_REALLOCATE;
2820      regs->num_regs = num_regs;
2821      regs->start = starts;
2822      regs->end = ends;
2823    }
2824  else
2825    {
2826      bufp->regs_allocated = REGS_UNALLOCATED;
2827      regs->num_regs = 0;
2828      regs->start = regs->end = (regoff_t) 0;
2829    }
2830}
2831
2832/* Searching routines.  */
2833
2834/* Like re_search_2, below, but only one string is specified, and
2835   doesn't let you say where to stop matching. */
2836
2837int
2838re_search (bufp, string, size, startpos, range, regs)
2839     struct re_pattern_buffer *bufp;
2840     const char *string;
2841     int size, startpos, range;
2842     struct re_registers *regs;
2843{
2844  return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
2845		      regs, size);
2846}
2847
2848
2849/* Using the compiled pattern in BUFP->buffer, first tries to match the
2850   virtual concatenation of STRING1 and STRING2, starting first at index
2851   STARTPOS, then at STARTPOS + 1, and so on.
2852
2853   STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
2854
2855   RANGE is how far to scan while trying to match.  RANGE = 0 means try
2856   only at STARTPOS; in general, the last start tried is STARTPOS +
2857   RANGE.
2858
2859   In REGS, return the indices of the virtual concatenation of STRING1
2860   and STRING2 that matched the entire BUFP->buffer and its contained
2861   subexpressions.
2862
2863   Do not consider matching one past the index STOP in the virtual
2864   concatenation of STRING1 and STRING2.
2865
2866   We return either the position in the strings at which the match was
2867   found, -1 if no match, or -2 if error (such as failure
2868   stack overflow).  */
2869
2870int
2871re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
2872     struct re_pattern_buffer *bufp;
2873     const char *string1, *string2;
2874     int size1, size2;
2875     int startpos;
2876     int range;
2877     struct re_registers *regs;
2878     int stop;
2879{
2880  int val;
2881  register char *fastmap = bufp->fastmap;
2882  register char *translate = bufp->translate;
2883  int total_size = size1 + size2;
2884  int endpos = startpos + range;
2885
2886  /* Check for out-of-range STARTPOS.  */
2887  if (startpos < 0 || startpos > total_size)
2888    return -1;
2889
2890  /* Fix up RANGE if it might eventually take us outside
2891     the virtual concatenation of STRING1 and STRING2.  */
2892  if (endpos < -1)
2893    range = -1 - startpos;
2894  else if (endpos > total_size)
2895    range = total_size - startpos;
2896
2897  /* If the search isn't to be a backwards one, don't waste time in a
2898     search for a pattern that must be anchored.  */
2899  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
2900    {
2901      if (startpos > 0)
2902	return -1;
2903      else
2904	range = 1;
2905    }
2906
2907  /* Update the fastmap now if not correct already.  */
2908  if (fastmap && !bufp->fastmap_accurate)
2909    if (re_compile_fastmap (bufp) == -2)
2910      return -2;
2911
2912  /* Loop through the string, looking for a place to start matching.  */
2913  for (;;)
2914    {
2915      /* If a fastmap is supplied, skip quickly over characters that
2916         cannot be the start of a match.  If the pattern can match the
2917         null string, however, we don't need to skip characters; we want
2918         the first null string.  */
2919      if (fastmap && startpos < total_size && !bufp->can_be_null)
2920	{
2921	  if (range > 0)	/* Searching forwards.  */
2922	    {
2923	      register const char *d;
2924	      register int lim = 0;
2925	      int irange = range;
2926
2927              if (startpos < size1 && startpos + range >= size1)
2928                lim = range - (size1 - startpos);
2929
2930	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
2931
2932              /* Written out as an if-else to avoid testing `translate'
2933                 inside the loop.  */
2934	      if (translate)
2935                while (range > lim
2936                       && !fastmap[(unsigned char)
2937				   translate[(unsigned char) *d++]])
2938                  range--;
2939	      else
2940                while (range > lim && !fastmap[(unsigned char) *d++])
2941                  range--;
2942
2943	      startpos += irange - range;
2944	    }
2945	  else				/* Searching backwards.  */
2946	    {
2947	      register char c = (size1 == 0 || startpos >= size1
2948                                 ? string2[startpos - size1]
2949                                 : string1[startpos]);
2950
2951	      if (!fastmap[(unsigned char) TRANSLATE (c)])
2952		goto advance;
2953	    }
2954	}
2955
2956      /* If can't match the null string, and that's all we have left, fail.  */
2957      if (range >= 0 && startpos == total_size && fastmap
2958          && !bufp->can_be_null)
2959	return -1;
2960
2961      val = re_match_2 (bufp, string1, size1, string2, size2,
2962	                startpos, regs, stop);
2963      if (val >= 0)
2964	return startpos;
2965
2966      if (val == -2)
2967	return -2;
2968
2969    advance:
2970      if (!range)
2971        break;
2972      else if (range > 0)
2973        {
2974          range--;
2975          startpos++;
2976        }
2977      else
2978        {
2979          range++;
2980          startpos--;
2981        }
2982    }
2983  return -1;
2984} /* re_search_2 */
2985
2986/* Declarations and macros for re_match_2.  */
2987
2988static int bcmp_translate ();
2989static boolean alt_match_null_string_p (),
2990               common_op_match_null_string_p (),
2991               group_match_null_string_p ();
2992
2993/* Structure for per-register (a.k.a. per-group) information.
2994   This must not be longer than one word, because we push this value
2995   onto the failure stack.  Other register information, such as the
2996   starting and ending positions (which are addresses), and the list of
2997   inner groups (which is a bits list) are maintained in separate
2998   variables.
2999
3000   We are making a (strictly speaking) nonportable assumption here: that
3001   the compiler will pack our bit fields into something that fits into
3002   the type of `word', i.e., is something that fits into one item on the
3003   failure stack.  */
3004typedef union
3005{
3006  fail_stack_elt_t word;
3007  struct
3008  {
3009      /* This field is one if this group can match the empty string,
3010         zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
3011#define MATCH_NULL_UNSET_VALUE 3
3012    unsigned match_null_string_p : 2;
3013    unsigned is_active : 1;
3014    unsigned matched_something : 1;
3015    unsigned ever_matched_something : 1;
3016  } bits;
3017} register_info_type;
3018
3019#define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
3020#define IS_ACTIVE(R)  ((R).bits.is_active)
3021#define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
3022#define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
3023
3024
3025/* Call this when have matched a real character; it sets `matched' flags
3026   for the subexpressions which we are currently inside.  Also records
3027   that those subexprs have matched.  */
3028#define SET_REGS_MATCHED()						\
3029  do									\
3030    {									\
3031      unsigned r;							\
3032      for (r = lowest_active_reg; r <= highest_active_reg; r++)		\
3033        {								\
3034          MATCHED_SOMETHING (reg_info[r])				\
3035            = EVER_MATCHED_SOMETHING (reg_info[r])			\
3036            = 1;							\
3037        }								\
3038    }									\
3039  while (0)
3040
3041
3042/* This converts PTR, a pointer into one of the search strings `string1'
3043   and `string2' into an offset from the beginning of that string.  */
3044#define POINTER_TO_OFFSET(ptr)						\
3045  (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1)
3046
3047/* Registers are set to a sentinel when they haven't yet matched.  */
3048#define REG_UNSET_VALUE ((char *) -1)
3049#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
3050
3051
3052/* Macros for dealing with the split strings in re_match_2.  */
3053
3054#define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
3055
3056/* Call before fetching a character with *d.  This switches over to
3057   string2 if necessary.  */
3058#define PREFETCH()							\
3059  while (d == dend)						    	\
3060    {									\
3061      /* End of string2 => fail.  */					\
3062      if (dend == end_match_2) 						\
3063        goto fail;							\
3064      /* End of string1 => advance to string2.  */ 			\
3065      d = string2;						        \
3066      dend = end_match_2;						\
3067    }
3068
3069
3070/* Test if at very beginning or at very end of the virtual concatenation
3071   of `string1' and `string2'.  If only one string, it's `string2'.  */
3072#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
3073#define AT_STRINGS_END(d) ((d) == end2)
3074
3075
3076/* Test if D points to a character which is word-constituent.  We have
3077   two special cases to check for: if past the end of string1, look at
3078   the first character in string2; and if before the beginning of
3079   string2, look at the last character in string1.  */
3080#define WORDCHAR_P(d)							\
3081  (SYNTAX ((d) == end1 ? *string2					\
3082           : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
3083   == Sword)
3084
3085/* Test if the character before D and the one at D differ with respect
3086   to being word-constituent.  */
3087#define AT_WORD_BOUNDARY(d)						\
3088  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
3089   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
3090
3091
3092/* Free everything we malloc.  */
3093#ifdef REGEX_MALLOC
3094#define FREE_VAR(var) if (var) free (var); var = NULL
3095#define FREE_VARIABLES()						\
3096  do {									\
3097    FREE_VAR (fail_stack.stack);					\
3098    FREE_VAR (regstart);						\
3099    FREE_VAR (regend);							\
3100    FREE_VAR (old_regstart);						\
3101    FREE_VAR (old_regend);						\
3102    FREE_VAR (best_regstart);						\
3103    FREE_VAR (best_regend);						\
3104    FREE_VAR (reg_info);						\
3105    FREE_VAR (reg_dummy);						\
3106    FREE_VAR (reg_info_dummy);						\
3107  } while (0)
3108#else /* not REGEX_MALLOC */
3109/* Some MIPS systems (at least) want this to free alloca'd storage.  */
3110#define FREE_VARIABLES() alloca (0)
3111#endif /* not REGEX_MALLOC */
3112
3113
3114/* These values must meet several constraints.  They must not be valid
3115   register values; since we have a limit of 255 registers (because
3116   we use only one byte in the pattern for the register number), we can
3117   use numbers larger than 255.  They must differ by 1, because of
3118   NUM_FAILURE_ITEMS above.  And the value for the lowest register must
3119   be larger than the value for the highest register, so we do not try
3120   to actually save any registers when none are active.  */
3121#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
3122#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
3123
3124/* Matching routines.  */
3125
3126#ifndef emacs   /* Emacs never uses this.  */
3127/* re_match is like re_match_2 except it takes only a single string.  */
3128
3129int
3130re_match (bufp, string, size, pos, regs)
3131     struct re_pattern_buffer *bufp;
3132     const char *string;
3133     int size, pos;
3134     struct re_registers *regs;
3135 {
3136  return re_match_2 (bufp, NULL, 0, string, size, pos, regs, size);
3137}
3138#endif /* not emacs */
3139
3140
3141/* re_match_2 matches the compiled pattern in BUFP against the
3142   the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
3143   and SIZE2, respectively).  We start matching at POS, and stop
3144   matching at STOP.
3145
3146   If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
3147   store offsets for the substring each group matched in REGS.  See the
3148   documentation for exactly how many groups we fill.
3149
3150   We return -1 if no match, -2 if an internal error (such as the
3151   failure stack overflowing).  Otherwise, we return the length of the
3152   matched substring.  */
3153
3154int
3155re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3156     struct re_pattern_buffer *bufp;
3157     const char *string1, *string2;
3158     int size1, size2;
3159     int pos;
3160     struct re_registers *regs;
3161     int stop;
3162{
3163  /* General temporaries.  */
3164  int mcnt;
3165  unsigned char *p1;
3166
3167  /* Just past the end of the corresponding string.  */
3168  const char *end1, *end2;
3169
3170  /* Pointers into string1 and string2, just past the last characters in
3171     each to consider matching.  */
3172  const char *end_match_1, *end_match_2;
3173
3174  /* Where we are in the data, and the end of the current string.  */
3175  const char *d, *dend;
3176
3177  /* Where we are in the pattern, and the end of the pattern.  */
3178  unsigned char *p = bufp->buffer;
3179  register unsigned char *pend = p + bufp->used;
3180
3181  /* We use this to map every character in the string.  */
3182  char *translate = bufp->translate;
3183
3184  /* Failure point stack.  Each place that can handle a failure further
3185     down the line pushes a failure point on this stack.  It consists of
3186     restart, regend, and reg_info for all registers corresponding to
3187     the subexpressions we're currently inside, plus the number of such
3188     registers, and, finally, two char *'s.  The first char * is where
3189     to resume scanning the pattern; the second one is where to resume
3190     scanning the strings.  If the latter is zero, the failure point is
3191     a ``dummy''; if a failure happens and the failure point is a dummy,
3192     it gets discarded and the next next one is tried.  */
3193  fail_stack_type fail_stack;
3194#ifdef DEBUG
3195  static unsigned failure_id = 0;
3196  unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
3197#endif
3198
3199  /* We fill all the registers internally, independent of what we
3200     return, for use in backreferences.  The number here includes
3201     an element for register zero.  */
3202  unsigned num_regs = bufp->re_nsub + 1;
3203
3204  /* The currently active registers.  */
3205  unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3206  unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3207
3208  /* Information on the contents of registers. These are pointers into
3209     the input strings; they record just what was matched (on this
3210     attempt) by a subexpression part of the pattern, that is, the
3211     regnum-th regstart pointer points to where in the pattern we began
3212     matching and the regnum-th regend points to right after where we
3213     stopped matching the regnum-th subexpression.  (The zeroth register
3214     keeps track of what the whole pattern matches.)  */
3215  const char **regstart, **regend;
3216
3217  /* If a group that's operated upon by a repetition operator fails to
3218     match anything, then the register for its start will need to be
3219     restored because it will have been set to wherever in the string we
3220     are when we last see its open-group operator.  Similarly for a
3221     register's end.  */
3222  const char **old_regstart, **old_regend;
3223
3224  /* The is_active field of reg_info helps us keep track of which (possibly
3225     nested) subexpressions we are currently in. The matched_something
3226     field of reg_info[reg_num] helps us tell whether or not we have
3227     matched any of the pattern so far this time through the reg_num-th
3228     subexpression.  These two fields get reset each time through any
3229     loop their register is in.  */
3230  register_info_type *reg_info;
3231
3232  /* The following record the register info as found in the above
3233     variables when we find a match better than any we've seen before.
3234     This happens as we backtrack through the failure points, which in
3235     turn happens only if we have not yet matched the entire string. */
3236  unsigned best_regs_set = false;
3237  const char **best_regstart, **best_regend;
3238
3239  /* Logically, this is `best_regend[0]'.  But we don't want to have to
3240     allocate space for that if we're not allocating space for anything
3241     else (see below).  Also, we never need info about register 0 for
3242     any of the other register vectors, and it seems rather a kludge to
3243     treat `best_regend' differently than the rest.  So we keep track of
3244     the end of the best match so far in a separate variable.  We
3245     initialize this to NULL so that when we backtrack the first time
3246     and need to test it, it's not garbage.  */
3247  const char *match_end = NULL;
3248
3249  /* Used when we pop values we don't care about.  */
3250  const char **reg_dummy;
3251  register_info_type *reg_info_dummy;
3252
3253#ifdef DEBUG
3254  /* Counts the total number of registers pushed.  */
3255  unsigned num_regs_pushed = 0;
3256#endif
3257
3258  DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
3259
3260  INIT_FAIL_STACK ();
3261
3262  /* Do not bother to initialize all the register variables if there are
3263     no groups in the pattern, as it takes a fair amount of time.  If
3264     there are groups, we include space for register 0 (the whole
3265     pattern), even though we never use it, since it simplifies the
3266     array indexing.  We should fix this.  */
3267  if (bufp->re_nsub)
3268    {
3269      regstart = REGEX_TALLOC (num_regs, const char *);
3270      regend = REGEX_TALLOC (num_regs, const char *);
3271      old_regstart = REGEX_TALLOC (num_regs, const char *);
3272      old_regend = REGEX_TALLOC (num_regs, const char *);
3273      best_regstart = REGEX_TALLOC (num_regs, const char *);
3274      best_regend = REGEX_TALLOC (num_regs, const char *);
3275      reg_info = REGEX_TALLOC (num_regs, register_info_type);
3276      reg_dummy = REGEX_TALLOC (num_regs, const char *);
3277      reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
3278
3279      if (!(regstart && regend && old_regstart && old_regend && reg_info
3280            && best_regstart && best_regend && reg_dummy && reg_info_dummy))
3281        {
3282          FREE_VARIABLES ();
3283          return -2;
3284        }
3285    }
3286#ifdef REGEX_MALLOC
3287  else
3288    {
3289      /* We must initialize all our variables to NULL, so that
3290         `FREE_VARIABLES' doesn't try to free them.  */
3291      regstart = regend = old_regstart = old_regend = best_regstart
3292        = best_regend = reg_dummy = NULL;
3293      reg_info = reg_info_dummy = (register_info_type *) NULL;
3294    }
3295#endif /* REGEX_MALLOC */
3296
3297  /* The starting position is bogus.  */
3298  if (pos < 0 || pos > size1 + size2)
3299    {
3300      FREE_VARIABLES ();
3301      return -1;
3302    }
3303
3304  /* Initialize subexpression text positions to -1 to mark ones that no
3305     start_memory/stop_memory has been seen for. Also initialize the
3306     register information struct.  */
3307  for (mcnt = 1; mcnt < num_regs; mcnt++)
3308    {
3309      regstart[mcnt] = regend[mcnt]
3310        = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
3311
3312      REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
3313      IS_ACTIVE (reg_info[mcnt]) = 0;
3314      MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3315      EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3316    }
3317
3318  /* We move `string1' into `string2' if the latter's empty -- but not if
3319     `string1' is null.  */
3320  if (size2 == 0 && string1 != NULL)
3321    {
3322      string2 = string1;
3323      size2 = size1;
3324      string1 = 0;
3325      size1 = 0;
3326    }
3327  end1 = string1 + size1;
3328  end2 = string2 + size2;
3329
3330  /* Compute where to stop matching, within the two strings.  */
3331  if (stop <= size1)
3332    {
3333      end_match_1 = string1 + stop;
3334      end_match_2 = string2;
3335    }
3336  else
3337    {
3338      end_match_1 = end1;
3339      end_match_2 = string2 + stop - size1;
3340    }
3341
3342  /* `p' scans through the pattern as `d' scans through the data.
3343     `dend' is the end of the input string that `d' points within.  `d'
3344     is advanced into the following input string whenever necessary, but
3345     this happens before fetching; therefore, at the beginning of the
3346     loop, `d' can be pointing at the end of a string, but it cannot
3347     equal `string2'.  */
3348  if (size1 > 0 && pos <= size1)
3349    {
3350      d = string1 + pos;
3351      dend = end_match_1;
3352    }
3353  else
3354    {
3355      d = string2 + pos - size1;
3356      dend = end_match_2;
3357    }
3358
3359  DEBUG_PRINT1 ("The compiled pattern is: ");
3360  DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
3361  DEBUG_PRINT1 ("The string to match is: `");
3362  DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
3363  DEBUG_PRINT1 ("'\n");
3364
3365  /* This loops over pattern commands.  It exits by returning from the
3366     function if the match is complete, or it drops through if the match
3367     fails at this starting point in the input data.  */
3368  for (;;)
3369    {
3370      DEBUG_PRINT2 ("\n0x%x: ", p);
3371
3372      if (p == pend)
3373	{ /* End of pattern means we might have succeeded.  */
3374          DEBUG_PRINT1 ("end of pattern ... ");
3375
3376	  /* If we haven't matched the entire string, and we want the
3377             longest match, try backtracking.  */
3378          if (d != end_match_2)
3379	    {
3380              DEBUG_PRINT1 ("backtracking.\n");
3381
3382              if (!FAIL_STACK_EMPTY ())
3383                { /* More failure points to try.  */
3384                  boolean same_str_p = (FIRST_STRING_P (match_end)
3385	        	                == MATCHING_IN_FIRST_STRING);
3386
3387                  /* If exceeds best match so far, save it.  */
3388                  if (!best_regs_set
3389                      || (same_str_p && d > match_end)
3390                      || (!same_str_p && !MATCHING_IN_FIRST_STRING))
3391                    {
3392                      best_regs_set = true;
3393                      match_end = d;
3394
3395                      DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
3396
3397                      for (mcnt = 1; mcnt < num_regs; mcnt++)
3398                        {
3399                          best_regstart[mcnt] = regstart[mcnt];
3400                          best_regend[mcnt] = regend[mcnt];
3401                        }
3402                    }
3403                  goto fail;
3404                }
3405
3406              /* If no failure points, don't restore garbage.  */
3407              else if (best_regs_set)
3408                {
3409  	        restore_best_regs:
3410                  /* Restore best match.  It may happen that `dend ==
3411                     end_match_1' while the restored d is in string2.
3412                     For example, the pattern `x.*y.*z' against the
3413                     strings `x-' and `y-z-', if the two strings are
3414                     not consecutive in memory.  */
3415                  DEBUG_PRINT1 ("Restoring best registers.\n");
3416
3417                  d = match_end;
3418                  dend = ((d >= string1 && d <= end1)
3419		           ? end_match_1 : end_match_2);
3420
3421		  for (mcnt = 1; mcnt < num_regs; mcnt++)
3422		    {
3423		      regstart[mcnt] = best_regstart[mcnt];
3424		      regend[mcnt] = best_regend[mcnt];
3425		    }
3426                }
3427            } /* d != end_match_2 */
3428
3429          DEBUG_PRINT1 ("Accepting match.\n");
3430
3431          /* If caller wants register contents data back, do it.  */
3432          if (regs && !bufp->no_sub)
3433	    {
3434              /* Have the register data arrays been allocated?  */
3435              if (bufp->regs_allocated == REGS_UNALLOCATED)
3436                { /* No.  So allocate them with malloc.  We need one
3437                     extra element beyond `num_regs' for the `-1' marker
3438                     GNU code uses.  */
3439                  regs->num_regs = MAX (RE_NREGS, num_regs + 1);
3440                  regs->start = TALLOC (regs->num_regs, regoff_t);
3441                  regs->end = TALLOC (regs->num_regs, regoff_t);
3442                  if (regs->start == NULL || regs->end == NULL)
3443                    return -2;
3444                  bufp->regs_allocated = REGS_REALLOCATE;
3445                }
3446              else if (bufp->regs_allocated == REGS_REALLOCATE)
3447                { /* Yes.  If we need more elements than were already
3448                     allocated, reallocate them.  If we need fewer, just
3449                     leave it alone.  */
3450                  if (regs->num_regs < num_regs + 1)
3451                    {
3452                      regs->num_regs = num_regs + 1;
3453                      RETALLOC (regs->start, regs->num_regs, regoff_t);
3454                      RETALLOC (regs->end, regs->num_regs, regoff_t);
3455                      if (regs->start == NULL || regs->end == NULL)
3456                        return -2;
3457                    }
3458                }
3459              else
3460                assert (bufp->regs_allocated == REGS_FIXED);
3461
3462              /* Convert the pointer data in `regstart' and `regend' to
3463                 indices.  Register zero has to be set differently,
3464                 since we haven't kept track of any info for it.  */
3465              if (regs->num_regs > 0)
3466                {
3467                  regs->start[0] = pos;
3468                  regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1
3469			          : d - string2 + size1);
3470                }
3471
3472              /* Go through the first `min (num_regs, regs->num_regs)'
3473                 registers, since that is all we initialized.  */
3474	      for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
3475		{
3476                  if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
3477                    regs->start[mcnt] = regs->end[mcnt] = -1;
3478                  else
3479                    {
3480		      regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]);
3481                      regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]);
3482                    }
3483		}
3484
3485              /* If the regs structure we return has more elements than
3486                 were in the pattern, set the extra elements to -1.  If
3487                 we (re)allocated the registers, this is the case,
3488                 because we always allocate enough to have at least one
3489                 -1 at the end.  */
3490              for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
3491                regs->start[mcnt] = regs->end[mcnt] = -1;
3492	    } /* regs && !bufp->no_sub */
3493
3494          FREE_VARIABLES ();
3495          DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
3496                        nfailure_points_pushed, nfailure_points_popped,
3497                        nfailure_points_pushed - nfailure_points_popped);
3498          DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
3499
3500          mcnt = d - pos - (MATCHING_IN_FIRST_STRING
3501			    ? string1
3502			    : string2 - size1);
3503
3504          DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
3505
3506          return mcnt;
3507        }
3508
3509      /* Otherwise match next pattern command.  */
3510#ifdef SWITCH_ENUM_BUG
3511      switch ((int) ((re_opcode_t) *p++))
3512#else
3513      switch ((re_opcode_t) *p++)
3514#endif
3515	{
3516        /* Ignore these.  Used to ignore the n of succeed_n's which
3517           currently have n == 0.  */
3518        case no_op:
3519          DEBUG_PRINT1 ("EXECUTING no_op.\n");
3520          break;
3521
3522
3523        /* Match the next n pattern characters exactly.  The following
3524           byte in the pattern defines n, and the n bytes after that
3525           are the characters to match.  */
3526	case exactn:
3527	  mcnt = *p++;
3528          DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
3529
3530          /* This is written out as an if-else so we don't waste time
3531             testing `translate' inside the loop.  */
3532          if (translate)
3533	    {
3534	      do
3535		{
3536		  PREFETCH ();
3537		  if (translate[(unsigned char) *d++] != (char) *p++)
3538                    goto fail;
3539		}
3540	      while (--mcnt);
3541	    }
3542	  else
3543	    {
3544	      do
3545		{
3546		  PREFETCH ();
3547		  if (*d++ != (char) *p++) goto fail;
3548		}
3549	      while (--mcnt);
3550	    }
3551	  SET_REGS_MATCHED ();
3552          break;
3553
3554
3555        /* Match any character except possibly a newline or a null.  */
3556	case anychar:
3557          DEBUG_PRINT1 ("EXECUTING anychar.\n");
3558
3559          PREFETCH ();
3560
3561          if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
3562              || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
3563	    goto fail;
3564
3565          SET_REGS_MATCHED ();
3566          DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
3567          d++;
3568	  break;
3569
3570
3571	case charset:
3572	case charset_not:
3573	  {
3574	    register unsigned char c;
3575	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
3576
3577            DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
3578
3579	    PREFETCH ();
3580	    c = TRANSLATE (*d); /* The character to match.  */
3581
3582            /* Cast to `unsigned' instead of `unsigned char' in case the
3583               bit list is a full 32 bytes long.  */
3584	    if (c < (unsigned) (*p * BYTEWIDTH)
3585		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
3586	      not = !not;
3587
3588	    p += 1 + *p;
3589
3590	    if (!not) goto fail;
3591
3592	    SET_REGS_MATCHED ();
3593            d++;
3594	    break;
3595	  }
3596
3597
3598        /* The beginning of a group is represented by start_memory.
3599           The arguments are the register number in the next byte, and the
3600           number of groups inner to this one in the next.  The text
3601           matched within the group is recorded (in the internal
3602           registers data structure) under the register number.  */
3603        case start_memory:
3604	  DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
3605
3606          /* Find out if this group can match the empty string.  */
3607	  p1 = p;		/* To send to group_match_null_string_p.  */
3608
3609          if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
3610            REG_MATCH_NULL_STRING_P (reg_info[*p])
3611              = group_match_null_string_p (&p1, pend, reg_info);
3612
3613          /* Save the position in the string where we were the last time
3614             we were at this open-group operator in case the group is
3615             operated upon by a repetition operator, e.g., with `(a*)*b'
3616             against `ab'; then we want to ignore where we are now in
3617             the string in case this attempt to match fails.  */
3618          old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3619                             ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
3620                             : regstart[*p];
3621	  DEBUG_PRINT2 ("  old_regstart: %d\n",
3622			 POINTER_TO_OFFSET (old_regstart[*p]));
3623
3624          regstart[*p] = d;
3625	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
3626
3627          IS_ACTIVE (reg_info[*p]) = 1;
3628          MATCHED_SOMETHING (reg_info[*p]) = 0;
3629
3630          /* This is the new highest active register.  */
3631          highest_active_reg = *p;
3632
3633          /* If nothing was active before, this is the new lowest active
3634             register.  */
3635          if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
3636            lowest_active_reg = *p;
3637
3638          /* Move past the register number and inner group count.  */
3639          p += 2;
3640          break;
3641
3642
3643        /* The stop_memory opcode represents the end of a group.  Its
3644           arguments are the same as start_memory's: the register
3645           number, and the number of inner groups.  */
3646	case stop_memory:
3647	  DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
3648
3649          /* We need to save the string position the last time we were at
3650             this close-group operator in case the group is operated
3651             upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
3652             against `aba'; then we want to ignore where we are now in
3653             the string in case this attempt to match fails.  */
3654          old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3655                           ? REG_UNSET (regend[*p]) ? d : regend[*p]
3656			   : regend[*p];
3657	  DEBUG_PRINT2 ("      old_regend: %d\n",
3658			 POINTER_TO_OFFSET (old_regend[*p]));
3659
3660          regend[*p] = d;
3661	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
3662
3663          /* This register isn't active anymore.  */
3664          IS_ACTIVE (reg_info[*p]) = 0;
3665
3666          /* If this was the only register active, nothing is active
3667             anymore.  */
3668          if (lowest_active_reg == highest_active_reg)
3669            {
3670              lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3671              highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3672            }
3673          else
3674            { /* We must scan for the new highest active register, since
3675                 it isn't necessarily one less than now: consider
3676                 (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
3677                 new highest active register is 1.  */
3678              unsigned char r = *p - 1;
3679              while (r > 0 && !IS_ACTIVE (reg_info[r]))
3680                r--;
3681
3682              /* If we end up at register zero, that means that we saved
3683                 the registers as the result of an `on_failure_jump', not
3684                 a `start_memory', and we jumped to past the innermost
3685                 `stop_memory'.  For example, in ((.)*) we save
3686                 registers 1 and 2 as a result of the *, but when we pop
3687                 back to the second ), we are at the stop_memory 1.
3688                 Thus, nothing is active.  */
3689	      if (r == 0)
3690                {
3691                  lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3692                  highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3693                }
3694              else
3695                highest_active_reg = r;
3696            }
3697
3698          /* If just failed to match something this time around with a
3699             group that's operated on by a repetition operator, try to
3700             force exit from the ``loop'', and restore the register
3701             information for this group that we had before trying this
3702             last match.  */
3703          if ((!MATCHED_SOMETHING (reg_info[*p])
3704               || (re_opcode_t) p[-3] == start_memory)
3705	      && (p + 2) < pend)
3706            {
3707              boolean is_a_jump_n = false;
3708
3709              p1 = p + 2;
3710              mcnt = 0;
3711              switch ((re_opcode_t) *p1++)
3712                {
3713                  case jump_n:
3714		    is_a_jump_n = true;
3715                  case pop_failure_jump:
3716		  case maybe_pop_jump:
3717		  case jump:
3718		  case dummy_failure_jump:
3719                    EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3720		    if (is_a_jump_n)
3721		      p1 += 2;
3722                    break;
3723
3724                  default:
3725                    /* do nothing */ ;
3726                }
3727	      p1 += mcnt;
3728
3729              /* If the next operation is a jump backwards in the pattern
3730	         to an on_failure_jump right before the start_memory
3731                 corresponding to this stop_memory, exit from the loop
3732                 by forcing a failure after pushing on the stack the
3733                 on_failure_jump's jump in the pattern, and d.  */
3734              if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
3735                  && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
3736		{
3737                  /* If this group ever matched anything, then restore
3738                     what its registers were before trying this last
3739                     failed match, e.g., with `(a*)*b' against `ab' for
3740                     regstart[1], and, e.g., with `((a*)*(b*)*)*'
3741                     against `aba' for regend[3].
3742
3743                     Also restore the registers for inner groups for,
3744                     e.g., `((a*)(b*))*' against `aba' (register 3 would
3745                     otherwise get trashed).  */
3746
3747                  if (EVER_MATCHED_SOMETHING (reg_info[*p]))
3748		    {
3749		      unsigned r;
3750
3751                      EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
3752
3753		      /* Restore this and inner groups' (if any) registers.  */
3754                      for (r = *p; r < *p + *(p + 1); r++)
3755                        {
3756                          regstart[r] = old_regstart[r];
3757
3758                          /* xx why this test?  */
3759                          if ((int) old_regend[r] >= (int) regstart[r])
3760                            regend[r] = old_regend[r];
3761                        }
3762                    }
3763		  p1++;
3764                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3765                  PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
3766
3767                  goto fail;
3768                }
3769            }
3770
3771          /* Move past the register number and the inner group count.  */
3772          p += 2;
3773          break;
3774
3775
3776	/* \<digit> has been turned into a `duplicate' command which is
3777           followed by the numeric value of <digit> as the register number.  */
3778        case duplicate:
3779	  {
3780	    register const char *d2, *dend2;
3781	    int regno = *p++;   /* Get which register to match against.  */
3782	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
3783
3784	    /* Can't back reference a group which we've never matched.  */
3785            if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
3786              goto fail;
3787
3788            /* Where in input to try to start matching.  */
3789            d2 = regstart[regno];
3790
3791            /* Where to stop matching; if both the place to start and
3792               the place to stop matching are in the same string, then
3793               set to the place to stop, otherwise, for now have to use
3794               the end of the first string.  */
3795
3796            dend2 = ((FIRST_STRING_P (regstart[regno])
3797		      == FIRST_STRING_P (regend[regno]))
3798		     ? regend[regno] : end_match_1);
3799	    for (;;)
3800	      {
3801		/* If necessary, advance to next segment in register
3802                   contents.  */
3803		while (d2 == dend2)
3804		  {
3805		    if (dend2 == end_match_2) break;
3806		    if (dend2 == regend[regno]) break;
3807
3808                    /* End of string1 => advance to string2. */
3809                    d2 = string2;
3810                    dend2 = regend[regno];
3811		  }
3812		/* At end of register contents => success */
3813		if (d2 == dend2) break;
3814
3815		/* If necessary, advance to next segment in data.  */
3816		PREFETCH ();
3817
3818		/* How many characters left in this segment to match.  */
3819		mcnt = dend - d;
3820
3821		/* Want how many consecutive characters we can match in
3822                   one shot, so, if necessary, adjust the count.  */
3823                if (mcnt > dend2 - d2)
3824		  mcnt = dend2 - d2;
3825
3826		/* Compare that many; failure if mismatch, else move
3827                   past them.  */
3828		if (translate
3829                    ? bcmp_translate (d, d2, mcnt, translate)
3830                    : bcmp (d, d2, mcnt))
3831		  goto fail;
3832		d += mcnt, d2 += mcnt;
3833	      }
3834	  }
3835	  break;
3836
3837
3838        /* begline matches the empty string at the beginning of the string
3839           (unless `not_bol' is set in `bufp'), and, if
3840           `newline_anchor' is set, after newlines.  */
3841	case begline:
3842          DEBUG_PRINT1 ("EXECUTING begline.\n");
3843
3844          if (AT_STRINGS_BEG (d))
3845            {
3846              if (!bufp->not_bol) break;
3847            }
3848          else if (d[-1] == '\n' && bufp->newline_anchor)
3849            {
3850              break;
3851            }
3852          /* In all other cases, we fail.  */
3853          goto fail;
3854
3855
3856        /* endline is the dual of begline.  */
3857	case endline:
3858          DEBUG_PRINT1 ("EXECUTING endline.\n");
3859
3860          if (AT_STRINGS_END (d))
3861            {
3862              if (!bufp->not_eol) break;
3863            }
3864
3865          /* We have to ``prefetch'' the next character.  */
3866          else if ((d == end1 ? *string2 : *d) == '\n'
3867                   && bufp->newline_anchor)
3868            {
3869              break;
3870            }
3871          goto fail;
3872
3873
3874	/* Match at the very beginning of the data.  */
3875        case begbuf:
3876          DEBUG_PRINT1 ("EXECUTING begbuf.\n");
3877          if (AT_STRINGS_BEG (d))
3878            break;
3879          goto fail;
3880
3881
3882	/* Match at the very end of the data.  */
3883        case endbuf:
3884          DEBUG_PRINT1 ("EXECUTING endbuf.\n");
3885	  if (AT_STRINGS_END (d))
3886	    break;
3887          goto fail;
3888
3889
3890        /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
3891           pushes NULL as the value for the string on the stack.  Then
3892           `pop_failure_point' will keep the current value for the
3893           string, instead of restoring it.  To see why, consider
3894           matching `foo\nbar' against `.*\n'.  The .* matches the foo;
3895           then the . fails against the \n.  But the next thing we want
3896           to do is match the \n against the \n; if we restored the
3897           string value, we would be back at the foo.
3898
3899           Because this is used only in specific cases, we don't need to
3900           check all the things that `on_failure_jump' does, to make
3901           sure the right things get saved on the stack.  Hence we don't
3902           share its code.  The only reason to push anything on the
3903           stack at all is that otherwise we would have to change
3904           `anychar's code to do something besides goto fail in this
3905           case; that seems worse than this.  */
3906        case on_failure_keep_string_jump:
3907          DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
3908
3909          EXTRACT_NUMBER_AND_INCR (mcnt, p);
3910          DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
3911
3912          PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
3913          break;
3914
3915
3916	/* Uses of on_failure_jump:
3917
3918           Each alternative starts with an on_failure_jump that points
3919           to the beginning of the next alternative.  Each alternative
3920           except the last ends with a jump that in effect jumps past
3921           the rest of the alternatives.  (They really jump to the
3922           ending jump of the following alternative, because tensioning
3923           these jumps is a hassle.)
3924
3925           Repeats start with an on_failure_jump that points past both
3926           the repetition text and either the following jump or
3927           pop_failure_jump back to this on_failure_jump.  */
3928	case on_failure_jump:
3929        on_failure:
3930          DEBUG_PRINT1 ("EXECUTING on_failure_jump");
3931
3932          EXTRACT_NUMBER_AND_INCR (mcnt, p);
3933          DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
3934
3935          /* If this on_failure_jump comes right before a group (i.e.,
3936             the original * applied to a group), save the information
3937             for that group and all inner ones, so that if we fail back
3938             to this point, the group's information will be correct.
3939             For example, in \(a*\)*\1, we need the preceding group,
3940             and in \(\(a*\)b*\)\2, we need the inner group.  */
3941
3942          /* We can't use `p' to check ahead because we push
3943             a failure point to `p + mcnt' after we do this.  */
3944          p1 = p;
3945
3946          /* We need to skip no_op's before we look for the
3947             start_memory in case this on_failure_jump is happening as
3948             the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
3949             against aba.  */
3950          while (p1 < pend && (re_opcode_t) *p1 == no_op)
3951            p1++;
3952
3953          if (p1 < pend && (re_opcode_t) *p1 == start_memory)
3954            {
3955              /* We have a new highest active register now.  This will
3956                 get reset at the start_memory we are about to get to,
3957                 but we will have saved all the registers relevant to
3958                 this repetition op, as described above.  */
3959              highest_active_reg = *(p1 + 1) + *(p1 + 2);
3960              if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
3961                lowest_active_reg = *(p1 + 1);
3962            }
3963
3964          DEBUG_PRINT1 (":\n");
3965          PUSH_FAILURE_POINT (p + mcnt, d, -2);
3966          break;
3967
3968
3969        /* A smart repeat ends with `maybe_pop_jump'.
3970	   We change it to either `pop_failure_jump' or `jump'.  */
3971        case maybe_pop_jump:
3972          EXTRACT_NUMBER_AND_INCR (mcnt, p);
3973          DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
3974          {
3975	    register unsigned char *p2 = p;
3976
3977            /* Compare the beginning of the repeat with what in the
3978               pattern follows its end. If we can establish that there
3979               is nothing that they would both match, i.e., that we
3980               would have to backtrack because of (as in, e.g., `a*a')
3981               then we can change to pop_failure_jump, because we'll
3982               never have to backtrack.
3983
3984               This is not true in the case of alternatives: in
3985               `(a|ab)*' we do need to backtrack to the `ab' alternative
3986               (e.g., if the string was `ab').  But instead of trying to
3987               detect that here, the alternative has put on a dummy
3988               failure point which is what we will end up popping.  */
3989
3990	    /* Skip over open/close-group commands.  */
3991	    while (p2 + 2 < pend
3992		   && ((re_opcode_t) *p2 == stop_memory
3993		       || (re_opcode_t) *p2 == start_memory))
3994	      p2 += 3;			/* Skip over args, too.  */
3995
3996            /* If we're at the end of the pattern, we can change.  */
3997            if (p2 == pend)
3998	      {
3999		/* Consider what happens when matching ":\(.*\)"
4000		   against ":/".  I don't really understand this code
4001		   yet.  */
4002  	        p[-3] = (unsigned char) pop_failure_jump;
4003                DEBUG_PRINT1
4004                  ("  End of pattern: change to `pop_failure_jump'.\n");
4005              }
4006
4007            else if ((re_opcode_t) *p2 == exactn
4008		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
4009	      {
4010		register unsigned char c
4011                  = *p2 == (unsigned char) endline ? '\n' : p2[2];
4012		p1 = p + mcnt;
4013
4014                /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
4015                   to the `maybe_finalize_jump' of this case.  Examine what
4016                   follows.  */
4017                if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
4018                  {
4019  		    p[-3] = (unsigned char) pop_failure_jump;
4020                    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4021                                  c, p1[5]);
4022                  }
4023
4024		else if ((re_opcode_t) p1[3] == charset
4025			 || (re_opcode_t) p1[3] == charset_not)
4026		  {
4027		    int not = (re_opcode_t) p1[3] == charset_not;
4028
4029		    if (c < (unsigned char) (p1[4] * BYTEWIDTH)
4030			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4031		      not = !not;
4032
4033                    /* `not' is equal to 1 if c would match, which means
4034                        that we can't change to pop_failure_jump.  */
4035		    if (!not)
4036                      {
4037  		        p[-3] = (unsigned char) pop_failure_jump;
4038                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4039                      }
4040		  }
4041	      }
4042	  }
4043	  p -= 2;		/* Point at relative address again.  */
4044	  if ((re_opcode_t) p[-1] != pop_failure_jump)
4045	    {
4046	      p[-1] = (unsigned char) jump;
4047              DEBUG_PRINT1 ("  Match => jump.\n");
4048	      goto unconditional_jump;
4049	    }
4050        /* Note fall through.  */
4051
4052
4053	/* The end of a simple repeat has a pop_failure_jump back to
4054           its matching on_failure_jump, where the latter will push a
4055           failure point.  The pop_failure_jump takes off failure
4056           points put on by this pop_failure_jump's matching
4057           on_failure_jump; we got through the pattern to here from the
4058           matching on_failure_jump, so didn't fail.  */
4059        case pop_failure_jump:
4060          {
4061            /* We need to pass separate storage for the lowest and
4062               highest registers, even though we don't care about the
4063               actual values.  Otherwise, we will restore only one
4064               register from the stack, since lowest will == highest in
4065               `pop_failure_point'.  */
4066            unsigned dummy_low_reg, dummy_high_reg;
4067            unsigned char *pdummy;
4068            const char *sdummy;
4069
4070            DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
4071            POP_FAILURE_POINT (sdummy, pdummy,
4072                               dummy_low_reg, dummy_high_reg,
4073                               reg_dummy, reg_dummy, reg_info_dummy);
4074          }
4075          /* Note fall through.  */
4076
4077
4078        /* Unconditionally jump (without popping any failure points).  */
4079        case jump:
4080	unconditional_jump:
4081	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
4082          DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
4083	  p += mcnt;				/* Do the jump.  */
4084          DEBUG_PRINT2 ("(to 0x%x).\n", p);
4085	  break;
4086
4087
4088        /* We need this opcode so we can detect where alternatives end
4089           in `group_match_null_string_p' et al.  */
4090        case jump_past_alt:
4091          DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
4092          goto unconditional_jump;
4093
4094
4095        /* Normally, the on_failure_jump pushes a failure point, which
4096           then gets popped at pop_failure_jump.  We will end up at
4097           pop_failure_jump, also, and with a pattern of, say, `a+', we
4098           are skipping over the on_failure_jump, so we have to push
4099           something meaningless for pop_failure_jump to pop.  */
4100        case dummy_failure_jump:
4101          DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
4102          /* It doesn't matter what we push for the string here.  What
4103             the code at `fail' tests is the value for the pattern.  */
4104          PUSH_FAILURE_POINT (0, 0, -2);
4105          goto unconditional_jump;
4106
4107
4108        /* At the end of an alternative, we need to push a dummy failure
4109           point in case we are followed by a `pop_failure_jump', because
4110           we don't want the failure point for the alternative to be
4111           popped.  For example, matching `(a|ab)*' against `aab'
4112           requires that we match the `ab' alternative.  */
4113        case push_dummy_failure:
4114          DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
4115          /* See comments just above at `dummy_failure_jump' about the
4116             two zeroes.  */
4117          PUSH_FAILURE_POINT (0, 0, -2);
4118          break;
4119
4120        /* Have to succeed matching what follows at least n times.
4121           After that, handle like `on_failure_jump'.  */
4122        case succeed_n:
4123          EXTRACT_NUMBER (mcnt, p + 2);
4124          DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
4125
4126          assert (mcnt >= 0);
4127          /* Originally, this is how many times we HAVE to succeed.  */
4128          if (mcnt > 0)
4129            {
4130               mcnt--;
4131	       p += 2;
4132               STORE_NUMBER_AND_INCR (p, mcnt);
4133               DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p, mcnt);
4134            }
4135	  else if (mcnt == 0)
4136            {
4137              DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
4138	      p[2] = (unsigned char) no_op;
4139              p[3] = (unsigned char) no_op;
4140              goto on_failure;
4141            }
4142          break;
4143
4144        case jump_n:
4145          EXTRACT_NUMBER (mcnt, p + 2);
4146          DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
4147
4148          /* Originally, this is how many times we CAN jump.  */
4149          if (mcnt)
4150            {
4151               mcnt--;
4152               STORE_NUMBER (p + 2, mcnt);
4153	       goto unconditional_jump;
4154            }
4155          /* If don't have to jump any more, skip over the rest of command.  */
4156	  else
4157	    p += 4;
4158          break;
4159
4160	case set_number_at:
4161	  {
4162            DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
4163
4164            EXTRACT_NUMBER_AND_INCR (mcnt, p);
4165            p1 = p + mcnt;
4166            EXTRACT_NUMBER_AND_INCR (mcnt, p);
4167            DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
4168	    STORE_NUMBER (p1, mcnt);
4169            break;
4170          }
4171
4172        case wordbound:
4173          DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4174          if (AT_WORD_BOUNDARY (d))
4175	    break;
4176          goto fail;
4177
4178	case notwordbound:
4179          DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
4180	  if (AT_WORD_BOUNDARY (d))
4181	    goto fail;
4182          break;
4183
4184	case wordbeg:
4185          DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
4186	  if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
4187	    break;
4188          goto fail;
4189
4190	case wordend:
4191          DEBUG_PRINT1 ("EXECUTING wordend.\n");
4192	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
4193              && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
4194	    break;
4195          goto fail;
4196
4197#ifdef emacs
4198#ifdef emacs19
4199  	case before_dot:
4200          DEBUG_PRINT1 ("EXECUTING before_dot.\n");
4201 	  if (PTR_CHAR_POS ((unsigned char *) d) >= point)
4202  	    goto fail;
4203  	  break;
4204
4205  	case at_dot:
4206          DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4207 	  if (PTR_CHAR_POS ((unsigned char *) d) != point)
4208  	    goto fail;
4209  	  break;
4210
4211  	case after_dot:
4212          DEBUG_PRINT1 ("EXECUTING after_dot.\n");
4213          if (PTR_CHAR_POS ((unsigned char *) d) <= point)
4214  	    goto fail;
4215  	  break;
4216#else /* not emacs19 */
4217	case at_dot:
4218          DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4219	  if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point)
4220	    goto fail;
4221	  break;
4222#endif /* not emacs19 */
4223
4224	case syntaxspec:
4225          DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
4226	  mcnt = *p++;
4227	  goto matchsyntax;
4228
4229        case wordchar:
4230          DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
4231	  mcnt = (int) Sword;
4232        matchsyntax:
4233	  PREFETCH ();
4234	  if (SYNTAX (*d++) != (enum syntaxcode) mcnt)
4235            goto fail;
4236          SET_REGS_MATCHED ();
4237	  break;
4238
4239	case notsyntaxspec:
4240          DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
4241	  mcnt = *p++;
4242	  goto matchnotsyntax;
4243
4244        case notwordchar:
4245          DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
4246	  mcnt = (int) Sword;
4247        matchnotsyntax:
4248	  PREFETCH ();
4249	  if (SYNTAX (*d++) == (enum syntaxcode) mcnt)
4250            goto fail;
4251	  SET_REGS_MATCHED ();
4252          break;
4253
4254#else /* not emacs */
4255	case wordchar:
4256          DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
4257	  PREFETCH ();
4258          if (!WORDCHAR_P (d))
4259            goto fail;
4260	  SET_REGS_MATCHED ();
4261          d++;
4262	  break;
4263
4264	case notwordchar:
4265          DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
4266	  PREFETCH ();
4267	  if (WORDCHAR_P (d))
4268            goto fail;
4269          SET_REGS_MATCHED ();
4270          d++;
4271	  break;
4272#endif /* not emacs */
4273
4274        default:
4275          abort ();
4276	}
4277      continue;  /* Successfully executed one pattern command; keep going.  */
4278
4279
4280    /* We goto here if a matching operation fails. */
4281    fail:
4282      if (!FAIL_STACK_EMPTY ())
4283	{ /* A restart point is known.  Restore to that state.  */
4284          DEBUG_PRINT1 ("\nFAIL:\n");
4285          POP_FAILURE_POINT (d, p,
4286                             lowest_active_reg, highest_active_reg,
4287                             regstart, regend, reg_info);
4288
4289          /* If this failure point is a dummy, try the next one.  */
4290          if (!p)
4291	    goto fail;
4292
4293          /* If we failed to the end of the pattern, don't examine *p.  */
4294	  assert (p <= pend);
4295          if (p < pend)
4296            {
4297              boolean is_a_jump_n = false;
4298
4299              /* If failed to a backwards jump that's part of a repetition
4300                 loop, need to pop this failure point and use the next one.  */
4301              switch ((re_opcode_t) *p)
4302                {
4303                case jump_n:
4304                  is_a_jump_n = true;
4305                case maybe_pop_jump:
4306                case pop_failure_jump:
4307                case jump:
4308                  p1 = p + 1;
4309                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4310                  p1 += mcnt;
4311
4312                  if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
4313                      || (!is_a_jump_n
4314                          && (re_opcode_t) *p1 == on_failure_jump))
4315                    goto fail;
4316                  break;
4317                default:
4318                  /* do nothing */ ;
4319                }
4320            }
4321
4322          if (d >= string1 && d <= end1)
4323	    dend = end_match_1;
4324        }
4325      else
4326        break;   /* Matching at this starting point really fails.  */
4327    } /* for (;;) */
4328
4329  if (best_regs_set)
4330    goto restore_best_regs;
4331
4332  FREE_VARIABLES ();
4333
4334  return -1;         			/* Failure to match.  */
4335} /* re_match_2 */
4336
4337/* Subroutine definitions for re_match_2.  */
4338
4339
4340/* We are passed P pointing to a register number after a start_memory.
4341
4342   Return true if the pattern up to the corresponding stop_memory can
4343   match the empty string, and false otherwise.
4344
4345   If we find the matching stop_memory, sets P to point to one past its number.
4346   Otherwise, sets P to an undefined byte less than or equal to END.
4347
4348   We don't handle duplicates properly (yet).  */
4349
4350static boolean
4351group_match_null_string_p (p, end, reg_info)
4352    unsigned char **p, *end;
4353    register_info_type *reg_info;
4354{
4355  int mcnt;
4356  /* Point to after the args to the start_memory.  */
4357  unsigned char *p1 = *p + 2;
4358
4359  while (p1 < end)
4360    {
4361      /* Skip over opcodes that can match nothing, and return true or
4362	 false, as appropriate, when we get to one that can't, or to the
4363         matching stop_memory.  */
4364
4365      switch ((re_opcode_t) *p1)
4366        {
4367        /* Could be either a loop or a series of alternatives.  */
4368        case on_failure_jump:
4369          p1++;
4370          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4371
4372          /* If the next operation is not a jump backwards in the
4373	     pattern.  */
4374
4375	  if (mcnt >= 0)
4376	    {
4377              /* Go through the on_failure_jumps of the alternatives,
4378                 seeing if any of the alternatives cannot match nothing.
4379                 The last alternative starts with only a jump,
4380                 whereas the rest start with on_failure_jump and end
4381                 with a jump, e.g., here is the pattern for `a|b|c':
4382
4383                 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
4384                 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
4385                 /exactn/1/c
4386
4387                 So, we have to first go through the first (n-1)
4388                 alternatives and then deal with the last one separately.  */
4389
4390
4391              /* Deal with the first (n-1) alternatives, which start
4392                 with an on_failure_jump (see above) that jumps to right
4393                 past a jump_past_alt.  */
4394
4395              while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
4396                {
4397                  /* `mcnt' holds how many bytes long the alternative
4398                     is, including the ending `jump_past_alt' and
4399                     its number.  */
4400
4401                  if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
4402				                      reg_info))
4403                    return false;
4404
4405                  /* Move to right after this alternative, including the
4406		     jump_past_alt.  */
4407                  p1 += mcnt;
4408
4409                  /* Break if it's the beginning of an n-th alternative
4410                     that doesn't begin with an on_failure_jump.  */
4411                  if ((re_opcode_t) *p1 != on_failure_jump)
4412                    break;
4413
4414		  /* Still have to check that it's not an n-th
4415		     alternative that starts with an on_failure_jump.  */
4416		  p1++;
4417                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4418                  if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
4419                    {
4420		      /* Get to the beginning of the n-th alternative.  */
4421                      p1 -= 3;
4422                      break;
4423                    }
4424                }
4425
4426              /* Deal with the last alternative: go back and get number
4427                 of the `jump_past_alt' just before it.  `mcnt' contains
4428                 the length of the alternative.  */
4429              EXTRACT_NUMBER (mcnt, p1 - 2);
4430
4431              if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
4432                return false;
4433
4434              p1 += mcnt;	/* Get past the n-th alternative.  */
4435            } /* if mcnt > 0 */
4436          break;
4437
4438
4439        case stop_memory:
4440	  assert (p1[1] == **p);
4441          *p = p1 + 2;
4442          return true;
4443
4444
4445        default:
4446          if (!common_op_match_null_string_p (&p1, end, reg_info))
4447            return false;
4448        }
4449    } /* while p1 < end */
4450
4451  return false;
4452} /* group_match_null_string_p */
4453
4454
4455/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
4456   It expects P to be the first byte of a single alternative and END one
4457   byte past the last. The alternative can contain groups.  */
4458
4459static boolean
4460alt_match_null_string_p (p, end, reg_info)
4461    unsigned char *p, *end;
4462    register_info_type *reg_info;
4463{
4464  int mcnt;
4465  unsigned char *p1 = p;
4466
4467  while (p1 < end)
4468    {
4469      /* Skip over opcodes that can match nothing, and break when we get
4470         to one that can't.  */
4471
4472      switch ((re_opcode_t) *p1)
4473        {
4474	/* It's a loop.  */
4475        case on_failure_jump:
4476          p1++;
4477          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4478          p1 += mcnt;
4479          break;
4480
4481	default:
4482          if (!common_op_match_null_string_p (&p1, end, reg_info))
4483            return false;
4484        }
4485    }  /* while p1 < end */
4486
4487  return true;
4488} /* alt_match_null_string_p */
4489
4490
4491/* Deals with the ops common to group_match_null_string_p and
4492   alt_match_null_string_p.
4493
4494   Sets P to one after the op and its arguments, if any.  */
4495
4496static boolean
4497common_op_match_null_string_p (p, end, reg_info)
4498    unsigned char **p, *end;
4499    register_info_type *reg_info;
4500{
4501  int mcnt;
4502  boolean ret;
4503  int reg_no;
4504  unsigned char *p1 = *p;
4505
4506  switch ((re_opcode_t) *p1++)
4507    {
4508    case no_op:
4509    case begline:
4510    case endline:
4511    case begbuf:
4512    case endbuf:
4513    case wordbeg:
4514    case wordend:
4515    case wordbound:
4516    case notwordbound:
4517#ifdef emacs
4518    case before_dot:
4519    case at_dot:
4520    case after_dot:
4521#endif
4522      break;
4523
4524    case start_memory:
4525      reg_no = *p1;
4526      assert (reg_no > 0 && reg_no <= MAX_REGNUM);
4527      ret = group_match_null_string_p (&p1, end, reg_info);
4528
4529      /* Have to set this here in case we're checking a group which
4530         contains a group and a back reference to it.  */
4531
4532      if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
4533        REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
4534
4535      if (!ret)
4536        return false;
4537      break;
4538
4539    /* If this is an optimized succeed_n for zero times, make the jump.  */
4540    case jump:
4541      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4542      if (mcnt >= 0)
4543        p1 += mcnt;
4544      else
4545        return false;
4546      break;
4547
4548    case succeed_n:
4549      /* Get to the number of times to succeed.  */
4550      p1 += 2;
4551      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4552
4553      if (mcnt == 0)
4554        {
4555          p1 -= 4;
4556          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4557          p1 += mcnt;
4558        }
4559      else
4560        return false;
4561      break;
4562
4563    case duplicate:
4564      if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
4565        return false;
4566      break;
4567
4568    case set_number_at:
4569      p1 += 4;
4570
4571    default:
4572      /* All other opcodes mean we cannot match the empty string.  */
4573      return false;
4574  }
4575
4576  *p = p1;
4577  return true;
4578} /* common_op_match_null_string_p */
4579
4580
4581/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
4582   bytes; nonzero otherwise.  */
4583
4584static int
4585bcmp_translate (s1, s2, len, translate)
4586     unsigned char *s1, *s2;
4587     register int len;
4588     char *translate;
4589{
4590  register unsigned char *p1 = s1, *p2 = s2;
4591  while (len)
4592    {
4593      if (translate[*p1++] != translate[*p2++]) return 1;
4594      len--;
4595    }
4596  return 0;
4597}
4598
4599/* Entry points for GNU code.  */
4600
4601/* re_compile_pattern is the GNU regular expression compiler: it
4602   compiles PATTERN (of length SIZE) and puts the result in BUFP.
4603   Returns 0 if the pattern was valid, otherwise an error string.
4604
4605   Assumes the `allocated' (and perhaps `buffer') and `translate' fields
4606   are set in BUFP on entry.
4607
4608   We call regex_compile to do the actual compilation.  */
4609
4610const char *
4611re_compile_pattern (pattern, length, bufp)
4612     const char *pattern;
4613     int length;
4614     struct re_pattern_buffer *bufp;
4615{
4616  reg_errcode_t ret;
4617
4618  /* GNU code is written to assume at least RE_NREGS registers will be set
4619     (and at least one extra will be -1).  */
4620  bufp->regs_allocated = REGS_UNALLOCATED;
4621
4622  /* And GNU code determines whether or not to get register information
4623     by passing null for the REGS argument to re_match, etc., not by
4624     setting no_sub.  */
4625  bufp->no_sub = 0;
4626
4627  /* Match anchors at newline.  */
4628  bufp->newline_anchor = 1;
4629
4630  ret = regex_compile (pattern, length, re_syntax_options, bufp);
4631
4632  return re_error_msg[(int) ret];
4633}
4634
4635/* Entry points compatible with 4.2 BSD regex library.  We don't define
4636   them if this is an Emacs or POSIX compilation.  */
4637
4638#if !defined (emacs) && !defined (_POSIX_SOURCE)
4639
4640/* BSD has one and only one pattern buffer.  */
4641static struct re_pattern_buffer re_comp_buf;
4642
4643char *
4644re_comp (s)
4645    const char *s;
4646{
4647  reg_errcode_t ret;
4648
4649  if (!s)
4650    {
4651      if (!re_comp_buf.buffer)
4652	return "No previous regular expression";
4653      return 0;
4654    }
4655
4656  if (!re_comp_buf.buffer)
4657    {
4658      re_comp_buf.buffer = (unsigned char *) malloc (200);
4659      if (re_comp_buf.buffer == NULL)
4660        return "Memory exhausted";
4661      re_comp_buf.allocated = 200;
4662
4663      re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
4664      if (re_comp_buf.fastmap == NULL)
4665	return "Memory exhausted";
4666    }
4667
4668  /* Since `re_exec' always passes NULL for the `regs' argument, we
4669     don't need to initialize the pattern buffer fields which affect it.  */
4670
4671  /* Match anchors at newlines.  */
4672  re_comp_buf.newline_anchor = 1;
4673
4674  ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
4675
4676  /* Yes, we're discarding `const' here.  */
4677  return (char *) re_error_msg[(int) ret];
4678}
4679
4680
4681int
4682re_exec (s)
4683    const char *s;
4684{
4685  const int len = strlen (s);
4686  return
4687    0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
4688}
4689#endif /* not emacs and not _POSIX_SOURCE */
4690
4691/* POSIX.2 functions.  Don't define these for Emacs.  */
4692
4693#ifndef emacs
4694
4695/* regcomp takes a regular expression as a string and compiles it.
4696
4697   PREG is a regex_t *.  We do not expect any fields to be initialized,
4698   since POSIX says we shouldn't.  Thus, we set
4699
4700     `buffer' to the compiled pattern;
4701     `used' to the length of the compiled pattern;
4702     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
4703       REG_EXTENDED bit in CFLAGS is set; otherwise, to
4704       RE_SYNTAX_POSIX_BASIC;
4705     `newline_anchor' to REG_NEWLINE being set in CFLAGS;
4706     `fastmap' and `fastmap_accurate' to zero;
4707     `re_nsub' to the number of subexpressions in PATTERN.
4708
4709   PATTERN is the address of the pattern string.
4710
4711   CFLAGS is a series of bits which affect compilation.
4712
4713     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
4714     use POSIX basic syntax.
4715
4716     If REG_NEWLINE is set, then . and [^...] don't match newline.
4717     Also, regexec will try a match beginning after every newline.
4718
4719     If REG_ICASE is set, then we considers upper- and lowercase
4720     versions of letters to be equivalent when matching.
4721
4722     If REG_NOSUB is set, then when PREG is passed to regexec, that
4723     routine will report only success or failure, and nothing about the
4724     registers.
4725
4726   It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
4727   the return codes and their meanings.)  */
4728
4729int
4730regcomp (preg, pattern, cflags)
4731    regex_t *preg;
4732    const char *pattern;
4733    int cflags;
4734{
4735  reg_errcode_t ret;
4736  unsigned syntax
4737    = (cflags & REG_EXTENDED) ?
4738      RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
4739
4740  /* regex_compile will allocate the space for the compiled pattern.  */
4741  preg->buffer = 0;
4742  preg->allocated = 0;
4743
4744  /* Don't bother to use a fastmap when searching.  This simplifies the
4745     REG_NEWLINE case: if we used a fastmap, we'd have to put all the
4746     characters after newlines into the fastmap.  This way, we just try
4747     every character.  */
4748  preg->fastmap = 0;
4749
4750  if (cflags & REG_ICASE)
4751    {
4752      unsigned i;
4753
4754      preg->translate = (char *) malloc (CHAR_SET_SIZE);
4755      if (preg->translate == NULL)
4756        return (int) REG_ESPACE;
4757
4758      /* Map uppercase characters to corresponding lowercase ones.  */
4759      for (i = 0; i < CHAR_SET_SIZE; i++)
4760        preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
4761    }
4762  else
4763    preg->translate = NULL;
4764
4765  /* If REG_NEWLINE is set, newlines are treated differently.  */
4766  if (cflags & REG_NEWLINE)
4767    { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
4768      syntax &= ~RE_DOT_NEWLINE;
4769      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
4770      /* It also changes the matching behavior.  */
4771      preg->newline_anchor = 1;
4772    }
4773  else
4774    preg->newline_anchor = 0;
4775
4776  preg->no_sub = !!(cflags & REG_NOSUB);
4777
4778  /* POSIX says a null character in the pattern terminates it, so we
4779     can use strlen here in compiling the pattern.  */
4780  ret = regex_compile (pattern, strlen (pattern), syntax, preg);
4781
4782  /* POSIX doesn't distinguish between an unmatched open-group and an
4783     unmatched close-group: both are REG_EPAREN.  */
4784  if (ret == REG_ERPAREN) ret = REG_EPAREN;
4785
4786  return (int) ret;
4787}
4788
4789
4790/* regexec searches for a given pattern, specified by PREG, in the
4791   string STRING.
4792
4793   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
4794   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
4795   least NMATCH elements, and we set them to the offsets of the
4796   corresponding matched substrings.
4797
4798   EFLAGS specifies `execution flags' which affect matching: if
4799   REG_NOTBOL is set, then ^ does not match at the beginning of the
4800   string; if REG_NOTEOL is set, then $ does not match at the end.
4801
4802   We return 0 if we find a match and REG_NOMATCH if not.  */
4803
4804int
4805regexec (preg, string, nmatch, pmatch, eflags)
4806    const regex_t *preg;
4807    const char *string;
4808    size_t nmatch;
4809    regmatch_t pmatch[];
4810    int eflags;
4811{
4812  int ret;
4813  struct re_registers regs;
4814  regex_t private_preg;
4815  int len = strlen (string);
4816  boolean want_reg_info = !preg->no_sub && nmatch > 0;
4817
4818  private_preg = *preg;
4819
4820  private_preg.not_bol = !!(eflags & REG_NOTBOL);
4821  private_preg.not_eol = !!(eflags & REG_NOTEOL);
4822
4823  /* The user has told us exactly how many registers to return
4824     information about, via `nmatch'.  We have to pass that on to the
4825     matching routines.  */
4826  private_preg.regs_allocated = REGS_FIXED;
4827
4828  if (want_reg_info)
4829    {
4830      regs.num_regs = nmatch;
4831      regs.start = TALLOC (nmatch, regoff_t);
4832      regs.end = TALLOC (nmatch, regoff_t);
4833      if (regs.start == NULL || regs.end == NULL)
4834        return (int) REG_NOMATCH;
4835    }
4836
4837  /* Perform the searching operation.  */
4838  ret = re_search (&private_preg, string, len,
4839                   /* start: */ 0, /* range: */ len,
4840                   want_reg_info ? &regs : (struct re_registers *) 0);
4841
4842  /* Copy the register information to the POSIX structure.  */
4843  if (want_reg_info)
4844    {
4845      if (ret >= 0)
4846        {
4847          unsigned r;
4848
4849          for (r = 0; r < nmatch; r++)
4850            {
4851              pmatch[r].rm_so = regs.start[r];
4852              pmatch[r].rm_eo = regs.end[r];
4853            }
4854        }
4855
4856      /* If we needed the temporary register info, free the space now.  */
4857      free (regs.start);
4858      free (regs.end);
4859    }
4860
4861  /* We want zero return to mean success, unlike `re_search'.  */
4862  return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
4863}
4864
4865
4866/* Returns a message corresponding to an error code, ERRCODE, returned
4867   from either regcomp or regexec.   We don't use PREG here.  */
4868
4869size_t
4870regerror (errcode, preg, errbuf, errbuf_size)
4871    int errcode;
4872    const regex_t *preg;
4873    char *errbuf;
4874    size_t errbuf_size;
4875{
4876  const char *msg;
4877  size_t msg_size;
4878
4879  if (errcode < 0
4880      || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0])))
4881    /* Only error codes returned by the rest of the code should be passed
4882       to this routine.  If we are given anything else, or if other regex
4883       code generates an invalid error code, then the program has a bug.
4884       Dump core so we can fix it.  */
4885    abort ();
4886
4887  msg = re_error_msg[errcode];
4888
4889  /* POSIX doesn't require that we do anything in this case, but why
4890     not be nice.  */
4891  if (! msg)
4892    msg = "Success";
4893
4894  msg_size = strlen (msg) + 1; /* Includes the null.  */
4895
4896  if (errbuf_size != 0)
4897    {
4898      if (msg_size > errbuf_size)
4899        {
4900          strncpy (errbuf, msg, errbuf_size - 1);
4901          errbuf[errbuf_size - 1] = 0;
4902        }
4903      else
4904        strcpy (errbuf, msg);
4905    }
4906
4907  return msg_size;
4908}
4909
4910
4911/* Free dynamically allocated space used by PREG.  */
4912
4913void
4914regfree (preg)
4915    regex_t *preg;
4916{
4917  if (preg->buffer != NULL)
4918    free (preg->buffer);
4919  preg->buffer = NULL;
4920
4921  preg->allocated = 0;
4922  preg->used = 0;
4923
4924  if (preg->fastmap != NULL)
4925    free (preg->fastmap);
4926  preg->fastmap = NULL;
4927  preg->fastmap_accurate = 0;
4928
4929  if (preg->translate != NULL)
4930    free (preg->translate);
4931  preg->translate = NULL;
4932}
4933
4934#endif /* not emacs  */
4935
4936/*
4937Local variables:
4938make-backup-files: t
4939version-control: t
4940trim-versions-without-asking: nil
4941End:
4942*/
4943