regex.c revision 218
1/* Extended regular expression matching and search library,
2   version 0.12.
3   (Implements POSIX draft P10003.2/D11.2, except for
4   internationalization features.)
5
6   Copyright (C) 1993 Free Software Foundation, Inc.
7
8   This program is free software; you can redistribute it and/or modify
9   it under the terms of the GNU General Public License as published by
10   the Free Software Foundation; either version 2, or (at your option)
11   any later version.
12
13   This program is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16   GNU General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with this program; if not, write to the Free Software
20   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
21
22/* AIX requires this to be the first thing in the file. */
23#if defined (_AIX) && !defined (REGEX_MALLOC)
24  #pragma alloca
25#endif
26
27#define _GNU_SOURCE
28
29/* We need this for `regex.h', and perhaps for the Emacs include files.  */
30#include <sys/types.h>
31
32#ifdef HAVE_CONFIG_H
33#include "config.h"
34#endif
35
36/* The `emacs' switch turns on certain matching commands
37   that make sense only in Emacs. */
38#ifdef emacs
39
40#include "lisp.h"
41#include "buffer.h"
42#include "syntax.h"
43
44/* Emacs uses `NULL' as a predicate.  */
45#undef NULL
46
47#else  /* not emacs */
48
49/* We used to test for `BSTRING' here, but only GCC and Emacs define
50   `BSTRING', as far as I know, and neither of them use this code.  */
51#if HAVE_STRING_H || STDC_HEADERS
52#include <string.h>
53#ifndef bcmp
54#define bcmp(s1, s2, n)	memcmp ((s1), (s2), (n))
55#endif
56#ifndef bcopy
57#define bcopy(s, d, n)	memcpy ((d), (s), (n))
58#endif
59#ifndef bzero
60#define bzero(s, n)	memset ((s), 0, (n))
61#endif
62#else
63#include <strings.h>
64#endif
65
66#ifdef STDC_HEADERS
67#include <stdlib.h>
68#else
69char *malloc ();
70char *realloc ();
71#endif
72
73
74/* Define the syntax stuff for \<, \>, etc.  */
75
76/* This must be nonzero for the wordchar and notwordchar pattern
77   commands in re_match_2.  */
78#ifndef Sword
79#define Sword 1
80#endif
81
82#ifdef SYNTAX_TABLE
83
84extern char *re_syntax_table;
85
86#else /* not SYNTAX_TABLE */
87
88/* How many characters in the character set.  */
89#define CHAR_SET_SIZE 256
90
91static char re_syntax_table[CHAR_SET_SIZE];
92
93static void
94init_syntax_once ()
95{
96   register int c;
97   static int done = 0;
98
99   if (done)
100     return;
101
102   bzero (re_syntax_table, sizeof re_syntax_table);
103
104   for (c = 'a'; c <= 'z'; c++)
105     re_syntax_table[c] = Sword;
106
107   for (c = 'A'; c <= 'Z'; c++)
108     re_syntax_table[c] = Sword;
109
110   for (c = '0'; c <= '9'; c++)
111     re_syntax_table[c] = Sword;
112
113   re_syntax_table['_'] = Sword;
114
115   done = 1;
116}
117
118#endif /* not SYNTAX_TABLE */
119
120#define SYNTAX(c) re_syntax_table[c]
121
122#endif /* not emacs */
123
124/* Get the interface, including the syntax bits.  */
125#include "regex.h"
126
127/* isalpha etc. are used for the character classes.  */
128#include <ctype.h>
129
130#ifndef isascii
131#define isascii(c) 1
132#endif
133
134#ifdef isblank
135#define ISBLANK(c) (isascii (c) && isblank (c))
136#else
137#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
138#endif
139#ifdef isgraph
140#define ISGRAPH(c) (isascii (c) && isgraph (c))
141#else
142#define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
143#endif
144
145#define ISPRINT(c) (isascii (c) && isprint (c))
146#define ISDIGIT(c) (isascii (c) && isdigit (c))
147#define ISALNUM(c) (isascii (c) && isalnum (c))
148#define ISALPHA(c) (isascii (c) && isalpha (c))
149#define ISCNTRL(c) (isascii (c) && iscntrl (c))
150#define ISLOWER(c) (isascii (c) && islower (c))
151#define ISPUNCT(c) (isascii (c) && ispunct (c))
152#define ISSPACE(c) (isascii (c) && isspace (c))
153#define ISUPPER(c) (isascii (c) && isupper (c))
154#define ISXDIGIT(c) (isascii (c) && isxdigit (c))
155
156#ifndef NULL
157#define NULL 0
158#endif
159
160/* We remove any previous definition of `SIGN_EXTEND_CHAR',
161   since ours (we hope) works properly with all combinations of
162   machines, compilers, `char' and `unsigned char' argument types.
163   (Per Bothner suggested the basic approach.)  */
164#undef SIGN_EXTEND_CHAR
165#if __STDC__
166#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
167#else  /* not __STDC__ */
168/* As in Harbison and Steele.  */
169#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
170#endif
171
172/* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
173   use `alloca' instead of `malloc'.  This is because using malloc in
174   re_search* or re_match* could cause memory leaks when C-g is used in
175   Emacs; also, malloc is slower and causes storage fragmentation.  On
176   the other hand, malloc is more portable, and easier to debug.
177
178   Because we sometimes use alloca, some routines have to be macros,
179   not functions -- `alloca'-allocated space disappears at the end of the
180   function it is called in.  */
181
182#ifdef REGEX_MALLOC
183
184#define REGEX_ALLOCATE malloc
185#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
186
187#else /* not REGEX_MALLOC  */
188
189/* Emacs already defines alloca, sometimes.  */
190#ifndef alloca
191
192/* Make alloca work the best possible way.  */
193#ifdef __GNUC__
194#define alloca __builtin_alloca
195#else /* not __GNUC__ */
196#if HAVE_ALLOCA_H
197#include <alloca.h>
198#else /* not __GNUC__ or HAVE_ALLOCA_H */
199#ifndef _AIX /* Already did AIX, up at the top.  */
200char *alloca ();
201#endif /* not _AIX */
202#endif /* not HAVE_ALLOCA_H */
203#endif /* not __GNUC__ */
204
205#endif /* not alloca */
206
207#define REGEX_ALLOCATE alloca
208
209/* Assumes a `char *destination' variable.  */
210#define REGEX_REALLOCATE(source, osize, nsize)				\
211  (destination = (char *) alloca (nsize),				\
212   bcopy (source, destination, osize),					\
213   destination)
214
215#endif /* not REGEX_MALLOC */
216
217
218/* True if `size1' is non-NULL and PTR is pointing anywhere inside
219   `string1' or just past its end.  This works if PTR is NULL, which is
220   a good thing.  */
221#define FIRST_STRING_P(ptr) 					\
222  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
223
224/* (Re)Allocate N items of type T using malloc, or fail.  */
225#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
226#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
227#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
228
229#define BYTEWIDTH 8 /* In bits.  */
230
231#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
232
233#define MAX(a, b) ((a) > (b) ? (a) : (b))
234#define MIN(a, b) ((a) < (b) ? (a) : (b))
235
236typedef char boolean;
237#define false 0
238#define true 1
239
240/* These are the command codes that appear in compiled regular
241   expressions.  Some opcodes are followed by argument bytes.  A
242   command code can specify any interpretation whatsoever for its
243   arguments.  Zero bytes may appear in the compiled regular expression.
244
245   The value of `exactn' is needed in search.c (search_buffer) in Emacs.
246   So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
247   `exactn' we use here must also be 1.  */
248
249typedef enum
250{
251  no_op = 0,
252
253        /* Followed by one byte giving n, then by n literal bytes.  */
254  exactn = 1,
255
256        /* Matches any (more or less) character.  */
257  anychar,
258
259        /* Matches any one char belonging to specified set.  First
260           following byte is number of bitmap bytes.  Then come bytes
261           for a bitmap saying which chars are in.  Bits in each byte
262           are ordered low-bit-first.  A character is in the set if its
263           bit is 1.  A character too large to have a bit in the map is
264           automatically not in the set.  */
265  charset,
266
267        /* Same parameters as charset, but match any character that is
268           not one of those specified.  */
269  charset_not,
270
271        /* Start remembering the text that is matched, for storing in a
272           register.  Followed by one byte with the register number, in
273           the range 0 to one less than the pattern buffer's re_nsub
274           field.  Then followed by one byte with the number of groups
275           inner to this one.  (This last has to be part of the
276           start_memory only because we need it in the on_failure_jump
277           of re_match_2.)  */
278  start_memory,
279
280        /* Stop remembering the text that is matched and store it in a
281           memory register.  Followed by one byte with the register
282           number, in the range 0 to one less than `re_nsub' in the
283           pattern buffer, and one byte with the number of inner groups,
284           just like `start_memory'.  (We need the number of inner
285           groups here because we don't have any easy way of finding the
286           corresponding start_memory when we're at a stop_memory.)  */
287  stop_memory,
288
289        /* Match a duplicate of something remembered. Followed by one
290           byte containing the register number.  */
291  duplicate,
292
293        /* Fail unless at beginning of line.  */
294  begline,
295
296        /* Fail unless at end of line.  */
297  endline,
298
299        /* Succeeds if at beginning of buffer (if emacs) or at beginning
300           of string to be matched (if not).  */
301  begbuf,
302
303        /* Analogously, for end of buffer/string.  */
304  endbuf,
305
306        /* Followed by two byte relative address to which to jump.  */
307  jump,
308
309	/* Same as jump, but marks the end of an alternative.  */
310  jump_past_alt,
311
312        /* Followed by two-byte relative address of place to resume at
313           in case of failure.  */
314  on_failure_jump,
315
316        /* Like on_failure_jump, but pushes a placeholder instead of the
317           current string position when executed.  */
318  on_failure_keep_string_jump,
319
320        /* Throw away latest failure point and then jump to following
321           two-byte relative address.  */
322  pop_failure_jump,
323
324        /* Change to pop_failure_jump if know won't have to backtrack to
325           match; otherwise change to jump.  This is used to jump
326           back to the beginning of a repeat.  If what follows this jump
327           clearly won't match what the repeat does, such that we can be
328           sure that there is no use backtracking out of repetitions
329           already matched, then we change it to a pop_failure_jump.
330           Followed by two-byte address.  */
331  maybe_pop_jump,
332
333        /* Jump to following two-byte address, and push a dummy failure
334           point. This failure point will be thrown away if an attempt
335           is made to use it for a failure.  A `+' construct makes this
336           before the first repeat.  Also used as an intermediary kind
337           of jump when compiling an alternative.  */
338  dummy_failure_jump,
339
340	/* Push a dummy failure point and continue.  Used at the end of
341	   alternatives.  */
342  push_dummy_failure,
343
344        /* Followed by two-byte relative address and two-byte number n.
345           After matching N times, jump to the address upon failure.  */
346  succeed_n,
347
348        /* Followed by two-byte relative address, and two-byte number n.
349           Jump to the address N times, then fail.  */
350  jump_n,
351
352        /* Set the following two-byte relative address to the
353           subsequent two-byte number.  The address *includes* the two
354           bytes of number.  */
355  set_number_at,
356
357  wordchar,	/* Matches any word-constituent character.  */
358  notwordchar,	/* Matches any char that is not a word-constituent.  */
359
360  wordbeg,	/* Succeeds if at word beginning.  */
361  wordend,	/* Succeeds if at word end.  */
362
363  wordbound,	/* Succeeds if at a word boundary.  */
364  notwordbound	/* Succeeds if not at a word boundary.  */
365
366#ifdef emacs
367  ,before_dot,	/* Succeeds if before point.  */
368  at_dot,	/* Succeeds if at point.  */
369  after_dot,	/* Succeeds if after point.  */
370
371	/* Matches any character whose syntax is specified.  Followed by
372           a byte which contains a syntax code, e.g., Sword.  */
373  syntaxspec,
374
375	/* Matches any character whose syntax is not that specified.  */
376  notsyntaxspec
377#endif /* emacs */
378} re_opcode_t;
379
380/* Common operations on the compiled pattern.  */
381
382/* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
383
384#define STORE_NUMBER(destination, number)				\
385  do {									\
386    (destination)[0] = (number) & 0377;					\
387    (destination)[1] = (number) >> 8;					\
388  } while (0)
389
390/* Same as STORE_NUMBER, except increment DESTINATION to
391   the byte after where the number is stored.  Therefore, DESTINATION
392   must be an lvalue.  */
393
394#define STORE_NUMBER_AND_INCR(destination, number)			\
395  do {									\
396    STORE_NUMBER (destination, number);					\
397    (destination) += 2;							\
398  } while (0)
399
400/* Put into DESTINATION a number stored in two contiguous bytes starting
401   at SOURCE.  */
402
403#define EXTRACT_NUMBER(destination, source)				\
404  do {									\
405    (destination) = *(source) & 0377;					\
406    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
407  } while (0)
408
409#ifdef DEBUG
410static void
411extract_number (dest, source)
412    int *dest;
413    unsigned char *source;
414{
415  int temp = SIGN_EXTEND_CHAR (*(source + 1));
416  *dest = *source & 0377;
417  *dest += temp << 8;
418}
419
420#ifndef EXTRACT_MACROS /* To debug the macros.  */
421#undef EXTRACT_NUMBER
422#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
423#endif /* not EXTRACT_MACROS */
424
425#endif /* DEBUG */
426
427/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
428   SOURCE must be an lvalue.  */
429
430#define EXTRACT_NUMBER_AND_INCR(destination, source)			\
431  do {									\
432    EXTRACT_NUMBER (destination, source);				\
433    (source) += 2; 							\
434  } while (0)
435
436#ifdef DEBUG
437static void
438extract_number_and_incr (destination, source)
439    int *destination;
440    unsigned char **source;
441{
442  extract_number (destination, *source);
443  *source += 2;
444}
445
446#ifndef EXTRACT_MACROS
447#undef EXTRACT_NUMBER_AND_INCR
448#define EXTRACT_NUMBER_AND_INCR(dest, src) \
449  extract_number_and_incr (&dest, &src)
450#endif /* not EXTRACT_MACROS */
451
452#endif /* DEBUG */
453
454/* If DEBUG is defined, Regex prints many voluminous messages about what
455   it is doing (if the variable `debug' is nonzero).  If linked with the
456   main program in `iregex.c', you can enter patterns and strings
457   interactively.  And if linked with the main program in `main.c' and
458   the other test files, you can run the already-written tests.  */
459
460#ifdef DEBUG
461
462/* We use standard I/O for debugging.  */
463#include <stdio.h>
464
465/* It is useful to test things that ``must'' be true when debugging.  */
466#include <assert.h>
467
468static int debug = 0;
469
470#define DEBUG_STATEMENT(e) e
471#define DEBUG_PRINT1(x) if (debug) printf (x)
472#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
473#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
474#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
475#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 				\
476  if (debug) print_partial_compiled_pattern (s, e)
477#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
478  if (debug) print_double_string (w, s1, sz1, s2, sz2)
479
480
481extern void printchar ();
482
483/* Print the fastmap in human-readable form.  */
484
485void
486print_fastmap (fastmap)
487    char *fastmap;
488{
489  unsigned was_a_range = 0;
490  unsigned i = 0;
491
492  while (i < (1 << BYTEWIDTH))
493    {
494      if (fastmap[i++])
495	{
496	  was_a_range = 0;
497          printchar (i - 1);
498          while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
499            {
500              was_a_range = 1;
501              i++;
502            }
503	  if (was_a_range)
504            {
505              printf ("-");
506              printchar (i - 1);
507            }
508        }
509    }
510  putchar ('\n');
511}
512
513
514/* Print a compiled pattern string in human-readable form, starting at
515   the START pointer into it and ending just before the pointer END.  */
516
517void
518print_partial_compiled_pattern (start, end)
519    unsigned char *start;
520    unsigned char *end;
521{
522  int mcnt, mcnt2;
523  unsigned char *p = start;
524  unsigned char *pend = end;
525
526  if (start == NULL)
527    {
528      printf ("(null)\n");
529      return;
530    }
531
532  /* Loop over pattern commands.  */
533  while (p < pend)
534    {
535      switch ((re_opcode_t) *p++)
536	{
537        case no_op:
538          printf ("/no_op");
539          break;
540
541	case exactn:
542	  mcnt = *p++;
543          printf ("/exactn/%d", mcnt);
544          do
545	    {
546              putchar ('/');
547	      printchar (*p++);
548            }
549          while (--mcnt);
550          break;
551
552	case start_memory:
553          mcnt = *p++;
554          printf ("/start_memory/%d/%d", mcnt, *p++);
555          break;
556
557	case stop_memory:
558          mcnt = *p++;
559	  printf ("/stop_memory/%d/%d", mcnt, *p++);
560          break;
561
562	case duplicate:
563	  printf ("/duplicate/%d", *p++);
564	  break;
565
566	case anychar:
567	  printf ("/anychar");
568	  break;
569
570	case charset:
571        case charset_not:
572          {
573            register int c;
574
575            printf ("/charset%s",
576	            (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
577
578            assert (p + *p < pend);
579
580            for (c = 0; c < *p; c++)
581              {
582                unsigned bit;
583                unsigned char map_byte = p[1 + c];
584
585                putchar ('/');
586
587		for (bit = 0; bit < BYTEWIDTH; bit++)
588                  if (map_byte & (1 << bit))
589                    printchar (c * BYTEWIDTH + bit);
590              }
591	    p += 1 + *p;
592	    break;
593	  }
594
595	case begline:
596	  printf ("/begline");
597          break;
598
599	case endline:
600          printf ("/endline");
601          break;
602
603	case on_failure_jump:
604          extract_number_and_incr (&mcnt, &p);
605  	  printf ("/on_failure_jump/0/%d", mcnt);
606          break;
607
608	case on_failure_keep_string_jump:
609          extract_number_and_incr (&mcnt, &p);
610  	  printf ("/on_failure_keep_string_jump/0/%d", mcnt);
611          break;
612
613	case dummy_failure_jump:
614          extract_number_and_incr (&mcnt, &p);
615  	  printf ("/dummy_failure_jump/0/%d", mcnt);
616          break;
617
618	case push_dummy_failure:
619          printf ("/push_dummy_failure");
620          break;
621
622        case maybe_pop_jump:
623          extract_number_and_incr (&mcnt, &p);
624  	  printf ("/maybe_pop_jump/0/%d", mcnt);
625	  break;
626
627        case pop_failure_jump:
628	  extract_number_and_incr (&mcnt, &p);
629  	  printf ("/pop_failure_jump/0/%d", mcnt);
630	  break;
631
632        case jump_past_alt:
633	  extract_number_and_incr (&mcnt, &p);
634  	  printf ("/jump_past_alt/0/%d", mcnt);
635	  break;
636
637        case jump:
638	  extract_number_and_incr (&mcnt, &p);
639  	  printf ("/jump/0/%d", mcnt);
640	  break;
641
642        case succeed_n:
643          extract_number_and_incr (&mcnt, &p);
644          extract_number_and_incr (&mcnt2, &p);
645 	  printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2);
646          break;
647
648        case jump_n:
649          extract_number_and_incr (&mcnt, &p);
650          extract_number_and_incr (&mcnt2, &p);
651 	  printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2);
652          break;
653
654        case set_number_at:
655          extract_number_and_incr (&mcnt, &p);
656          extract_number_and_incr (&mcnt2, &p);
657 	  printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2);
658          break;
659
660        case wordbound:
661	  printf ("/wordbound");
662	  break;
663
664	case notwordbound:
665	  printf ("/notwordbound");
666          break;
667
668	case wordbeg:
669	  printf ("/wordbeg");
670	  break;
671
672	case wordend:
673	  printf ("/wordend");
674
675#ifdef emacs
676	case before_dot:
677	  printf ("/before_dot");
678          break;
679
680	case at_dot:
681	  printf ("/at_dot");
682          break;
683
684	case after_dot:
685	  printf ("/after_dot");
686          break;
687
688	case syntaxspec:
689          printf ("/syntaxspec");
690	  mcnt = *p++;
691	  printf ("/%d", mcnt);
692          break;
693
694	case notsyntaxspec:
695          printf ("/notsyntaxspec");
696	  mcnt = *p++;
697	  printf ("/%d", mcnt);
698	  break;
699#endif /* emacs */
700
701	case wordchar:
702	  printf ("/wordchar");
703          break;
704
705	case notwordchar:
706	  printf ("/notwordchar");
707          break;
708
709	case begbuf:
710	  printf ("/begbuf");
711          break;
712
713	case endbuf:
714	  printf ("/endbuf");
715          break;
716
717        default:
718          printf ("?%d", *(p-1));
719	}
720    }
721  printf ("/\n");
722}
723
724
725void
726print_compiled_pattern (bufp)
727    struct re_pattern_buffer *bufp;
728{
729  unsigned char *buffer = bufp->buffer;
730
731  print_partial_compiled_pattern (buffer, buffer + bufp->used);
732  printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
733
734  if (bufp->fastmap_accurate && bufp->fastmap)
735    {
736      printf ("fastmap: ");
737      print_fastmap (bufp->fastmap);
738    }
739
740  printf ("re_nsub: %d\t", bufp->re_nsub);
741  printf ("regs_alloc: %d\t", bufp->regs_allocated);
742  printf ("can_be_null: %d\t", bufp->can_be_null);
743  printf ("newline_anchor: %d\n", bufp->newline_anchor);
744  printf ("no_sub: %d\t", bufp->no_sub);
745  printf ("not_bol: %d\t", bufp->not_bol);
746  printf ("not_eol: %d\t", bufp->not_eol);
747  printf ("syntax: %d\n", bufp->syntax);
748  /* Perhaps we should print the translate table?  */
749}
750
751
752void
753print_double_string (where, string1, size1, string2, size2)
754    const char *where;
755    const char *string1;
756    const char *string2;
757    int size1;
758    int size2;
759{
760  unsigned this_char;
761
762  if (where == NULL)
763    printf ("(null)");
764  else
765    {
766      if (FIRST_STRING_P (where))
767        {
768          for (this_char = where - string1; this_char < size1; this_char++)
769            printchar (string1[this_char]);
770
771          where = string2;
772        }
773
774      for (this_char = where - string2; this_char < size2; this_char++)
775        printchar (string2[this_char]);
776    }
777}
778
779#else /* not DEBUG */
780
781#undef assert
782#define assert(e)
783
784#define DEBUG_STATEMENT(e)
785#define DEBUG_PRINT1(x)
786#define DEBUG_PRINT2(x1, x2)
787#define DEBUG_PRINT3(x1, x2, x3)
788#define DEBUG_PRINT4(x1, x2, x3, x4)
789#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
790#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
791
792#endif /* not DEBUG */
793
794/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
795   also be assigned to arbitrarily: each pattern buffer stores its own
796   syntax, so it can be changed between regex compilations.  */
797reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
798
799
800/* Specify the precise syntax of regexps for compilation.  This provides
801   for compatibility for various utilities which historically have
802   different, incompatible syntaxes.
803
804   The argument SYNTAX is a bit mask comprised of the various bits
805   defined in regex.h.  We return the old syntax.  */
806
807reg_syntax_t
808re_set_syntax (syntax)
809    reg_syntax_t syntax;
810{
811  reg_syntax_t ret = re_syntax_options;
812
813  re_syntax_options = syntax;
814  return ret;
815}
816
817/* This table gives an error message for each of the error codes listed
818   in regex.h.  Obviously the order here has to be same as there.  */
819
820static const char *re_error_msg[] =
821  { NULL,					/* REG_NOERROR */
822    "No match",					/* REG_NOMATCH */
823    "Invalid regular expression",		/* REG_BADPAT */
824    "Invalid collation character",		/* REG_ECOLLATE */
825    "Invalid character class name",		/* REG_ECTYPE */
826    "Trailing backslash",			/* REG_EESCAPE */
827    "Invalid back reference",			/* REG_ESUBREG */
828    "Unmatched [ or [^",			/* REG_EBRACK */
829    "Unmatched ( or \\(",			/* REG_EPAREN */
830    "Unmatched \\{",				/* REG_EBRACE */
831    "Invalid content of \\{\\}",		/* REG_BADBR */
832    "Invalid range end",			/* REG_ERANGE */
833    "Memory exhausted",				/* REG_ESPACE */
834    "Invalid preceding regular expression",	/* REG_BADRPT */
835    "Premature end of regular expression",	/* REG_EEND */
836    "Regular expression too big",		/* REG_ESIZE */
837    "Unmatched ) or \\)",			/* REG_ERPAREN */
838  };
839
840/* Subroutine declarations and macros for regex_compile.  */
841
842static void store_op1 (), store_op2 ();
843static void insert_op1 (), insert_op2 ();
844static boolean at_begline_loc_p (), at_endline_loc_p ();
845static boolean group_in_compile_stack ();
846static reg_errcode_t compile_range ();
847
848/* Fetch the next character in the uncompiled pattern---translating it
849   if necessary.  Also cast from a signed character in the constant
850   string passed to us by the user to an unsigned char that we can use
851   as an array index (in, e.g., `translate').  */
852#define PATFETCH(c)							\
853  do {if (p == pend) return REG_EEND;					\
854    c = (unsigned char) *p++;						\
855    if (translate) c = translate[c]; 					\
856  } while (0)
857
858/* Fetch the next character in the uncompiled pattern, with no
859   translation.  */
860#define PATFETCH_RAW(c)							\
861  do {if (p == pend) return REG_EEND;					\
862    c = (unsigned char) *p++; 						\
863  } while (0)
864
865/* Go backwards one character in the pattern.  */
866#define PATUNFETCH p--
867
868
869/* If `translate' is non-null, return translate[D], else just D.  We
870   cast the subscript to translate because some data is declared as
871   `char *', to avoid warnings when a string constant is passed.  But
872   when we use a character as a subscript we must make it unsigned.  */
873#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
874
875
876/* Macros for outputting the compiled pattern into `buffer'.  */
877
878/* If the buffer isn't allocated when it comes in, use this.  */
879#define INIT_BUF_SIZE  32
880
881/* Make sure we have at least N more bytes of space in buffer.  */
882#define GET_BUFFER_SPACE(n)						\
883    while (b - bufp->buffer + (n) > bufp->allocated)			\
884      EXTEND_BUFFER ()
885
886/* Make sure we have one more byte of buffer space and then add C to it.  */
887#define BUF_PUSH(c)							\
888  do {									\
889    GET_BUFFER_SPACE (1);						\
890    *b++ = (unsigned char) (c);						\
891  } while (0)
892
893
894/* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
895#define BUF_PUSH_2(c1, c2)						\
896  do {									\
897    GET_BUFFER_SPACE (2);						\
898    *b++ = (unsigned char) (c1);					\
899    *b++ = (unsigned char) (c2);					\
900  } while (0)
901
902
903/* As with BUF_PUSH_2, except for three bytes.  */
904#define BUF_PUSH_3(c1, c2, c3)						\
905  do {									\
906    GET_BUFFER_SPACE (3);						\
907    *b++ = (unsigned char) (c1);					\
908    *b++ = (unsigned char) (c2);					\
909    *b++ = (unsigned char) (c3);					\
910  } while (0)
911
912
913/* Store a jump with opcode OP at LOC to location TO.  We store a
914   relative address offset by the three bytes the jump itself occupies.  */
915#define STORE_JUMP(op, loc, to) \
916  store_op1 (op, loc, (to) - (loc) - 3)
917
918/* Likewise, for a two-argument jump.  */
919#define STORE_JUMP2(op, loc, to, arg) \
920  store_op2 (op, loc, (to) - (loc) - 3, arg)
921
922/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
923#define INSERT_JUMP(op, loc, to) \
924  insert_op1 (op, loc, (to) - (loc) - 3, b)
925
926/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
927#define INSERT_JUMP2(op, loc, to, arg) \
928  insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
929
930
931/* This is not an arbitrary limit: the arguments which represent offsets
932   into the pattern are two bytes long.  So if 2^16 bytes turns out to
933   be too small, many things would have to change.  */
934#define MAX_BUF_SIZE (1L << 16)
935
936
937/* Extend the buffer by twice its current size via realloc and
938   reset the pointers that pointed into the old block to point to the
939   correct places in the new one.  If extending the buffer results in it
940   being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
941#define EXTEND_BUFFER()							\
942  do { 									\
943    unsigned char *old_buffer = bufp->buffer;				\
944    if (bufp->allocated == MAX_BUF_SIZE) 				\
945      return REG_ESIZE;							\
946    bufp->allocated <<= 1;						\
947    if (bufp->allocated > MAX_BUF_SIZE)					\
948      bufp->allocated = MAX_BUF_SIZE; 					\
949    bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
950    if (bufp->buffer == NULL)						\
951      return REG_ESPACE;						\
952    /* If the buffer moved, move all the pointers into it.  */		\
953    if (old_buffer != bufp->buffer)					\
954      {									\
955        b = (b - old_buffer) + bufp->buffer;				\
956        begalt = (begalt - old_buffer) + bufp->buffer;			\
957        if (fixup_alt_jump)						\
958          fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
959        if (laststart)							\
960          laststart = (laststart - old_buffer) + bufp->buffer;		\
961        if (pending_exact)						\
962          pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\
963      }									\
964  } while (0)
965
966
967/* Since we have one byte reserved for the register number argument to
968   {start,stop}_memory, the maximum number of groups we can report
969   things about is what fits in that byte.  */
970#define MAX_REGNUM 255
971
972/* But patterns can have more than `MAX_REGNUM' registers.  We just
973   ignore the excess.  */
974typedef unsigned regnum_t;
975
976
977/* Macros for the compile stack.  */
978
979/* Since offsets can go either forwards or backwards, this type needs to
980   be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
981typedef int pattern_offset_t;
982
983typedef struct
984{
985  pattern_offset_t begalt_offset;
986  pattern_offset_t fixup_alt_jump;
987  pattern_offset_t inner_group_offset;
988  pattern_offset_t laststart_offset;
989  regnum_t regnum;
990} compile_stack_elt_t;
991
992
993typedef struct
994{
995  compile_stack_elt_t *stack;
996  unsigned size;
997  unsigned avail;			/* Offset of next open position.  */
998} compile_stack_type;
999
1000
1001#define INIT_COMPILE_STACK_SIZE 32
1002
1003#define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1004#define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1005
1006/* The next available element.  */
1007#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1008
1009
1010/* Set the bit for character C in a list.  */
1011#define SET_LIST_BIT(c)                               \
1012  (b[((unsigned char) (c)) / BYTEWIDTH]               \
1013   |= 1 << (((unsigned char) c) % BYTEWIDTH))
1014
1015
1016/* Get the next unsigned number in the uncompiled pattern.  */
1017#define GET_UNSIGNED_NUMBER(num) 					\
1018  { if (p != pend)							\
1019     {									\
1020       PATFETCH (c); 							\
1021       while (ISDIGIT (c)) 						\
1022         { 								\
1023           if (num < 0)							\
1024              num = 0;							\
1025           num = num * 10 + c - '0'; 					\
1026           if (p == pend) 						\
1027              break; 							\
1028           PATFETCH (c);						\
1029         } 								\
1030       } 								\
1031    }
1032
1033#define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
1034
1035#define IS_CHAR_CLASS(string)						\
1036   (STREQ (string, "alpha") || STREQ (string, "upper")			\
1037    || STREQ (string, "lower") || STREQ (string, "digit")		\
1038    || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
1039    || STREQ (string, "space") || STREQ (string, "print")		\
1040    || STREQ (string, "punct") || STREQ (string, "graph")		\
1041    || STREQ (string, "cntrl") || STREQ (string, "blank"))
1042
1043/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1044   Returns one of error codes defined in `regex.h', or zero for success.
1045
1046   Assumes the `allocated' (and perhaps `buffer') and `translate'
1047   fields are set in BUFP on entry.
1048
1049   If it succeeds, results are put in BUFP (if it returns an error, the
1050   contents of BUFP are undefined):
1051     `buffer' is the compiled pattern;
1052     `syntax' is set to SYNTAX;
1053     `used' is set to the length of the compiled pattern;
1054     `fastmap_accurate' is zero;
1055     `re_nsub' is the number of subexpressions in PATTERN;
1056     `not_bol' and `not_eol' are zero;
1057
1058   The `fastmap' and `newline_anchor' fields are neither
1059   examined nor set.  */
1060
1061static reg_errcode_t
1062regex_compile (pattern, size, syntax, bufp)
1063     const char *pattern;
1064     int size;
1065     reg_syntax_t syntax;
1066     struct re_pattern_buffer *bufp;
1067{
1068  /* We fetch characters from PATTERN here.  Even though PATTERN is
1069     `char *' (i.e., signed), we declare these variables as unsigned, so
1070     they can be reliably used as array indices.  */
1071  register unsigned char c, c1;
1072
1073  /* A random tempory spot in PATTERN.  */
1074  const char *p1;
1075
1076  /* Points to the end of the buffer, where we should append.  */
1077  register unsigned char *b;
1078
1079  /* Keeps track of unclosed groups.  */
1080  compile_stack_type compile_stack;
1081
1082  /* Points to the current (ending) position in the pattern.  */
1083  const char *p = pattern;
1084  const char *pend = pattern + size;
1085
1086  /* How to translate the characters in the pattern.  */
1087  char *translate = bufp->translate;
1088
1089  /* Address of the count-byte of the most recently inserted `exactn'
1090     command.  This makes it possible to tell if a new exact-match
1091     character can be added to that command or if the character requires
1092     a new `exactn' command.  */
1093  unsigned char *pending_exact = 0;
1094
1095  /* Address of start of the most recently finished expression.
1096     This tells, e.g., postfix * where to find the start of its
1097     operand.  Reset at the beginning of groups and alternatives.  */
1098  unsigned char *laststart = 0;
1099
1100  /* Address of beginning of regexp, or inside of last group.  */
1101  unsigned char *begalt;
1102
1103  /* Place in the uncompiled pattern (i.e., the {) to
1104     which to go back if the interval is invalid.  */
1105  const char *beg_interval;
1106
1107  /* Address of the place where a forward jump should go to the end of
1108     the containing expression.  Each alternative of an `or' -- except the
1109     last -- ends with a forward jump of this sort.  */
1110  unsigned char *fixup_alt_jump = 0;
1111
1112  /* Counts open-groups as they are encountered.  Remembered for the
1113     matching close-group on the compile stack, so the same register
1114     number is put in the stop_memory as the start_memory.  */
1115  regnum_t regnum = 0;
1116
1117#ifdef DEBUG
1118  DEBUG_PRINT1 ("\nCompiling pattern: ");
1119  if (debug)
1120    {
1121      unsigned debug_count;
1122
1123      for (debug_count = 0; debug_count < size; debug_count++)
1124        printchar (pattern[debug_count]);
1125      putchar ('\n');
1126    }
1127#endif /* DEBUG */
1128
1129  /* Initialize the compile stack.  */
1130  compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1131  if (compile_stack.stack == NULL)
1132    return REG_ESPACE;
1133
1134  compile_stack.size = INIT_COMPILE_STACK_SIZE;
1135  compile_stack.avail = 0;
1136
1137  /* Initialize the pattern buffer.  */
1138  bufp->syntax = syntax;
1139  bufp->fastmap_accurate = 0;
1140  bufp->not_bol = bufp->not_eol = 0;
1141
1142  /* Set `used' to zero, so that if we return an error, the pattern
1143     printer (for debugging) will think there's no pattern.  We reset it
1144     at the end.  */
1145  bufp->used = 0;
1146
1147  /* Always count groups, whether or not bufp->no_sub is set.  */
1148  bufp->re_nsub = 0;
1149
1150#if !defined (emacs) && !defined (SYNTAX_TABLE)
1151  /* Initialize the syntax table.  */
1152   init_syntax_once ();
1153#endif
1154
1155  if (bufp->allocated == 0)
1156    {
1157      if (bufp->buffer)
1158	{ /* If zero allocated, but buffer is non-null, try to realloc
1159             enough space.  This loses if buffer's address is bogus, but
1160             that is the user's responsibility.  */
1161          RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1162        }
1163      else
1164        { /* Caller did not allocate a buffer.  Do it for them.  */
1165          bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1166        }
1167      if (!bufp->buffer) return REG_ESPACE;
1168
1169      bufp->allocated = INIT_BUF_SIZE;
1170    }
1171
1172  begalt = b = bufp->buffer;
1173
1174  /* Loop through the uncompiled pattern until we're at the end.  */
1175  while (p != pend)
1176    {
1177      PATFETCH (c);
1178
1179      switch (c)
1180        {
1181        case '^':
1182          {
1183            if (   /* If at start of pattern, it's an operator.  */
1184                   p == pattern + 1
1185                   /* If context independent, it's an operator.  */
1186                || syntax & RE_CONTEXT_INDEP_ANCHORS
1187                   /* Otherwise, depends on what's come before.  */
1188                || at_begline_loc_p (pattern, p, syntax))
1189              BUF_PUSH (begline);
1190            else
1191              goto normal_char;
1192          }
1193          break;
1194
1195
1196        case '$':
1197          {
1198            if (   /* If at end of pattern, it's an operator.  */
1199                   p == pend
1200                   /* If context independent, it's an operator.  */
1201                || syntax & RE_CONTEXT_INDEP_ANCHORS
1202                   /* Otherwise, depends on what's next.  */
1203                || at_endline_loc_p (p, pend, syntax))
1204               BUF_PUSH (endline);
1205             else
1206               goto normal_char;
1207           }
1208           break;
1209
1210
1211	case '+':
1212        case '?':
1213          if ((syntax & RE_BK_PLUS_QM)
1214              || (syntax & RE_LIMITED_OPS))
1215            goto normal_char;
1216        handle_plus:
1217        case '*':
1218          /* If there is no previous pattern... */
1219          if (!laststart)
1220            {
1221              if (syntax & RE_CONTEXT_INVALID_OPS)
1222                return REG_BADRPT;
1223              else if (!(syntax & RE_CONTEXT_INDEP_OPS))
1224                goto normal_char;
1225            }
1226
1227          {
1228            /* Are we optimizing this jump?  */
1229            boolean keep_string_p = false;
1230
1231            /* 1 means zero (many) matches is allowed.  */
1232            char zero_times_ok = 0, many_times_ok = 0;
1233
1234            /* If there is a sequence of repetition chars, collapse it
1235               down to just one (the right one).  We can't combine
1236               interval operators with these because of, e.g., `a{2}*',
1237               which should only match an even number of `a's.  */
1238
1239            for (;;)
1240              {
1241                zero_times_ok |= c != '+';
1242                many_times_ok |= c != '?';
1243
1244                if (p == pend)
1245                  break;
1246
1247                PATFETCH (c);
1248
1249                if (c == '*'
1250                    || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
1251                  ;
1252
1253                else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
1254                  {
1255                    if (p == pend) return REG_EESCAPE;
1256
1257                    PATFETCH (c1);
1258                    if (!(c1 == '+' || c1 == '?'))
1259                      {
1260                        PATUNFETCH;
1261                        PATUNFETCH;
1262                        break;
1263                      }
1264
1265                    c = c1;
1266                  }
1267                else
1268                  {
1269                    PATUNFETCH;
1270                    break;
1271                  }
1272
1273                /* If we get here, we found another repeat character.  */
1274               }
1275
1276            /* Star, etc. applied to an empty pattern is equivalent
1277               to an empty pattern.  */
1278            if (!laststart)
1279              break;
1280
1281            /* Now we know whether or not zero matches is allowed
1282               and also whether or not two or more matches is allowed.  */
1283            if (many_times_ok)
1284              { /* More than one repetition is allowed, so put in at the
1285                   end a backward relative jump from `b' to before the next
1286                   jump we're going to put in below (which jumps from
1287                   laststart to after this jump).
1288
1289                   But if we are at the `*' in the exact sequence `.*\n',
1290                   insert an unconditional jump backwards to the .,
1291                   instead of the beginning of the loop.  This way we only
1292                   push a failure point once, instead of every time
1293                   through the loop.  */
1294                assert (p - 1 > pattern);
1295
1296                /* Allocate the space for the jump.  */
1297                GET_BUFFER_SPACE (3);
1298
1299                /* We know we are not at the first character of the pattern,
1300                   because laststart was nonzero.  And we've already
1301                   incremented `p', by the way, to be the character after
1302                   the `*'.  Do we have to do something analogous here
1303                   for null bytes, because of RE_DOT_NOT_NULL?  */
1304                if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
1305		    && zero_times_ok
1306                    && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
1307                    && !(syntax & RE_DOT_NEWLINE))
1308                  { /* We have .*\n.  */
1309                    STORE_JUMP (jump, b, laststart);
1310                    keep_string_p = true;
1311                  }
1312                else
1313                  /* Anything else.  */
1314                  STORE_JUMP (maybe_pop_jump, b, laststart - 3);
1315
1316                /* We've added more stuff to the buffer.  */
1317                b += 3;
1318              }
1319
1320            /* On failure, jump from laststart to b + 3, which will be the
1321               end of the buffer after this jump is inserted.  */
1322            GET_BUFFER_SPACE (3);
1323            INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
1324                                       : on_failure_jump,
1325                         laststart, b + 3);
1326            pending_exact = 0;
1327            b += 3;
1328
1329            if (!zero_times_ok)
1330              {
1331                /* At least one repetition is required, so insert a
1332                   `dummy_failure_jump' before the initial
1333                   `on_failure_jump' instruction of the loop. This
1334                   effects a skip over that instruction the first time
1335                   we hit that loop.  */
1336                GET_BUFFER_SPACE (3);
1337                INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
1338                b += 3;
1339              }
1340            }
1341	  break;
1342
1343
1344	case '.':
1345          laststart = b;
1346          BUF_PUSH (anychar);
1347          break;
1348
1349
1350        case '[':
1351          {
1352            boolean had_char_class = false;
1353
1354            if (p == pend) return REG_EBRACK;
1355
1356            /* Ensure that we have enough space to push a charset: the
1357               opcode, the length count, and the bitset; 34 bytes in all.  */
1358	    GET_BUFFER_SPACE (34);
1359
1360            laststart = b;
1361
1362            /* We test `*p == '^' twice, instead of using an if
1363               statement, so we only need one BUF_PUSH.  */
1364            BUF_PUSH (*p == '^' ? charset_not : charset);
1365            if (*p == '^')
1366              p++;
1367
1368            /* Remember the first position in the bracket expression.  */
1369            p1 = p;
1370
1371            /* Push the number of bytes in the bitmap.  */
1372            BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
1373
1374            /* Clear the whole map.  */
1375            bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
1376
1377            /* charset_not matches newline according to a syntax bit.  */
1378            if ((re_opcode_t) b[-2] == charset_not
1379                && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
1380              SET_LIST_BIT ('\n');
1381
1382            /* Read in characters and ranges, setting map bits.  */
1383            for (;;)
1384              {
1385                if (p == pend) return REG_EBRACK;
1386
1387                PATFETCH (c);
1388
1389                /* \ might escape characters inside [...] and [^...].  */
1390                if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
1391                  {
1392                    if (p == pend) return REG_EESCAPE;
1393
1394                    PATFETCH (c1);
1395                    SET_LIST_BIT (c1);
1396                    continue;
1397                  }
1398
1399                /* Could be the end of the bracket expression.  If it's
1400                   not (i.e., when the bracket expression is `[]' so
1401                   far), the ']' character bit gets set way below.  */
1402                if (c == ']' && p != p1 + 1)
1403                  break;
1404
1405                /* Look ahead to see if it's a range when the last thing
1406                   was a character class.  */
1407                if (had_char_class && c == '-' && *p != ']')
1408                  return REG_ERANGE;
1409
1410                /* Look ahead to see if it's a range when the last thing
1411                   was a character: if this is a hyphen not at the
1412                   beginning or the end of a list, then it's the range
1413                   operator.  */
1414                if (c == '-'
1415                    && !(p - 2 >= pattern && p[-2] == '[')
1416                    && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
1417                    && *p != ']')
1418                  {
1419                    reg_errcode_t ret
1420                      = compile_range (&p, pend, translate, syntax, b);
1421                    if (ret != REG_NOERROR) return ret;
1422                  }
1423
1424                else if (p[0] == '-' && p[1] != ']')
1425                  { /* This handles ranges made up of characters only.  */
1426                    reg_errcode_t ret;
1427
1428		    /* Move past the `-'.  */
1429                    PATFETCH (c1);
1430
1431                    ret = compile_range (&p, pend, translate, syntax, b);
1432                    if (ret != REG_NOERROR) return ret;
1433                  }
1434
1435                /* See if we're at the beginning of a possible character
1436                   class.  */
1437
1438                else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
1439                  { /* Leave room for the null.  */
1440                    char str[CHAR_CLASS_MAX_LENGTH + 1];
1441
1442                    PATFETCH (c);
1443                    c1 = 0;
1444
1445                    /* If pattern is `[[:'.  */
1446                    if (p == pend) return REG_EBRACK;
1447
1448                    for (;;)
1449                      {
1450                        PATFETCH (c);
1451                        if (c == ':' || c == ']' || p == pend
1452                            || c1 == CHAR_CLASS_MAX_LENGTH)
1453                          break;
1454                        str[c1++] = c;
1455                      }
1456                    str[c1] = '\0';
1457
1458                    /* If isn't a word bracketed by `[:' and:`]':
1459                       undo the ending character, the letters, and leave
1460                       the leading `:' and `[' (but set bits for them).  */
1461                    if (c == ':' && *p == ']')
1462                      {
1463                        int ch;
1464                        boolean is_alnum = STREQ (str, "alnum");
1465                        boolean is_alpha = STREQ (str, "alpha");
1466                        boolean is_blank = STREQ (str, "blank");
1467                        boolean is_cntrl = STREQ (str, "cntrl");
1468                        boolean is_digit = STREQ (str, "digit");
1469                        boolean is_graph = STREQ (str, "graph");
1470                        boolean is_lower = STREQ (str, "lower");
1471                        boolean is_print = STREQ (str, "print");
1472                        boolean is_punct = STREQ (str, "punct");
1473                        boolean is_space = STREQ (str, "space");
1474                        boolean is_upper = STREQ (str, "upper");
1475                        boolean is_xdigit = STREQ (str, "xdigit");
1476
1477                        if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
1478
1479                        /* Throw away the ] at the end of the character
1480                           class.  */
1481                        PATFETCH (c);
1482
1483                        if (p == pend) return REG_EBRACK;
1484
1485                        for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
1486                          {
1487                            if (   (is_alnum  && ISALNUM (ch))
1488                                || (is_alpha  && ISALPHA (ch))
1489                                || (is_blank  && ISBLANK (ch))
1490                                || (is_cntrl  && ISCNTRL (ch))
1491                                || (is_digit  && ISDIGIT (ch))
1492                                || (is_graph  && ISGRAPH (ch))
1493                                || (is_lower  && ISLOWER (ch))
1494                                || (is_print  && ISPRINT (ch))
1495                                || (is_punct  && ISPUNCT (ch))
1496                                || (is_space  && ISSPACE (ch))
1497                                || (is_upper  && ISUPPER (ch))
1498                                || (is_xdigit && ISXDIGIT (ch)))
1499                            SET_LIST_BIT (ch);
1500                          }
1501                        had_char_class = true;
1502                      }
1503                    else
1504                      {
1505                        c1++;
1506                        while (c1--)
1507                          PATUNFETCH;
1508                        SET_LIST_BIT ('[');
1509                        SET_LIST_BIT (':');
1510                        had_char_class = false;
1511                      }
1512                  }
1513                else
1514                  {
1515                    had_char_class = false;
1516                    SET_LIST_BIT (c);
1517                  }
1518              }
1519
1520            /* Discard any (non)matching list bytes that are all 0 at the
1521               end of the map.  Decrease the map-length byte too.  */
1522            while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
1523              b[-1]--;
1524            b += b[-1];
1525          }
1526          break;
1527
1528
1529	case '(':
1530          if (syntax & RE_NO_BK_PARENS)
1531            goto handle_open;
1532          else
1533            goto normal_char;
1534
1535
1536        case ')':
1537          if (syntax & RE_NO_BK_PARENS)
1538            goto handle_close;
1539          else
1540            goto normal_char;
1541
1542
1543        case '\n':
1544          if (syntax & RE_NEWLINE_ALT)
1545            goto handle_alt;
1546          else
1547            goto normal_char;
1548
1549
1550	case '|':
1551          if (syntax & RE_NO_BK_VBAR)
1552            goto handle_alt;
1553          else
1554            goto normal_char;
1555
1556
1557        case '{':
1558           if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
1559             goto handle_interval;
1560           else
1561             goto normal_char;
1562
1563
1564        case '\\':
1565          if (p == pend) return REG_EESCAPE;
1566
1567          /* Do not translate the character after the \, so that we can
1568             distinguish, e.g., \B from \b, even if we normally would
1569             translate, e.g., B to b.  */
1570          PATFETCH_RAW (c);
1571
1572          switch (c)
1573            {
1574            case '(':
1575              if (syntax & RE_NO_BK_PARENS)
1576                goto normal_backslash;
1577
1578            handle_open:
1579              bufp->re_nsub++;
1580              regnum++;
1581
1582              if (COMPILE_STACK_FULL)
1583                {
1584                  RETALLOC (compile_stack.stack, compile_stack.size << 1,
1585                            compile_stack_elt_t);
1586                  if (compile_stack.stack == NULL) return REG_ESPACE;
1587
1588                  compile_stack.size <<= 1;
1589                }
1590
1591              /* These are the values to restore when we hit end of this
1592                 group.  They are all relative offsets, so that if the
1593                 whole pattern moves because of realloc, they will still
1594                 be valid.  */
1595              COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
1596              COMPILE_STACK_TOP.fixup_alt_jump
1597                = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
1598              COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
1599              COMPILE_STACK_TOP.regnum = regnum;
1600
1601              /* We will eventually replace the 0 with the number of
1602                 groups inner to this one.  But do not push a
1603                 start_memory for groups beyond the last one we can
1604                 represent in the compiled pattern.  */
1605              if (regnum <= MAX_REGNUM)
1606                {
1607                  COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
1608                  BUF_PUSH_3 (start_memory, regnum, 0);
1609                }
1610
1611              compile_stack.avail++;
1612
1613              fixup_alt_jump = 0;
1614              laststart = 0;
1615              begalt = b;
1616	      /* If we've reached MAX_REGNUM groups, then this open
1617		 won't actually generate any code, so we'll have to
1618		 clear pending_exact explicitly.  */
1619	      pending_exact = 0;
1620              break;
1621
1622
1623            case ')':
1624              if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
1625
1626              if (COMPILE_STACK_EMPTY)
1627                if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1628                  goto normal_backslash;
1629                else
1630                  return REG_ERPAREN;
1631
1632            handle_close:
1633              if (fixup_alt_jump)
1634                { /* Push a dummy failure point at the end of the
1635                     alternative for a possible future
1636                     `pop_failure_jump' to pop.  See comments at
1637                     `push_dummy_failure' in `re_match_2'.  */
1638                  BUF_PUSH (push_dummy_failure);
1639
1640                  /* We allocated space for this jump when we assigned
1641                     to `fixup_alt_jump', in the `handle_alt' case below.  */
1642                  STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
1643                }
1644
1645              /* See similar code for backslashed left paren above.  */
1646              if (COMPILE_STACK_EMPTY)
1647                if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1648                  goto normal_char;
1649                else
1650                  return REG_ERPAREN;
1651
1652              /* Since we just checked for an empty stack above, this
1653                 ``can't happen''.  */
1654              assert (compile_stack.avail != 0);
1655              {
1656                /* We don't just want to restore into `regnum', because
1657                   later groups should continue to be numbered higher,
1658                   as in `(ab)c(de)' -- the second group is #2.  */
1659                regnum_t this_group_regnum;
1660
1661                compile_stack.avail--;
1662                begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
1663                fixup_alt_jump
1664                  = COMPILE_STACK_TOP.fixup_alt_jump
1665                    ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
1666                    : 0;
1667                laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
1668                this_group_regnum = COMPILE_STACK_TOP.regnum;
1669		/* If we've reached MAX_REGNUM groups, then this open
1670		   won't actually generate any code, so we'll have to
1671		   clear pending_exact explicitly.  */
1672		pending_exact = 0;
1673
1674                /* We're at the end of the group, so now we know how many
1675                   groups were inside this one.  */
1676                if (this_group_regnum <= MAX_REGNUM)
1677                  {
1678                    unsigned char *inner_group_loc
1679                      = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
1680
1681                    *inner_group_loc = regnum - this_group_regnum;
1682                    BUF_PUSH_3 (stop_memory, this_group_regnum,
1683                                regnum - this_group_regnum);
1684                  }
1685              }
1686              break;
1687
1688
1689            case '|':					/* `\|'.  */
1690              if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
1691                goto normal_backslash;
1692            handle_alt:
1693              if (syntax & RE_LIMITED_OPS)
1694                goto normal_char;
1695
1696              /* Insert before the previous alternative a jump which
1697                 jumps to this alternative if the former fails.  */
1698              GET_BUFFER_SPACE (3);
1699              INSERT_JUMP (on_failure_jump, begalt, b + 6);
1700              pending_exact = 0;
1701              b += 3;
1702
1703              /* The alternative before this one has a jump after it
1704                 which gets executed if it gets matched.  Adjust that
1705                 jump so it will jump to this alternative's analogous
1706                 jump (put in below, which in turn will jump to the next
1707                 (if any) alternative's such jump, etc.).  The last such
1708                 jump jumps to the correct final destination.  A picture:
1709                          _____ _____
1710                          |   | |   |
1711                          |   v |   v
1712                         a | b   | c
1713
1714                 If we are at `b', then fixup_alt_jump right now points to a
1715                 three-byte space after `a'.  We'll put in the jump, set
1716                 fixup_alt_jump to right after `b', and leave behind three
1717                 bytes which we'll fill in when we get to after `c'.  */
1718
1719              if (fixup_alt_jump)
1720                STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
1721
1722              /* Mark and leave space for a jump after this alternative,
1723                 to be filled in later either by next alternative or
1724                 when know we're at the end of a series of alternatives.  */
1725              fixup_alt_jump = b;
1726              GET_BUFFER_SPACE (3);
1727              b += 3;
1728
1729              laststart = 0;
1730              begalt = b;
1731              break;
1732
1733
1734            case '{':
1735              /* If \{ is a literal.  */
1736              if (!(syntax & RE_INTERVALS)
1737                     /* If we're at `\{' and it's not the open-interval
1738                        operator.  */
1739                  || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1740                  || (p - 2 == pattern  &&  p == pend))
1741                goto normal_backslash;
1742
1743            handle_interval:
1744              {
1745                /* If got here, then the syntax allows intervals.  */
1746
1747                /* At least (most) this many matches must be made.  */
1748                int lower_bound = -1, upper_bound = -1;
1749
1750                beg_interval = p - 1;
1751
1752                if (p == pend)
1753                  {
1754                    if (syntax & RE_NO_BK_BRACES)
1755                      goto unfetch_interval;
1756                    else
1757                      return REG_EBRACE;
1758                  }
1759
1760                GET_UNSIGNED_NUMBER (lower_bound);
1761
1762                if (c == ',')
1763                  {
1764                    GET_UNSIGNED_NUMBER (upper_bound);
1765                    if (upper_bound < 0) upper_bound = RE_DUP_MAX;
1766                  }
1767                else
1768                  /* Interval such as `{1}' => match exactly once. */
1769                  upper_bound = lower_bound;
1770
1771                if (lower_bound < 0 || upper_bound > RE_DUP_MAX
1772                    || lower_bound > upper_bound)
1773                  {
1774                    if (syntax & RE_NO_BK_BRACES)
1775                      goto unfetch_interval;
1776                    else
1777                      return REG_BADBR;
1778                  }
1779
1780                if (!(syntax & RE_NO_BK_BRACES))
1781                  {
1782                    if (c != '\\') return REG_EBRACE;
1783
1784                    PATFETCH (c);
1785                  }
1786
1787                if (c != '}')
1788                  {
1789                    if (syntax & RE_NO_BK_BRACES)
1790                      goto unfetch_interval;
1791                    else
1792                      return REG_BADBR;
1793                  }
1794
1795                /* We just parsed a valid interval.  */
1796
1797                /* If it's invalid to have no preceding re.  */
1798                if (!laststart)
1799                  {
1800                    if (syntax & RE_CONTEXT_INVALID_OPS)
1801                      return REG_BADRPT;
1802                    else if (syntax & RE_CONTEXT_INDEP_OPS)
1803                      laststart = b;
1804                    else
1805                      goto unfetch_interval;
1806                  }
1807
1808                /* If the upper bound is zero, don't want to succeed at
1809                   all; jump from `laststart' to `b + 3', which will be
1810                   the end of the buffer after we insert the jump.  */
1811                 if (upper_bound == 0)
1812                   {
1813                     GET_BUFFER_SPACE (3);
1814                     INSERT_JUMP (jump, laststart, b + 3);
1815                     b += 3;
1816                   }
1817
1818                 /* Otherwise, we have a nontrivial interval.  When
1819                    we're all done, the pattern will look like:
1820                      set_number_at <jump count> <upper bound>
1821                      set_number_at <succeed_n count> <lower bound>
1822                      succeed_n <after jump addr> <succed_n count>
1823                      <body of loop>
1824                      jump_n <succeed_n addr> <jump count>
1825                    (The upper bound and `jump_n' are omitted if
1826                    `upper_bound' is 1, though.)  */
1827                 else
1828                   { /* If the upper bound is > 1, we need to insert
1829                        more at the end of the loop.  */
1830                     unsigned nbytes = 10 + (upper_bound > 1) * 10;
1831
1832                     GET_BUFFER_SPACE (nbytes);
1833
1834                     /* Initialize lower bound of the `succeed_n', even
1835                        though it will be set during matching by its
1836                        attendant `set_number_at' (inserted next),
1837                        because `re_compile_fastmap' needs to know.
1838                        Jump to the `jump_n' we might insert below.  */
1839                     INSERT_JUMP2 (succeed_n, laststart,
1840                                   b + 5 + (upper_bound > 1) * 5,
1841                                   lower_bound);
1842                     b += 5;
1843
1844                     /* Code to initialize the lower bound.  Insert
1845                        before the `succeed_n'.  The `5' is the last two
1846                        bytes of this `set_number_at', plus 3 bytes of
1847                        the following `succeed_n'.  */
1848                     insert_op2 (set_number_at, laststart, 5, lower_bound, b);
1849                     b += 5;
1850
1851                     if (upper_bound > 1)
1852                       { /* More than one repetition is allowed, so
1853                            append a backward jump to the `succeed_n'
1854                            that starts this interval.
1855
1856                            When we've reached this during matching,
1857                            we'll have matched the interval once, so
1858                            jump back only `upper_bound - 1' times.  */
1859                         STORE_JUMP2 (jump_n, b, laststart + 5,
1860                                      upper_bound - 1);
1861                         b += 5;
1862
1863                         /* The location we want to set is the second
1864                            parameter of the `jump_n'; that is `b-2' as
1865                            an absolute address.  `laststart' will be
1866                            the `set_number_at' we're about to insert;
1867                            `laststart+3' the number to set, the source
1868                            for the relative address.  But we are
1869                            inserting into the middle of the pattern --
1870                            so everything is getting moved up by 5.
1871                            Conclusion: (b - 2) - (laststart + 3) + 5,
1872                            i.e., b - laststart.
1873
1874                            We insert this at the beginning of the loop
1875                            so that if we fail during matching, we'll
1876                            reinitialize the bounds.  */
1877                         insert_op2 (set_number_at, laststart, b - laststart,
1878                                     upper_bound - 1, b);
1879                         b += 5;
1880                       }
1881                   }
1882                pending_exact = 0;
1883                beg_interval = NULL;
1884              }
1885              break;
1886
1887            unfetch_interval:
1888              /* If an invalid interval, match the characters as literals.  */
1889               assert (beg_interval);
1890               p = beg_interval;
1891               beg_interval = NULL;
1892
1893               /* normal_char and normal_backslash need `c'.  */
1894               PATFETCH (c);
1895
1896               if (!(syntax & RE_NO_BK_BRACES))
1897                 {
1898                   if (p > pattern  &&  p[-1] == '\\')
1899                     goto normal_backslash;
1900                 }
1901               goto normal_char;
1902
1903#ifdef emacs
1904            /* There is no way to specify the before_dot and after_dot
1905               operators.  rms says this is ok.  --karl  */
1906            case '=':
1907              BUF_PUSH (at_dot);
1908              break;
1909
1910            case 's':
1911              laststart = b;
1912              PATFETCH (c);
1913              BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
1914              break;
1915
1916            case 'S':
1917              laststart = b;
1918              PATFETCH (c);
1919              BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
1920              break;
1921#endif /* emacs */
1922
1923
1924            case 'w':
1925              laststart = b;
1926              BUF_PUSH (wordchar);
1927              break;
1928
1929
1930            case 'W':
1931              laststart = b;
1932              BUF_PUSH (notwordchar);
1933              break;
1934
1935
1936            case '<':
1937              BUF_PUSH (wordbeg);
1938              break;
1939
1940            case '>':
1941              BUF_PUSH (wordend);
1942              break;
1943
1944            case 'b':
1945              BUF_PUSH (wordbound);
1946              break;
1947
1948            case 'B':
1949              BUF_PUSH (notwordbound);
1950              break;
1951
1952            case '`':
1953              BUF_PUSH (begbuf);
1954              break;
1955
1956            case '\'':
1957              BUF_PUSH (endbuf);
1958              break;
1959
1960            case '1': case '2': case '3': case '4': case '5':
1961            case '6': case '7': case '8': case '9':
1962              if (syntax & RE_NO_BK_REFS)
1963                goto normal_char;
1964
1965              c1 = c - '0';
1966
1967              if (c1 > regnum)
1968                return REG_ESUBREG;
1969
1970              /* Can't back reference to a subexpression if inside of it.  */
1971              if (group_in_compile_stack (compile_stack, c1))
1972                goto normal_char;
1973
1974              laststart = b;
1975              BUF_PUSH_2 (duplicate, c1);
1976              break;
1977
1978
1979            case '+':
1980            case '?':
1981              if (syntax & RE_BK_PLUS_QM)
1982                goto handle_plus;
1983              else
1984                goto normal_backslash;
1985
1986            default:
1987            normal_backslash:
1988              /* You might think it would be useful for \ to mean
1989                 not to translate; but if we don't translate it
1990                 it will never match anything.  */
1991              c = TRANSLATE (c);
1992              goto normal_char;
1993            }
1994          break;
1995
1996
1997	default:
1998        /* Expects the character in `c'.  */
1999	normal_char:
2000	      /* If no exactn currently being built.  */
2001          if (!pending_exact
2002
2003              /* If last exactn not at current position.  */
2004              || pending_exact + *pending_exact + 1 != b
2005
2006              /* We have only one byte following the exactn for the count.  */
2007	      || *pending_exact == (1 << BYTEWIDTH) - 1
2008
2009              /* If followed by a repetition operator.  */
2010              || *p == '*' || *p == '^'
2011	      || ((syntax & RE_BK_PLUS_QM)
2012		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
2013		  : (*p == '+' || *p == '?'))
2014	      || ((syntax & RE_INTERVALS)
2015                  && ((syntax & RE_NO_BK_BRACES)
2016		      ? *p == '{'
2017                      : (p[0] == '\\' && p[1] == '{'))))
2018	    {
2019	      /* Start building a new exactn.  */
2020
2021              laststart = b;
2022
2023	      BUF_PUSH_2 (exactn, 0);
2024	      pending_exact = b - 1;
2025            }
2026
2027	  BUF_PUSH (c);
2028          (*pending_exact)++;
2029	  break;
2030        } /* switch (c) */
2031    } /* while p != pend */
2032
2033
2034  /* Through the pattern now.  */
2035
2036  if (fixup_alt_jump)
2037    STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2038
2039  if (!COMPILE_STACK_EMPTY)
2040    return REG_EPAREN;
2041
2042  free (compile_stack.stack);
2043
2044  /* We have succeeded; set the length of the buffer.  */
2045  bufp->used = b - bufp->buffer;
2046
2047#ifdef DEBUG
2048  if (debug)
2049    {
2050      DEBUG_PRINT1 ("\nCompiled pattern: ");
2051      print_compiled_pattern (bufp);
2052    }
2053#endif /* DEBUG */
2054
2055  return REG_NOERROR;
2056} /* regex_compile */
2057
2058/* Subroutines for `regex_compile'.  */
2059
2060/* Store OP at LOC followed by two-byte integer parameter ARG.  */
2061
2062static void
2063store_op1 (op, loc, arg)
2064    re_opcode_t op;
2065    unsigned char *loc;
2066    int arg;
2067{
2068  *loc = (unsigned char) op;
2069  STORE_NUMBER (loc + 1, arg);
2070}
2071
2072
2073/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
2074
2075static void
2076store_op2 (op, loc, arg1, arg2)
2077    re_opcode_t op;
2078    unsigned char *loc;
2079    int arg1, arg2;
2080{
2081  *loc = (unsigned char) op;
2082  STORE_NUMBER (loc + 1, arg1);
2083  STORE_NUMBER (loc + 3, arg2);
2084}
2085
2086
2087/* Copy the bytes from LOC to END to open up three bytes of space at LOC
2088   for OP followed by two-byte integer parameter ARG.  */
2089
2090static void
2091insert_op1 (op, loc, arg, end)
2092    re_opcode_t op;
2093    unsigned char *loc;
2094    int arg;
2095    unsigned char *end;
2096{
2097  register unsigned char *pfrom = end;
2098  register unsigned char *pto = end + 3;
2099
2100  while (pfrom != loc)
2101    *--pto = *--pfrom;
2102
2103  store_op1 (op, loc, arg);
2104}
2105
2106
2107/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
2108
2109static void
2110insert_op2 (op, loc, arg1, arg2, end)
2111    re_opcode_t op;
2112    unsigned char *loc;
2113    int arg1, arg2;
2114    unsigned char *end;
2115{
2116  register unsigned char *pfrom = end;
2117  register unsigned char *pto = end + 5;
2118
2119  while (pfrom != loc)
2120    *--pto = *--pfrom;
2121
2122  store_op2 (op, loc, arg1, arg2);
2123}
2124
2125
2126/* P points to just after a ^ in PATTERN.  Return true if that ^ comes
2127   after an alternative or a begin-subexpression.  We assume there is at
2128   least one character before the ^.  */
2129
2130static boolean
2131at_begline_loc_p (pattern, p, syntax)
2132    const char *pattern, *p;
2133    reg_syntax_t syntax;
2134{
2135  const char *prev = p - 2;
2136  boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
2137
2138  return
2139       /* After a subexpression?  */
2140       (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
2141       /* After an alternative?  */
2142    || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
2143}
2144
2145
2146/* The dual of at_begline_loc_p.  This one is for $.  We assume there is
2147   at least one character after the $, i.e., `P < PEND'.  */
2148
2149static boolean
2150at_endline_loc_p (p, pend, syntax)
2151    const char *p, *pend;
2152    int syntax;
2153{
2154  const char *next = p;
2155  boolean next_backslash = *next == '\\';
2156  const char *next_next = p + 1 < pend ? p + 1 : NULL;
2157
2158  return
2159       /* Before a subexpression?  */
2160       (syntax & RE_NO_BK_PARENS ? *next == ')'
2161        : next_backslash && next_next && *next_next == ')')
2162       /* Before an alternative?  */
2163    || (syntax & RE_NO_BK_VBAR ? *next == '|'
2164        : next_backslash && next_next && *next_next == '|');
2165}
2166
2167
2168/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
2169   false if it's not.  */
2170
2171static boolean
2172group_in_compile_stack (compile_stack, regnum)
2173    compile_stack_type compile_stack;
2174    regnum_t regnum;
2175{
2176  int this_element;
2177
2178  for (this_element = compile_stack.avail - 1;
2179       this_element >= 0;
2180       this_element--)
2181    if (compile_stack.stack[this_element].regnum == regnum)
2182      return true;
2183
2184  return false;
2185}
2186
2187
2188/* Read the ending character of a range (in a bracket expression) from the
2189   uncompiled pattern *P_PTR (which ends at PEND).  We assume the
2190   starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
2191   Then we set the translation of all bits between the starting and
2192   ending characters (inclusive) in the compiled pattern B.
2193
2194   Return an error code.
2195
2196   We use these short variable names so we can use the same macros as
2197   `regex_compile' itself.  */
2198
2199static reg_errcode_t
2200compile_range (p_ptr, pend, translate, syntax, b)
2201    const char **p_ptr, *pend;
2202    char *translate;
2203    reg_syntax_t syntax;
2204    unsigned char *b;
2205{
2206  unsigned this_char;
2207
2208  const char *p = *p_ptr;
2209  int range_start, range_end;
2210
2211  if (p == pend)
2212    return REG_ERANGE;
2213
2214  /* Even though the pattern is a signed `char *', we need to fetch
2215     with unsigned char *'s; if the high bit of the pattern character
2216     is set, the range endpoints will be negative if we fetch using a
2217     signed char *.
2218
2219     We also want to fetch the endpoints without translating them; the
2220     appropriate translation is done in the bit-setting loop below.  */
2221  range_start = ((unsigned char *) p)[-2];
2222  range_end   = ((unsigned char *) p)[0];
2223
2224  /* Have to increment the pointer into the pattern string, so the
2225     caller isn't still at the ending character.  */
2226  (*p_ptr)++;
2227
2228  /* If the start is after the end, the range is empty.  */
2229  if (range_start > range_end)
2230    return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
2231
2232  /* Here we see why `this_char' has to be larger than an `unsigned
2233     char' -- the range is inclusive, so if `range_end' == 0xff
2234     (assuming 8-bit characters), we would otherwise go into an infinite
2235     loop, since all characters <= 0xff.  */
2236  for (this_char = range_start; this_char <= range_end; this_char++)
2237    {
2238      SET_LIST_BIT (TRANSLATE (this_char));
2239    }
2240
2241  return REG_NOERROR;
2242}
2243
2244/* Failure stack declarations and macros; both re_compile_fastmap and
2245   re_match_2 use a failure stack.  These have to be macros because of
2246   REGEX_ALLOCATE.  */
2247
2248
2249/* Number of failure points for which to initially allocate space
2250   when matching.  If this number is exceeded, we allocate more
2251   space, so it is not a hard limit.  */
2252#ifndef INIT_FAILURE_ALLOC
2253#define INIT_FAILURE_ALLOC 5
2254#endif
2255
2256/* Roughly the maximum number of failure points on the stack.  Would be
2257   exactly that if always used MAX_FAILURE_SPACE each time we failed.
2258   This is a variable only so users of regex can assign to it; we never
2259   change it ourselves.  */
2260int re_max_failures = 2000;
2261
2262typedef const unsigned char *fail_stack_elt_t;
2263
2264typedef struct
2265{
2266  fail_stack_elt_t *stack;
2267  unsigned size;
2268  unsigned avail;			/* Offset of next open position.  */
2269} fail_stack_type;
2270
2271#define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
2272#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
2273#define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
2274#define FAIL_STACK_TOP()       (fail_stack.stack[fail_stack.avail])
2275
2276
2277/* Initialize `fail_stack'.  Do `return -2' if the alloc fails.  */
2278
2279#define INIT_FAIL_STACK()						\
2280  do {									\
2281    fail_stack.stack = (fail_stack_elt_t *)				\
2282      REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t));	\
2283									\
2284    if (fail_stack.stack == NULL)					\
2285      return -2;							\
2286									\
2287    fail_stack.size = INIT_FAILURE_ALLOC;				\
2288    fail_stack.avail = 0;						\
2289  } while (0)
2290
2291
2292/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
2293
2294   Return 1 if succeeds, and 0 if either ran out of memory
2295   allocating space for it or it was already too large.
2296
2297   REGEX_REALLOCATE requires `destination' be declared.   */
2298
2299#define DOUBLE_FAIL_STACK(fail_stack)					\
2300  ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS		\
2301   ? 0									\
2302   : ((fail_stack).stack = (fail_stack_elt_t *)				\
2303        REGEX_REALLOCATE ((fail_stack).stack, 				\
2304          (fail_stack).size * sizeof (fail_stack_elt_t),		\
2305          ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),	\
2306									\
2307      (fail_stack).stack == NULL					\
2308      ? 0								\
2309      : ((fail_stack).size <<= 1, 					\
2310         1)))
2311
2312
2313/* Push PATTERN_OP on FAIL_STACK.
2314
2315   Return 1 if was able to do so and 0 if ran out of memory allocating
2316   space to do so.  */
2317#define PUSH_PATTERN_OP(pattern_op, fail_stack)				\
2318  ((FAIL_STACK_FULL ()							\
2319    && !DOUBLE_FAIL_STACK (fail_stack))					\
2320    ? 0									\
2321    : ((fail_stack).stack[(fail_stack).avail++] = pattern_op,		\
2322       1))
2323
2324/* This pushes an item onto the failure stack.  Must be a four-byte
2325   value.  Assumes the variable `fail_stack'.  Probably should only
2326   be called from within `PUSH_FAILURE_POINT'.  */
2327#define PUSH_FAILURE_ITEM(item)						\
2328  fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item
2329
2330/* The complement operation.  Assumes `fail_stack' is nonempty.  */
2331#define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]
2332
2333/* Used to omit pushing failure point id's when we're not debugging.  */
2334#ifdef DEBUG
2335#define DEBUG_PUSH PUSH_FAILURE_ITEM
2336#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
2337#else
2338#define DEBUG_PUSH(item)
2339#define DEBUG_POP(item_addr)
2340#endif
2341
2342
2343/* Push the information about the state we will need
2344   if we ever fail back to it.
2345
2346   Requires variables fail_stack, regstart, regend, reg_info, and
2347   num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
2348   declared.
2349
2350   Does `return FAILURE_CODE' if runs out of memory.  */
2351
2352#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
2353  do {									\
2354    char *destination;							\
2355    /* Must be int, so when we don't save any registers, the arithmetic	\
2356       of 0 + -1 isn't done as unsigned.  */				\
2357    int this_reg;							\
2358    									\
2359    DEBUG_STATEMENT (failure_id++);					\
2360    DEBUG_STATEMENT (nfailure_points_pushed++);				\
2361    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
2362    DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
2363    DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
2364									\
2365    DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);		\
2366    DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
2367									\
2368    /* Ensure we have enough space allocated for what we will push.  */	\
2369    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
2370      {									\
2371        if (!DOUBLE_FAIL_STACK (fail_stack))			\
2372          return failure_code;						\
2373									\
2374        DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
2375		       (fail_stack).size);				\
2376        DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
2377      }									\
2378									\
2379    /* Push the info, starting with the registers.  */			\
2380    DEBUG_PRINT1 ("\n");						\
2381									\
2382    for (this_reg = lowest_active_reg; this_reg <= highest_active_reg;	\
2383         this_reg++)							\
2384      {									\
2385	DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);			\
2386        DEBUG_STATEMENT (num_regs_pushed++);				\
2387									\
2388	DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);		\
2389        PUSH_FAILURE_ITEM (regstart[this_reg]);				\
2390                                                                        \
2391	DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);		\
2392        PUSH_FAILURE_ITEM (regend[this_reg]);				\
2393									\
2394	DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);	\
2395        DEBUG_PRINT2 (" match_null=%d",					\
2396                      REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
2397        DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
2398        DEBUG_PRINT2 (" matched_something=%d",				\
2399                      MATCHED_SOMETHING (reg_info[this_reg]));		\
2400        DEBUG_PRINT2 (" ever_matched=%d",				\
2401                      EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
2402	DEBUG_PRINT1 ("\n");						\
2403        PUSH_FAILURE_ITEM (reg_info[this_reg].word);			\
2404      }									\
2405									\
2406    DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
2407    PUSH_FAILURE_ITEM (lowest_active_reg);				\
2408									\
2409    DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
2410    PUSH_FAILURE_ITEM (highest_active_reg);				\
2411									\
2412    DEBUG_PRINT2 ("  Pushing pattern 0x%x: ", pattern_place);		\
2413    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
2414    PUSH_FAILURE_ITEM (pattern_place);					\
2415									\
2416    DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);		\
2417    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
2418				 size2);				\
2419    DEBUG_PRINT1 ("'\n");						\
2420    PUSH_FAILURE_ITEM (string_place);					\
2421									\
2422    DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
2423    DEBUG_PUSH (failure_id);						\
2424  } while (0)
2425
2426/* This is the number of items that are pushed and popped on the stack
2427   for each register.  */
2428#define NUM_REG_ITEMS  3
2429
2430/* Individual items aside from the registers.  */
2431#ifdef DEBUG
2432#define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
2433#else
2434#define NUM_NONREG_ITEMS 4
2435#endif
2436
2437/* We push at most this many items on the stack.  */
2438#define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
2439
2440/* We actually push this many items.  */
2441#define NUM_FAILURE_ITEMS						\
2442  ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS 	\
2443    + NUM_NONREG_ITEMS)
2444
2445/* How many items can still be added to the stack without overflowing it.  */
2446#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
2447
2448
2449/* Pops what PUSH_FAIL_STACK pushes.
2450
2451   We restore into the parameters, all of which should be lvalues:
2452     STR -- the saved data position.
2453     PAT -- the saved pattern position.
2454     LOW_REG, HIGH_REG -- the highest and lowest active registers.
2455     REGSTART, REGEND -- arrays of string positions.
2456     REG_INFO -- array of information about each subexpression.
2457
2458   Also assumes the variables `fail_stack' and (if debugging), `bufp',
2459   `pend', `string1', `size1', `string2', and `size2'.  */
2460
2461#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
2462{									\
2463  DEBUG_STATEMENT (fail_stack_elt_t failure_id;)			\
2464  int this_reg;								\
2465  const unsigned char *string_temp;					\
2466									\
2467  assert (!FAIL_STACK_EMPTY ());					\
2468									\
2469  /* Remove failure points and point to how many regs pushed.  */	\
2470  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
2471  DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
2472  DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
2473									\
2474  assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
2475									\
2476  DEBUG_POP (&failure_id);						\
2477  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
2478									\
2479  /* If the saved string location is NULL, it came from an		\
2480     on_failure_keep_string_jump opcode, and we want to throw away the	\
2481     saved NULL, thus retaining our current position in the string.  */	\
2482  string_temp = POP_FAILURE_ITEM ();					\
2483  if (string_temp != NULL)						\
2484    str = (const char *) string_temp;					\
2485									\
2486  DEBUG_PRINT2 ("  Popping string 0x%x: `", str);			\
2487  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
2488  DEBUG_PRINT1 ("'\n");							\
2489									\
2490  pat = (unsigned char *) POP_FAILURE_ITEM ();				\
2491  DEBUG_PRINT2 ("  Popping pattern 0x%x: ", pat);			\
2492  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
2493									\
2494  /* Restore register info.  */						\
2495  high_reg = (unsigned) POP_FAILURE_ITEM ();				\
2496  DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);		\
2497									\
2498  low_reg = (unsigned) POP_FAILURE_ITEM ();				\
2499  DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);		\
2500									\
2501  for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
2502    {									\
2503      DEBUG_PRINT2 ("    Popping reg: %d\n", this_reg);			\
2504									\
2505      reg_info[this_reg].word = POP_FAILURE_ITEM ();			\
2506      DEBUG_PRINT2 ("      info: 0x%x\n", reg_info[this_reg]);		\
2507									\
2508      regend[this_reg] = (const char *) POP_FAILURE_ITEM ();		\
2509      DEBUG_PRINT2 ("      end: 0x%x\n", regend[this_reg]);		\
2510									\
2511      regstart[this_reg] = (const char *) POP_FAILURE_ITEM ();		\
2512      DEBUG_PRINT2 ("      start: 0x%x\n", regstart[this_reg]);		\
2513    }									\
2514									\
2515  DEBUG_STATEMENT (nfailure_points_popped++);				\
2516} /* POP_FAILURE_POINT */
2517
2518/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
2519   BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
2520   characters can start a string that matches the pattern.  This fastmap
2521   is used by re_search to skip quickly over impossible starting points.
2522
2523   The caller must supply the address of a (1 << BYTEWIDTH)-byte data
2524   area as BUFP->fastmap.
2525
2526   We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
2527   the pattern buffer.
2528
2529   Returns 0 if we succeed, -2 if an internal error.   */
2530
2531int
2532re_compile_fastmap (bufp)
2533     struct re_pattern_buffer *bufp;
2534{
2535  int j, k;
2536  fail_stack_type fail_stack;
2537#ifndef REGEX_MALLOC
2538  char *destination;
2539#endif
2540  /* We don't push any register information onto the failure stack.  */
2541  unsigned num_regs = 0;
2542
2543  register char *fastmap = bufp->fastmap;
2544  unsigned char *pattern = bufp->buffer;
2545  unsigned long size = bufp->used;
2546  const unsigned char *p = pattern;
2547  register unsigned char *pend = pattern + size;
2548
2549  /* Assume that each path through the pattern can be null until
2550     proven otherwise.  We set this false at the bottom of switch
2551     statement, to which we get only if a particular path doesn't
2552     match the empty string.  */
2553  boolean path_can_be_null = true;
2554
2555  /* We aren't doing a `succeed_n' to begin with.  */
2556  boolean succeed_n_p = false;
2557
2558  assert (fastmap != NULL && p != NULL);
2559
2560  INIT_FAIL_STACK ();
2561  bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
2562  bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
2563  bufp->can_be_null = 0;
2564
2565  while (p != pend || !FAIL_STACK_EMPTY ())
2566    {
2567      if (p == pend)
2568        {
2569          bufp->can_be_null |= path_can_be_null;
2570
2571          /* Reset for next path.  */
2572          path_can_be_null = true;
2573
2574          p = fail_stack.stack[--fail_stack.avail];
2575	}
2576
2577      /* We should never be about to go beyond the end of the pattern.  */
2578      assert (p < pend);
2579
2580#ifdef SWITCH_ENUM_BUG
2581      switch ((int) ((re_opcode_t) *p++))
2582#else
2583      switch ((re_opcode_t) *p++)
2584#endif
2585	{
2586
2587        /* I guess the idea here is to simply not bother with a fastmap
2588           if a backreference is used, since it's too hard to figure out
2589           the fastmap for the corresponding group.  Setting
2590           `can_be_null' stops `re_search_2' from using the fastmap, so
2591           that is all we do.  */
2592	case duplicate:
2593	  bufp->can_be_null = 1;
2594          return 0;
2595
2596
2597      /* Following are the cases which match a character.  These end
2598         with `break'.  */
2599
2600	case exactn:
2601          fastmap[p[1]] = 1;
2602	  break;
2603
2604
2605        case charset:
2606          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2607	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
2608              fastmap[j] = 1;
2609	  break;
2610
2611
2612	case charset_not:
2613	  /* Chars beyond end of map must be allowed.  */
2614	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
2615            fastmap[j] = 1;
2616
2617	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2618	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
2619              fastmap[j] = 1;
2620          break;
2621
2622
2623	case wordchar:
2624	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2625	    if (SYNTAX (j) == Sword)
2626	      fastmap[j] = 1;
2627	  break;
2628
2629
2630	case notwordchar:
2631	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2632	    if (SYNTAX (j) != Sword)
2633	      fastmap[j] = 1;
2634	  break;
2635
2636
2637        case anychar:
2638          /* `.' matches anything ...  */
2639	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2640            fastmap[j] = 1;
2641
2642          /* ... except perhaps newline.  */
2643          if (!(bufp->syntax & RE_DOT_NEWLINE))
2644            fastmap['\n'] = 0;
2645
2646          /* Return if we have already set `can_be_null'; if we have,
2647             then the fastmap is irrelevant.  Something's wrong here.  */
2648	  else if (bufp->can_be_null)
2649	    return 0;
2650
2651          /* Otherwise, have to check alternative paths.  */
2652	  break;
2653
2654
2655#ifdef emacs
2656        case syntaxspec:
2657	  k = *p++;
2658	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2659	    if (SYNTAX (j) == (enum syntaxcode) k)
2660	      fastmap[j] = 1;
2661	  break;
2662
2663
2664	case notsyntaxspec:
2665	  k = *p++;
2666	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2667	    if (SYNTAX (j) != (enum syntaxcode) k)
2668	      fastmap[j] = 1;
2669	  break;
2670
2671
2672      /* All cases after this match the empty string.  These end with
2673         `continue'.  */
2674
2675
2676	case before_dot:
2677	case at_dot:
2678	case after_dot:
2679          continue;
2680#endif /* not emacs */
2681
2682
2683        case no_op:
2684        case begline:
2685        case endline:
2686	case begbuf:
2687	case endbuf:
2688	case wordbound:
2689	case notwordbound:
2690	case wordbeg:
2691	case wordend:
2692        case push_dummy_failure:
2693          continue;
2694
2695
2696	case jump_n:
2697        case pop_failure_jump:
2698	case maybe_pop_jump:
2699	case jump:
2700        case jump_past_alt:
2701	case dummy_failure_jump:
2702          EXTRACT_NUMBER_AND_INCR (j, p);
2703	  p += j;
2704	  if (j > 0)
2705	    continue;
2706
2707          /* Jump backward implies we just went through the body of a
2708             loop and matched nothing.  Opcode jumped to should be
2709             `on_failure_jump' or `succeed_n'.  Just treat it like an
2710             ordinary jump.  For a * loop, it has pushed its failure
2711             point already; if so, discard that as redundant.  */
2712          if ((re_opcode_t) *p != on_failure_jump
2713	      && (re_opcode_t) *p != succeed_n)
2714	    continue;
2715
2716          p++;
2717          EXTRACT_NUMBER_AND_INCR (j, p);
2718          p += j;
2719
2720          /* If what's on the stack is where we are now, pop it.  */
2721          if (!FAIL_STACK_EMPTY ()
2722	      && fail_stack.stack[fail_stack.avail - 1] == p)
2723            fail_stack.avail--;
2724
2725          continue;
2726
2727
2728        case on_failure_jump:
2729        case on_failure_keep_string_jump:
2730	handle_on_failure_jump:
2731          EXTRACT_NUMBER_AND_INCR (j, p);
2732
2733          /* For some patterns, e.g., `(a?)?', `p+j' here points to the
2734             end of the pattern.  We don't want to push such a point,
2735             since when we restore it above, entering the switch will
2736             increment `p' past the end of the pattern.  We don't need
2737             to push such a point since we obviously won't find any more
2738             fastmap entries beyond `pend'.  Such a pattern can match
2739             the null string, though.  */
2740          if (p + j < pend)
2741            {
2742              if (!PUSH_PATTERN_OP (p + j, fail_stack))
2743                return -2;
2744            }
2745          else
2746            bufp->can_be_null = 1;
2747
2748          if (succeed_n_p)
2749            {
2750              EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
2751              succeed_n_p = false;
2752	    }
2753
2754          continue;
2755
2756
2757	case succeed_n:
2758          /* Get to the number of times to succeed.  */
2759          p += 2;
2760
2761          /* Increment p past the n for when k != 0.  */
2762          EXTRACT_NUMBER_AND_INCR (k, p);
2763          if (k == 0)
2764	    {
2765              p -= 4;
2766  	      succeed_n_p = true;  /* Spaghetti code alert.  */
2767              goto handle_on_failure_jump;
2768            }
2769          continue;
2770
2771
2772	case set_number_at:
2773          p += 4;
2774          continue;
2775
2776
2777	case start_memory:
2778        case stop_memory:
2779	  p += 2;
2780	  continue;
2781
2782
2783	default:
2784          abort (); /* We have listed all the cases.  */
2785        } /* switch *p++ */
2786
2787      /* Getting here means we have found the possible starting
2788         characters for one path of the pattern -- and that the empty
2789         string does not match.  We need not follow this path further.
2790         Instead, look at the next alternative (remembered on the
2791         stack), or quit if no more.  The test at the top of the loop
2792         does these things.  */
2793      path_can_be_null = false;
2794      p = pend;
2795    } /* while p */
2796
2797  /* Set `can_be_null' for the last path (also the first path, if the
2798     pattern is empty).  */
2799  bufp->can_be_null |= path_can_be_null;
2800  return 0;
2801} /* re_compile_fastmap */
2802
2803/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
2804   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
2805   this memory for recording register information.  STARTS and ENDS
2806   must be allocated using the malloc library routine, and must each
2807   be at least NUM_REGS * sizeof (regoff_t) bytes long.
2808
2809   If NUM_REGS == 0, then subsequent matches should allocate their own
2810   register data.
2811
2812   Unless this function is called, the first search or match using
2813   PATTERN_BUFFER will allocate its own register data, without
2814   freeing the old data.  */
2815
2816void
2817re_set_registers (bufp, regs, num_regs, starts, ends)
2818    struct re_pattern_buffer *bufp;
2819    struct re_registers *regs;
2820    unsigned num_regs;
2821    regoff_t *starts, *ends;
2822{
2823  if (num_regs)
2824    {
2825      bufp->regs_allocated = REGS_REALLOCATE;
2826      regs->num_regs = num_regs;
2827      regs->start = starts;
2828      regs->end = ends;
2829    }
2830  else
2831    {
2832      bufp->regs_allocated = REGS_UNALLOCATED;
2833      regs->num_regs = 0;
2834      regs->start = regs->end = (regoff_t) 0;
2835    }
2836}
2837
2838/* Searching routines.  */
2839
2840/* Like re_search_2, below, but only one string is specified, and
2841   doesn't let you say where to stop matching. */
2842
2843int
2844re_search (bufp, string, size, startpos, range, regs)
2845     struct re_pattern_buffer *bufp;
2846     const char *string;
2847     int size, startpos, range;
2848     struct re_registers *regs;
2849{
2850  return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
2851		      regs, size);
2852}
2853
2854
2855/* Using the compiled pattern in BUFP->buffer, first tries to match the
2856   virtual concatenation of STRING1 and STRING2, starting first at index
2857   STARTPOS, then at STARTPOS + 1, and so on.
2858
2859   STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
2860
2861   RANGE is how far to scan while trying to match.  RANGE = 0 means try
2862   only at STARTPOS; in general, the last start tried is STARTPOS +
2863   RANGE.
2864
2865   In REGS, return the indices of the virtual concatenation of STRING1
2866   and STRING2 that matched the entire BUFP->buffer and its contained
2867   subexpressions.
2868
2869   Do not consider matching one past the index STOP in the virtual
2870   concatenation of STRING1 and STRING2.
2871
2872   We return either the position in the strings at which the match was
2873   found, -1 if no match, or -2 if error (such as failure
2874   stack overflow).  */
2875
2876int
2877re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
2878     struct re_pattern_buffer *bufp;
2879     const char *string1, *string2;
2880     int size1, size2;
2881     int startpos;
2882     int range;
2883     struct re_registers *regs;
2884     int stop;
2885{
2886  int val;
2887  register char *fastmap = bufp->fastmap;
2888  register char *translate = bufp->translate;
2889  int total_size = size1 + size2;
2890  int endpos = startpos + range;
2891
2892  /* Check for out-of-range STARTPOS.  */
2893  if (startpos < 0 || startpos > total_size)
2894    return -1;
2895
2896  /* Fix up RANGE if it might eventually take us outside
2897     the virtual concatenation of STRING1 and STRING2.  */
2898  if (endpos < -1)
2899    range = -1 - startpos;
2900  else if (endpos > total_size)
2901    range = total_size - startpos;
2902
2903  /* If the search isn't to be a backwards one, don't waste time in a
2904     search for a pattern that must be anchored.  */
2905  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
2906    {
2907      if (startpos > 0)
2908	return -1;
2909      else
2910	range = 1;
2911    }
2912
2913  /* Update the fastmap now if not correct already.  */
2914  if (fastmap && !bufp->fastmap_accurate)
2915    if (re_compile_fastmap (bufp) == -2)
2916      return -2;
2917
2918  /* Loop through the string, looking for a place to start matching.  */
2919  for (;;)
2920    {
2921      /* If a fastmap is supplied, skip quickly over characters that
2922         cannot be the start of a match.  If the pattern can match the
2923         null string, however, we don't need to skip characters; we want
2924         the first null string.  */
2925      if (fastmap && startpos < total_size && !bufp->can_be_null)
2926	{
2927	  if (range > 0)	/* Searching forwards.  */
2928	    {
2929	      register const char *d;
2930	      register int lim = 0;
2931	      int irange = range;
2932
2933              if (startpos < size1 && startpos + range >= size1)
2934                lim = range - (size1 - startpos);
2935
2936	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
2937
2938              /* Written out as an if-else to avoid testing `translate'
2939                 inside the loop.  */
2940	      if (translate)
2941                while (range > lim
2942                       && !fastmap[(unsigned char)
2943				   translate[(unsigned char) *d++]])
2944                  range--;
2945	      else
2946                while (range > lim && !fastmap[(unsigned char) *d++])
2947                  range--;
2948
2949	      startpos += irange - range;
2950	    }
2951	  else				/* Searching backwards.  */
2952	    {
2953	      register char c = (size1 == 0 || startpos >= size1
2954                                 ? string2[startpos - size1]
2955                                 : string1[startpos]);
2956
2957	      if (!fastmap[(unsigned char) TRANSLATE (c)])
2958		goto advance;
2959	    }
2960	}
2961
2962      /* If can't match the null string, and that's all we have left, fail.  */
2963      if (range >= 0 && startpos == total_size && fastmap
2964          && !bufp->can_be_null)
2965	return -1;
2966
2967      val = re_match_2 (bufp, string1, size1, string2, size2,
2968	                startpos, regs, stop);
2969      if (val >= 0)
2970	return startpos;
2971
2972      if (val == -2)
2973	return -2;
2974
2975    advance:
2976      if (!range)
2977        break;
2978      else if (range > 0)
2979        {
2980          range--;
2981          startpos++;
2982        }
2983      else
2984        {
2985          range++;
2986          startpos--;
2987        }
2988    }
2989  return -1;
2990} /* re_search_2 */
2991
2992/* Declarations and macros for re_match_2.  */
2993
2994static int bcmp_translate ();
2995static boolean alt_match_null_string_p (),
2996               common_op_match_null_string_p (),
2997               group_match_null_string_p ();
2998
2999/* Structure for per-register (a.k.a. per-group) information.
3000   This must not be longer than one word, because we push this value
3001   onto the failure stack.  Other register information, such as the
3002   starting and ending positions (which are addresses), and the list of
3003   inner groups (which is a bits list) are maintained in separate
3004   variables.
3005
3006   We are making a (strictly speaking) nonportable assumption here: that
3007   the compiler will pack our bit fields into something that fits into
3008   the type of `word', i.e., is something that fits into one item on the
3009   failure stack.  */
3010typedef union
3011{
3012  fail_stack_elt_t word;
3013  struct
3014  {
3015      /* This field is one if this group can match the empty string,
3016         zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
3017#define MATCH_NULL_UNSET_VALUE 3
3018    unsigned match_null_string_p : 2;
3019    unsigned is_active : 1;
3020    unsigned matched_something : 1;
3021    unsigned ever_matched_something : 1;
3022  } bits;
3023} register_info_type;
3024
3025#define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
3026#define IS_ACTIVE(R)  ((R).bits.is_active)
3027#define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
3028#define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
3029
3030
3031/* Call this when have matched a real character; it sets `matched' flags
3032   for the subexpressions which we are currently inside.  Also records
3033   that those subexprs have matched.  */
3034#define SET_REGS_MATCHED()						\
3035  do									\
3036    {									\
3037      unsigned r;							\
3038      for (r = lowest_active_reg; r <= highest_active_reg; r++)		\
3039        {								\
3040          MATCHED_SOMETHING (reg_info[r])				\
3041            = EVER_MATCHED_SOMETHING (reg_info[r])			\
3042            = 1;							\
3043        }								\
3044    }									\
3045  while (0)
3046
3047
3048/* This converts PTR, a pointer into one of the search strings `string1'
3049   and `string2' into an offset from the beginning of that string.  */
3050#define POINTER_TO_OFFSET(ptr)						\
3051  (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1)
3052
3053/* Registers are set to a sentinel when they haven't yet matched.  */
3054#define REG_UNSET_VALUE ((char *) -1)
3055#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
3056
3057
3058/* Macros for dealing with the split strings in re_match_2.  */
3059
3060#define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
3061
3062/* Call before fetching a character with *d.  This switches over to
3063   string2 if necessary.  */
3064#define PREFETCH()							\
3065  while (d == dend)						    	\
3066    {									\
3067      /* End of string2 => fail.  */					\
3068      if (dend == end_match_2) 						\
3069        goto fail;							\
3070      /* End of string1 => advance to string2.  */ 			\
3071      d = string2;						        \
3072      dend = end_match_2;						\
3073    }
3074
3075
3076/* Test if at very beginning or at very end of the virtual concatenation
3077   of `string1' and `string2'.  If only one string, it's `string2'.  */
3078#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
3079#define AT_STRINGS_END(d) ((d) == end2)
3080
3081
3082/* Test if D points to a character which is word-constituent.  We have
3083   two special cases to check for: if past the end of string1, look at
3084   the first character in string2; and if before the beginning of
3085   string2, look at the last character in string1.  */
3086#define WORDCHAR_P(d)							\
3087  (SYNTAX ((d) == end1 ? *string2					\
3088           : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
3089   == Sword)
3090
3091/* Test if the character before D and the one at D differ with respect
3092   to being word-constituent.  */
3093#define AT_WORD_BOUNDARY(d)						\
3094  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
3095   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
3096
3097
3098/* Free everything we malloc.  */
3099#ifdef REGEX_MALLOC
3100#define FREE_VAR(var) if (var) free (var); var = NULL
3101#define FREE_VARIABLES()						\
3102  do {									\
3103    FREE_VAR (fail_stack.stack);					\
3104    FREE_VAR (regstart);						\
3105    FREE_VAR (regend);							\
3106    FREE_VAR (old_regstart);						\
3107    FREE_VAR (old_regend);						\
3108    FREE_VAR (best_regstart);						\
3109    FREE_VAR (best_regend);						\
3110    FREE_VAR (reg_info);						\
3111    FREE_VAR (reg_dummy);						\
3112    FREE_VAR (reg_info_dummy);						\
3113  } while (0)
3114#else /* not REGEX_MALLOC */
3115/* Some MIPS systems (at least) want this to free alloca'd storage.  */
3116#define FREE_VARIABLES() alloca (0)
3117#endif /* not REGEX_MALLOC */
3118
3119
3120/* These values must meet several constraints.  They must not be valid
3121   register values; since we have a limit of 255 registers (because
3122   we use only one byte in the pattern for the register number), we can
3123   use numbers larger than 255.  They must differ by 1, because of
3124   NUM_FAILURE_ITEMS above.  And the value for the lowest register must
3125   be larger than the value for the highest register, so we do not try
3126   to actually save any registers when none are active.  */
3127#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
3128#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
3129
3130/* Matching routines.  */
3131
3132#ifndef emacs   /* Emacs never uses this.  */
3133/* re_match is like re_match_2 except it takes only a single string.  */
3134
3135int
3136re_match (bufp, string, size, pos, regs)
3137     struct re_pattern_buffer *bufp;
3138     const char *string;
3139     int size, pos;
3140     struct re_registers *regs;
3141 {
3142  return re_match_2 (bufp, NULL, 0, string, size, pos, regs, size);
3143}
3144#endif /* not emacs */
3145
3146
3147/* re_match_2 matches the compiled pattern in BUFP against the
3148   the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
3149   and SIZE2, respectively).  We start matching at POS, and stop
3150   matching at STOP.
3151
3152   If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
3153   store offsets for the substring each group matched in REGS.  See the
3154   documentation for exactly how many groups we fill.
3155
3156   We return -1 if no match, -2 if an internal error (such as the
3157   failure stack overflowing).  Otherwise, we return the length of the
3158   matched substring.  */
3159
3160int
3161re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3162     struct re_pattern_buffer *bufp;
3163     const char *string1, *string2;
3164     int size1, size2;
3165     int pos;
3166     struct re_registers *regs;
3167     int stop;
3168{
3169  /* General temporaries.  */
3170  int mcnt;
3171  unsigned char *p1;
3172
3173  /* Just past the end of the corresponding string.  */
3174  const char *end1, *end2;
3175
3176  /* Pointers into string1 and string2, just past the last characters in
3177     each to consider matching.  */
3178  const char *end_match_1, *end_match_2;
3179
3180  /* Where we are in the data, and the end of the current string.  */
3181  const char *d, *dend;
3182
3183  /* Where we are in the pattern, and the end of the pattern.  */
3184  unsigned char *p = bufp->buffer;
3185  register unsigned char *pend = p + bufp->used;
3186
3187  /* We use this to map every character in the string.  */
3188  char *translate = bufp->translate;
3189
3190  /* Failure point stack.  Each place that can handle a failure further
3191     down the line pushes a failure point on this stack.  It consists of
3192     restart, regend, and reg_info for all registers corresponding to
3193     the subexpressions we're currently inside, plus the number of such
3194     registers, and, finally, two char *'s.  The first char * is where
3195     to resume scanning the pattern; the second one is where to resume
3196     scanning the strings.  If the latter is zero, the failure point is
3197     a ``dummy''; if a failure happens and the failure point is a dummy,
3198     it gets discarded and the next next one is tried.  */
3199  fail_stack_type fail_stack;
3200#ifdef DEBUG
3201  static unsigned failure_id = 0;
3202  unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
3203#endif
3204
3205  /* We fill all the registers internally, independent of what we
3206     return, for use in backreferences.  The number here includes
3207     an element for register zero.  */
3208  unsigned num_regs = bufp->re_nsub + 1;
3209
3210  /* The currently active registers.  */
3211  unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3212  unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3213
3214  /* Information on the contents of registers. These are pointers into
3215     the input strings; they record just what was matched (on this
3216     attempt) by a subexpression part of the pattern, that is, the
3217     regnum-th regstart pointer points to where in the pattern we began
3218     matching and the regnum-th regend points to right after where we
3219     stopped matching the regnum-th subexpression.  (The zeroth register
3220     keeps track of what the whole pattern matches.)  */
3221  const char **regstart, **regend;
3222
3223  /* If a group that's operated upon by a repetition operator fails to
3224     match anything, then the register for its start will need to be
3225     restored because it will have been set to wherever in the string we
3226     are when we last see its open-group operator.  Similarly for a
3227     register's end.  */
3228  const char **old_regstart, **old_regend;
3229
3230  /* The is_active field of reg_info helps us keep track of which (possibly
3231     nested) subexpressions we are currently in. The matched_something
3232     field of reg_info[reg_num] helps us tell whether or not we have
3233     matched any of the pattern so far this time through the reg_num-th
3234     subexpression.  These two fields get reset each time through any
3235     loop their register is in.  */
3236  register_info_type *reg_info;
3237
3238  /* The following record the register info as found in the above
3239     variables when we find a match better than any we've seen before.
3240     This happens as we backtrack through the failure points, which in
3241     turn happens only if we have not yet matched the entire string. */
3242  unsigned best_regs_set = false;
3243  const char **best_regstart, **best_regend;
3244
3245  /* Logically, this is `best_regend[0]'.  But we don't want to have to
3246     allocate space for that if we're not allocating space for anything
3247     else (see below).  Also, we never need info about register 0 for
3248     any of the other register vectors, and it seems rather a kludge to
3249     treat `best_regend' differently than the rest.  So we keep track of
3250     the end of the best match so far in a separate variable.  We
3251     initialize this to NULL so that when we backtrack the first time
3252     and need to test it, it's not garbage.  */
3253  const char *match_end = NULL;
3254
3255  /* Used when we pop values we don't care about.  */
3256  const char **reg_dummy;
3257  register_info_type *reg_info_dummy;
3258
3259#ifdef DEBUG
3260  /* Counts the total number of registers pushed.  */
3261  unsigned num_regs_pushed = 0;
3262#endif
3263
3264  DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
3265
3266  INIT_FAIL_STACK ();
3267
3268  /* Do not bother to initialize all the register variables if there are
3269     no groups in the pattern, as it takes a fair amount of time.  If
3270     there are groups, we include space for register 0 (the whole
3271     pattern), even though we never use it, since it simplifies the
3272     array indexing.  We should fix this.  */
3273  if (bufp->re_nsub)
3274    {
3275      regstart = REGEX_TALLOC (num_regs, const char *);
3276      regend = REGEX_TALLOC (num_regs, const char *);
3277      old_regstart = REGEX_TALLOC (num_regs, const char *);
3278      old_regend = REGEX_TALLOC (num_regs, const char *);
3279      best_regstart = REGEX_TALLOC (num_regs, const char *);
3280      best_regend = REGEX_TALLOC (num_regs, const char *);
3281      reg_info = REGEX_TALLOC (num_regs, register_info_type);
3282      reg_dummy = REGEX_TALLOC (num_regs, const char *);
3283      reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
3284
3285      if (!(regstart && regend && old_regstart && old_regend && reg_info
3286            && best_regstart && best_regend && reg_dummy && reg_info_dummy))
3287        {
3288          FREE_VARIABLES ();
3289          return -2;
3290        }
3291    }
3292#ifdef REGEX_MALLOC
3293  else
3294    {
3295      /* We must initialize all our variables to NULL, so that
3296         `FREE_VARIABLES' doesn't try to free them.  */
3297      regstart = regend = old_regstart = old_regend = best_regstart
3298        = best_regend = reg_dummy = NULL;
3299      reg_info = reg_info_dummy = (register_info_type *) NULL;
3300    }
3301#endif /* REGEX_MALLOC */
3302
3303  /* The starting position is bogus.  */
3304  if (pos < 0 || pos > size1 + size2)
3305    {
3306      FREE_VARIABLES ();
3307      return -1;
3308    }
3309
3310  /* Initialize subexpression text positions to -1 to mark ones that no
3311     start_memory/stop_memory has been seen for. Also initialize the
3312     register information struct.  */
3313  for (mcnt = 1; mcnt < num_regs; mcnt++)
3314    {
3315      regstart[mcnt] = regend[mcnt]
3316        = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
3317
3318      REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
3319      IS_ACTIVE (reg_info[mcnt]) = 0;
3320      MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3321      EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3322    }
3323
3324  /* We move `string1' into `string2' if the latter's empty -- but not if
3325     `string1' is null.  */
3326  if (size2 == 0 && string1 != NULL)
3327    {
3328      string2 = string1;
3329      size2 = size1;
3330      string1 = 0;
3331      size1 = 0;
3332    }
3333  end1 = string1 + size1;
3334  end2 = string2 + size2;
3335
3336  /* Compute where to stop matching, within the two strings.  */
3337  if (stop <= size1)
3338    {
3339      end_match_1 = string1 + stop;
3340      end_match_2 = string2;
3341    }
3342  else
3343    {
3344      end_match_1 = end1;
3345      end_match_2 = string2 + stop - size1;
3346    }
3347
3348  /* `p' scans through the pattern as `d' scans through the data.
3349     `dend' is the end of the input string that `d' points within.  `d'
3350     is advanced into the following input string whenever necessary, but
3351     this happens before fetching; therefore, at the beginning of the
3352     loop, `d' can be pointing at the end of a string, but it cannot
3353     equal `string2'.  */
3354  if (size1 > 0 && pos <= size1)
3355    {
3356      d = string1 + pos;
3357      dend = end_match_1;
3358    }
3359  else
3360    {
3361      d = string2 + pos - size1;
3362      dend = end_match_2;
3363    }
3364
3365  DEBUG_PRINT1 ("The compiled pattern is: ");
3366  DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
3367  DEBUG_PRINT1 ("The string to match is: `");
3368  DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
3369  DEBUG_PRINT1 ("'\n");
3370
3371  /* This loops over pattern commands.  It exits by returning from the
3372     function if the match is complete, or it drops through if the match
3373     fails at this starting point in the input data.  */
3374  for (;;)
3375    {
3376      DEBUG_PRINT2 ("\n0x%x: ", p);
3377
3378      if (p == pend)
3379	{ /* End of pattern means we might have succeeded.  */
3380          DEBUG_PRINT1 ("end of pattern ... ");
3381
3382	  /* If we haven't matched the entire string, and we want the
3383             longest match, try backtracking.  */
3384          if (d != end_match_2)
3385	    {
3386              DEBUG_PRINT1 ("backtracking.\n");
3387
3388              if (!FAIL_STACK_EMPTY ())
3389                { /* More failure points to try.  */
3390                  boolean same_str_p = (FIRST_STRING_P (match_end)
3391	        	                == MATCHING_IN_FIRST_STRING);
3392
3393                  /* If exceeds best match so far, save it.  */
3394                  if (!best_regs_set
3395                      || (same_str_p && d > match_end)
3396                      || (!same_str_p && !MATCHING_IN_FIRST_STRING))
3397                    {
3398                      best_regs_set = true;
3399                      match_end = d;
3400
3401                      DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
3402
3403                      for (mcnt = 1; mcnt < num_regs; mcnt++)
3404                        {
3405                          best_regstart[mcnt] = regstart[mcnt];
3406                          best_regend[mcnt] = regend[mcnt];
3407                        }
3408                    }
3409                  goto fail;
3410                }
3411
3412              /* If no failure points, don't restore garbage.  */
3413              else if (best_regs_set)
3414                {
3415  	        restore_best_regs:
3416                  /* Restore best match.  It may happen that `dend ==
3417                     end_match_1' while the restored d is in string2.
3418                     For example, the pattern `x.*y.*z' against the
3419                     strings `x-' and `y-z-', if the two strings are
3420                     not consecutive in memory.  */
3421                  DEBUG_PRINT1 ("Restoring best registers.\n");
3422
3423                  d = match_end;
3424                  dend = ((d >= string1 && d <= end1)
3425		           ? end_match_1 : end_match_2);
3426
3427		  for (mcnt = 1; mcnt < num_regs; mcnt++)
3428		    {
3429		      regstart[mcnt] = best_regstart[mcnt];
3430		      regend[mcnt] = best_regend[mcnt];
3431		    }
3432                }
3433            } /* d != end_match_2 */
3434
3435          DEBUG_PRINT1 ("Accepting match.\n");
3436
3437          /* If caller wants register contents data back, do it.  */
3438          if (regs && !bufp->no_sub)
3439	    {
3440              /* Have the register data arrays been allocated?  */
3441              if (bufp->regs_allocated == REGS_UNALLOCATED)
3442                { /* No.  So allocate them with malloc.  We need one
3443                     extra element beyond `num_regs' for the `-1' marker
3444                     GNU code uses.  */
3445                  regs->num_regs = MAX (RE_NREGS, num_regs + 1);
3446                  regs->start = TALLOC (regs->num_regs, regoff_t);
3447                  regs->end = TALLOC (regs->num_regs, regoff_t);
3448                  if (regs->start == NULL || regs->end == NULL)
3449                    return -2;
3450                  bufp->regs_allocated = REGS_REALLOCATE;
3451                }
3452              else if (bufp->regs_allocated == REGS_REALLOCATE)
3453                { /* Yes.  If we need more elements than were already
3454                     allocated, reallocate them.  If we need fewer, just
3455                     leave it alone.  */
3456                  if (regs->num_regs < num_regs + 1)
3457                    {
3458                      regs->num_regs = num_regs + 1;
3459                      RETALLOC (regs->start, regs->num_regs, regoff_t);
3460                      RETALLOC (regs->end, regs->num_regs, regoff_t);
3461                      if (regs->start == NULL || regs->end == NULL)
3462                        return -2;
3463                    }
3464                }
3465              else
3466                assert (bufp->regs_allocated == REGS_FIXED);
3467
3468              /* Convert the pointer data in `regstart' and `regend' to
3469                 indices.  Register zero has to be set differently,
3470                 since we haven't kept track of any info for it.  */
3471              if (regs->num_regs > 0)
3472                {
3473                  regs->start[0] = pos;
3474                  regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1
3475			          : d - string2 + size1);
3476                }
3477
3478              /* Go through the first `min (num_regs, regs->num_regs)'
3479                 registers, since that is all we initialized.  */
3480	      for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
3481		{
3482                  if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
3483                    regs->start[mcnt] = regs->end[mcnt] = -1;
3484                  else
3485                    {
3486		      regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]);
3487                      regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]);
3488                    }
3489		}
3490
3491              /* If the regs structure we return has more elements than
3492                 were in the pattern, set the extra elements to -1.  If
3493                 we (re)allocated the registers, this is the case,
3494                 because we always allocate enough to have at least one
3495                 -1 at the end.  */
3496              for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
3497                regs->start[mcnt] = regs->end[mcnt] = -1;
3498	    } /* regs && !bufp->no_sub */
3499
3500          FREE_VARIABLES ();
3501          DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
3502                        nfailure_points_pushed, nfailure_points_popped,
3503                        nfailure_points_pushed - nfailure_points_popped);
3504          DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
3505
3506          mcnt = d - pos - (MATCHING_IN_FIRST_STRING
3507			    ? string1
3508			    : string2 - size1);
3509
3510          DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
3511
3512          return mcnt;
3513        }
3514
3515      /* Otherwise match next pattern command.  */
3516#ifdef SWITCH_ENUM_BUG
3517      switch ((int) ((re_opcode_t) *p++))
3518#else
3519      switch ((re_opcode_t) *p++)
3520#endif
3521	{
3522        /* Ignore these.  Used to ignore the n of succeed_n's which
3523           currently have n == 0.  */
3524        case no_op:
3525          DEBUG_PRINT1 ("EXECUTING no_op.\n");
3526          break;
3527
3528
3529        /* Match the next n pattern characters exactly.  The following
3530           byte in the pattern defines n, and the n bytes after that
3531           are the characters to match.  */
3532	case exactn:
3533	  mcnt = *p++;
3534          DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
3535
3536          /* This is written out as an if-else so we don't waste time
3537             testing `translate' inside the loop.  */
3538          if (translate)
3539	    {
3540	      do
3541		{
3542		  PREFETCH ();
3543		  if (translate[(unsigned char) *d++] != (char) *p++)
3544                    goto fail;
3545		}
3546	      while (--mcnt);
3547	    }
3548	  else
3549	    {
3550	      do
3551		{
3552		  PREFETCH ();
3553		  if (*d++ != (char) *p++) goto fail;
3554		}
3555	      while (--mcnt);
3556	    }
3557	  SET_REGS_MATCHED ();
3558          break;
3559
3560
3561        /* Match any character except possibly a newline or a null.  */
3562	case anychar:
3563          DEBUG_PRINT1 ("EXECUTING anychar.\n");
3564
3565          PREFETCH ();
3566
3567          if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
3568              || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
3569	    goto fail;
3570
3571          SET_REGS_MATCHED ();
3572          DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
3573          d++;
3574	  break;
3575
3576
3577	case charset:
3578	case charset_not:
3579	  {
3580	    register unsigned char c;
3581	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
3582
3583            DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
3584
3585	    PREFETCH ();
3586	    c = TRANSLATE (*d); /* The character to match.  */
3587
3588            /* Cast to `unsigned' instead of `unsigned char' in case the
3589               bit list is a full 32 bytes long.  */
3590	    if (c < (unsigned) (*p * BYTEWIDTH)
3591		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
3592	      not = !not;
3593
3594	    p += 1 + *p;
3595
3596	    if (!not) goto fail;
3597
3598	    SET_REGS_MATCHED ();
3599            d++;
3600	    break;
3601	  }
3602
3603
3604        /* The beginning of a group is represented by start_memory.
3605           The arguments are the register number in the next byte, and the
3606           number of groups inner to this one in the next.  The text
3607           matched within the group is recorded (in the internal
3608           registers data structure) under the register number.  */
3609        case start_memory:
3610	  DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
3611
3612          /* Find out if this group can match the empty string.  */
3613	  p1 = p;		/* To send to group_match_null_string_p.  */
3614
3615          if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
3616            REG_MATCH_NULL_STRING_P (reg_info[*p])
3617              = group_match_null_string_p (&p1, pend, reg_info);
3618
3619          /* Save the position in the string where we were the last time
3620             we were at this open-group operator in case the group is
3621             operated upon by a repetition operator, e.g., with `(a*)*b'
3622             against `ab'; then we want to ignore where we are now in
3623             the string in case this attempt to match fails.  */
3624          old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3625                             ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
3626                             : regstart[*p];
3627	  DEBUG_PRINT2 ("  old_regstart: %d\n",
3628			 POINTER_TO_OFFSET (old_regstart[*p]));
3629
3630          regstart[*p] = d;
3631	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
3632
3633          IS_ACTIVE (reg_info[*p]) = 1;
3634          MATCHED_SOMETHING (reg_info[*p]) = 0;
3635
3636          /* This is the new highest active register.  */
3637          highest_active_reg = *p;
3638
3639          /* If nothing was active before, this is the new lowest active
3640             register.  */
3641          if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
3642            lowest_active_reg = *p;
3643
3644          /* Move past the register number and inner group count.  */
3645          p += 2;
3646          break;
3647
3648
3649        /* The stop_memory opcode represents the end of a group.  Its
3650           arguments are the same as start_memory's: the register
3651           number, and the number of inner groups.  */
3652	case stop_memory:
3653	  DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
3654
3655          /* We need to save the string position the last time we were at
3656             this close-group operator in case the group is operated
3657             upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
3658             against `aba'; then we want to ignore where we are now in
3659             the string in case this attempt to match fails.  */
3660          old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3661                           ? REG_UNSET (regend[*p]) ? d : regend[*p]
3662			   : regend[*p];
3663	  DEBUG_PRINT2 ("      old_regend: %d\n",
3664			 POINTER_TO_OFFSET (old_regend[*p]));
3665
3666          regend[*p] = d;
3667	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
3668
3669          /* This register isn't active anymore.  */
3670          IS_ACTIVE (reg_info[*p]) = 0;
3671
3672          /* If this was the only register active, nothing is active
3673             anymore.  */
3674          if (lowest_active_reg == highest_active_reg)
3675            {
3676              lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3677              highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3678            }
3679          else
3680            { /* We must scan for the new highest active register, since
3681                 it isn't necessarily one less than now: consider
3682                 (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
3683                 new highest active register is 1.  */
3684              unsigned char r = *p - 1;
3685              while (r > 0 && !IS_ACTIVE (reg_info[r]))
3686                r--;
3687
3688              /* If we end up at register zero, that means that we saved
3689                 the registers as the result of an `on_failure_jump', not
3690                 a `start_memory', and we jumped to past the innermost
3691                 `stop_memory'.  For example, in ((.)*) we save
3692                 registers 1 and 2 as a result of the *, but when we pop
3693                 back to the second ), we are at the stop_memory 1.
3694                 Thus, nothing is active.  */
3695	      if (r == 0)
3696                {
3697                  lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3698                  highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3699                }
3700              else
3701                highest_active_reg = r;
3702            }
3703
3704          /* If just failed to match something this time around with a
3705             group that's operated on by a repetition operator, try to
3706             force exit from the ``loop'', and restore the register
3707             information for this group that we had before trying this
3708             last match.  */
3709          if ((!MATCHED_SOMETHING (reg_info[*p])
3710               || (re_opcode_t) p[-3] == start_memory)
3711	      && (p + 2) < pend)
3712            {
3713              boolean is_a_jump_n = false;
3714
3715              p1 = p + 2;
3716              mcnt = 0;
3717              switch ((re_opcode_t) *p1++)
3718                {
3719                  case jump_n:
3720		    is_a_jump_n = true;
3721                  case pop_failure_jump:
3722		  case maybe_pop_jump:
3723		  case jump:
3724		  case dummy_failure_jump:
3725                    EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3726		    if (is_a_jump_n)
3727		      p1 += 2;
3728                    break;
3729
3730                  default:
3731                    /* do nothing */ ;
3732                }
3733	      p1 += mcnt;
3734
3735              /* If the next operation is a jump backwards in the pattern
3736	         to an on_failure_jump right before the start_memory
3737                 corresponding to this stop_memory, exit from the loop
3738                 by forcing a failure after pushing on the stack the
3739                 on_failure_jump's jump in the pattern, and d.  */
3740              if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
3741                  && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
3742		{
3743                  /* If this group ever matched anything, then restore
3744                     what its registers were before trying this last
3745                     failed match, e.g., with `(a*)*b' against `ab' for
3746                     regstart[1], and, e.g., with `((a*)*(b*)*)*'
3747                     against `aba' for regend[3].
3748
3749                     Also restore the registers for inner groups for,
3750                     e.g., `((a*)(b*))*' against `aba' (register 3 would
3751                     otherwise get trashed).  */
3752
3753                  if (EVER_MATCHED_SOMETHING (reg_info[*p]))
3754		    {
3755		      unsigned r;
3756
3757                      EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
3758
3759		      /* Restore this and inner groups' (if any) registers.  */
3760                      for (r = *p; r < *p + *(p + 1); r++)
3761                        {
3762                          regstart[r] = old_regstart[r];
3763
3764                          /* xx why this test?  */
3765                          if ((int) old_regend[r] >= (int) regstart[r])
3766                            regend[r] = old_regend[r];
3767                        }
3768                    }
3769		  p1++;
3770                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3771                  PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
3772
3773                  goto fail;
3774                }
3775            }
3776
3777          /* Move past the register number and the inner group count.  */
3778          p += 2;
3779          break;
3780
3781
3782	/* \<digit> has been turned into a `duplicate' command which is
3783           followed by the numeric value of <digit> as the register number.  */
3784        case duplicate:
3785	  {
3786	    register const char *d2, *dend2;
3787	    int regno = *p++;   /* Get which register to match against.  */
3788	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
3789
3790	    /* Can't back reference a group which we've never matched.  */
3791            if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
3792              goto fail;
3793
3794            /* Where in input to try to start matching.  */
3795            d2 = regstart[regno];
3796
3797            /* Where to stop matching; if both the place to start and
3798               the place to stop matching are in the same string, then
3799               set to the place to stop, otherwise, for now have to use
3800               the end of the first string.  */
3801
3802            dend2 = ((FIRST_STRING_P (regstart[regno])
3803		      == FIRST_STRING_P (regend[regno]))
3804		     ? regend[regno] : end_match_1);
3805	    for (;;)
3806	      {
3807		/* If necessary, advance to next segment in register
3808                   contents.  */
3809		while (d2 == dend2)
3810		  {
3811		    if (dend2 == end_match_2) break;
3812		    if (dend2 == regend[regno]) break;
3813
3814                    /* End of string1 => advance to string2. */
3815                    d2 = string2;
3816                    dend2 = regend[regno];
3817		  }
3818		/* At end of register contents => success */
3819		if (d2 == dend2) break;
3820
3821		/* If necessary, advance to next segment in data.  */
3822		PREFETCH ();
3823
3824		/* How many characters left in this segment to match.  */
3825		mcnt = dend - d;
3826
3827		/* Want how many consecutive characters we can match in
3828                   one shot, so, if necessary, adjust the count.  */
3829                if (mcnt > dend2 - d2)
3830		  mcnt = dend2 - d2;
3831
3832		/* Compare that many; failure if mismatch, else move
3833                   past them.  */
3834		if (translate
3835                    ? bcmp_translate (d, d2, mcnt, translate)
3836                    : bcmp (d, d2, mcnt))
3837		  goto fail;
3838		d += mcnt, d2 += mcnt;
3839	      }
3840	  }
3841	  break;
3842
3843
3844        /* begline matches the empty string at the beginning of the string
3845           (unless `not_bol' is set in `bufp'), and, if
3846           `newline_anchor' is set, after newlines.  */
3847	case begline:
3848          DEBUG_PRINT1 ("EXECUTING begline.\n");
3849
3850          if (AT_STRINGS_BEG (d))
3851            {
3852              if (!bufp->not_bol) break;
3853            }
3854          else if (d[-1] == '\n' && bufp->newline_anchor)
3855            {
3856              break;
3857            }
3858          /* In all other cases, we fail.  */
3859          goto fail;
3860
3861
3862        /* endline is the dual of begline.  */
3863	case endline:
3864          DEBUG_PRINT1 ("EXECUTING endline.\n");
3865
3866          if (AT_STRINGS_END (d))
3867            {
3868              if (!bufp->not_eol) break;
3869            }
3870
3871          /* We have to ``prefetch'' the next character.  */
3872          else if ((d == end1 ? *string2 : *d) == '\n'
3873                   && bufp->newline_anchor)
3874            {
3875              break;
3876            }
3877          goto fail;
3878
3879
3880	/* Match at the very beginning of the data.  */
3881        case begbuf:
3882          DEBUG_PRINT1 ("EXECUTING begbuf.\n");
3883          if (AT_STRINGS_BEG (d))
3884            break;
3885          goto fail;
3886
3887
3888	/* Match at the very end of the data.  */
3889        case endbuf:
3890          DEBUG_PRINT1 ("EXECUTING endbuf.\n");
3891	  if (AT_STRINGS_END (d))
3892	    break;
3893          goto fail;
3894
3895
3896        /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
3897           pushes NULL as the value for the string on the stack.  Then
3898           `pop_failure_point' will keep the current value for the
3899           string, instead of restoring it.  To see why, consider
3900           matching `foo\nbar' against `.*\n'.  The .* matches the foo;
3901           then the . fails against the \n.  But the next thing we want
3902           to do is match the \n against the \n; if we restored the
3903           string value, we would be back at the foo.
3904
3905           Because this is used only in specific cases, we don't need to
3906           check all the things that `on_failure_jump' does, to make
3907           sure the right things get saved on the stack.  Hence we don't
3908           share its code.  The only reason to push anything on the
3909           stack at all is that otherwise we would have to change
3910           `anychar's code to do something besides goto fail in this
3911           case; that seems worse than this.  */
3912        case on_failure_keep_string_jump:
3913          DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
3914
3915          EXTRACT_NUMBER_AND_INCR (mcnt, p);
3916          DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
3917
3918          PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
3919          break;
3920
3921
3922	/* Uses of on_failure_jump:
3923
3924           Each alternative starts with an on_failure_jump that points
3925           to the beginning of the next alternative.  Each alternative
3926           except the last ends with a jump that in effect jumps past
3927           the rest of the alternatives.  (They really jump to the
3928           ending jump of the following alternative, because tensioning
3929           these jumps is a hassle.)
3930
3931           Repeats start with an on_failure_jump that points past both
3932           the repetition text and either the following jump or
3933           pop_failure_jump back to this on_failure_jump.  */
3934	case on_failure_jump:
3935        on_failure:
3936          DEBUG_PRINT1 ("EXECUTING on_failure_jump");
3937
3938          EXTRACT_NUMBER_AND_INCR (mcnt, p);
3939          DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
3940
3941          /* If this on_failure_jump comes right before a group (i.e.,
3942             the original * applied to a group), save the information
3943             for that group and all inner ones, so that if we fail back
3944             to this point, the group's information will be correct.
3945             For example, in \(a*\)*\1, we need the preceding group,
3946             and in \(\(a*\)b*\)\2, we need the inner group.  */
3947
3948          /* We can't use `p' to check ahead because we push
3949             a failure point to `p + mcnt' after we do this.  */
3950          p1 = p;
3951
3952          /* We need to skip no_op's before we look for the
3953             start_memory in case this on_failure_jump is happening as
3954             the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
3955             against aba.  */
3956          while (p1 < pend && (re_opcode_t) *p1 == no_op)
3957            p1++;
3958
3959          if (p1 < pend && (re_opcode_t) *p1 == start_memory)
3960            {
3961              /* We have a new highest active register now.  This will
3962                 get reset at the start_memory we are about to get to,
3963                 but we will have saved all the registers relevant to
3964                 this repetition op, as described above.  */
3965              highest_active_reg = *(p1 + 1) + *(p1 + 2);
3966              if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
3967                lowest_active_reg = *(p1 + 1);
3968            }
3969
3970          DEBUG_PRINT1 (":\n");
3971          PUSH_FAILURE_POINT (p + mcnt, d, -2);
3972          break;
3973
3974
3975        /* A smart repeat ends with `maybe_pop_jump'.
3976	   We change it to either `pop_failure_jump' or `jump'.  */
3977        case maybe_pop_jump:
3978          EXTRACT_NUMBER_AND_INCR (mcnt, p);
3979          DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
3980          {
3981	    register unsigned char *p2 = p;
3982
3983            /* Compare the beginning of the repeat with what in the
3984               pattern follows its end. If we can establish that there
3985               is nothing that they would both match, i.e., that we
3986               would have to backtrack because of (as in, e.g., `a*a')
3987               then we can change to pop_failure_jump, because we'll
3988               never have to backtrack.
3989
3990               This is not true in the case of alternatives: in
3991               `(a|ab)*' we do need to backtrack to the `ab' alternative
3992               (e.g., if the string was `ab').  But instead of trying to
3993               detect that here, the alternative has put on a dummy
3994               failure point which is what we will end up popping.  */
3995
3996	    /* Skip over open/close-group commands.  */
3997	    while (p2 + 2 < pend
3998		   && ((re_opcode_t) *p2 == stop_memory
3999		       || (re_opcode_t) *p2 == start_memory))
4000	      p2 += 3;			/* Skip over args, too.  */
4001
4002            /* If we're at the end of the pattern, we can change.  */
4003            if (p2 == pend)
4004	      {
4005		/* Consider what happens when matching ":\(.*\)"
4006		   against ":/".  I don't really understand this code
4007		   yet.  */
4008  	        p[-3] = (unsigned char) pop_failure_jump;
4009                DEBUG_PRINT1
4010                  ("  End of pattern: change to `pop_failure_jump'.\n");
4011              }
4012
4013            else if ((re_opcode_t) *p2 == exactn
4014		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
4015	      {
4016		register unsigned char c
4017                  = *p2 == (unsigned char) endline ? '\n' : p2[2];
4018		p1 = p + mcnt;
4019
4020                /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
4021                   to the `maybe_finalize_jump' of this case.  Examine what
4022                   follows.  */
4023                if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
4024                  {
4025  		    p[-3] = (unsigned char) pop_failure_jump;
4026                    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4027                                  c, p1[5]);
4028                  }
4029
4030		else if ((re_opcode_t) p1[3] == charset
4031			 || (re_opcode_t) p1[3] == charset_not)
4032		  {
4033		    int not = (re_opcode_t) p1[3] == charset_not;
4034
4035		    if (c < (unsigned char) (p1[4] * BYTEWIDTH)
4036			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4037		      not = !not;
4038
4039                    /* `not' is equal to 1 if c would match, which means
4040                        that we can't change to pop_failure_jump.  */
4041		    if (!not)
4042                      {
4043  		        p[-3] = (unsigned char) pop_failure_jump;
4044                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4045                      }
4046		  }
4047	      }
4048	  }
4049	  p -= 2;		/* Point at relative address again.  */
4050	  if ((re_opcode_t) p[-1] != pop_failure_jump)
4051	    {
4052	      p[-1] = (unsigned char) jump;
4053              DEBUG_PRINT1 ("  Match => jump.\n");
4054	      goto unconditional_jump;
4055	    }
4056        /* Note fall through.  */
4057
4058
4059	/* The end of a simple repeat has a pop_failure_jump back to
4060           its matching on_failure_jump, where the latter will push a
4061           failure point.  The pop_failure_jump takes off failure
4062           points put on by this pop_failure_jump's matching
4063           on_failure_jump; we got through the pattern to here from the
4064           matching on_failure_jump, so didn't fail.  */
4065        case pop_failure_jump:
4066          {
4067            /* We need to pass separate storage for the lowest and
4068               highest registers, even though we don't care about the
4069               actual values.  Otherwise, we will restore only one
4070               register from the stack, since lowest will == highest in
4071               `pop_failure_point'.  */
4072            unsigned dummy_low_reg, dummy_high_reg;
4073            unsigned char *pdummy;
4074            const char *sdummy;
4075
4076            DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
4077            POP_FAILURE_POINT (sdummy, pdummy,
4078                               dummy_low_reg, dummy_high_reg,
4079                               reg_dummy, reg_dummy, reg_info_dummy);
4080          }
4081          /* Note fall through.  */
4082
4083
4084        /* Unconditionally jump (without popping any failure points).  */
4085        case jump:
4086	unconditional_jump:
4087	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
4088          DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
4089	  p += mcnt;				/* Do the jump.  */
4090          DEBUG_PRINT2 ("(to 0x%x).\n", p);
4091	  break;
4092
4093
4094        /* We need this opcode so we can detect where alternatives end
4095           in `group_match_null_string_p' et al.  */
4096        case jump_past_alt:
4097          DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
4098          goto unconditional_jump;
4099
4100
4101        /* Normally, the on_failure_jump pushes a failure point, which
4102           then gets popped at pop_failure_jump.  We will end up at
4103           pop_failure_jump, also, and with a pattern of, say, `a+', we
4104           are skipping over the on_failure_jump, so we have to push
4105           something meaningless for pop_failure_jump to pop.  */
4106        case dummy_failure_jump:
4107          DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
4108          /* It doesn't matter what we push for the string here.  What
4109             the code at `fail' tests is the value for the pattern.  */
4110          PUSH_FAILURE_POINT (0, 0, -2);
4111          goto unconditional_jump;
4112
4113
4114        /* At the end of an alternative, we need to push a dummy failure
4115           point in case we are followed by a `pop_failure_jump', because
4116           we don't want the failure point for the alternative to be
4117           popped.  For example, matching `(a|ab)*' against `aab'
4118           requires that we match the `ab' alternative.  */
4119        case push_dummy_failure:
4120          DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
4121          /* See comments just above at `dummy_failure_jump' about the
4122             two zeroes.  */
4123          PUSH_FAILURE_POINT (0, 0, -2);
4124          break;
4125
4126        /* Have to succeed matching what follows at least n times.
4127           After that, handle like `on_failure_jump'.  */
4128        case succeed_n:
4129          EXTRACT_NUMBER (mcnt, p + 2);
4130          DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
4131
4132          assert (mcnt >= 0);
4133          /* Originally, this is how many times we HAVE to succeed.  */
4134          if (mcnt > 0)
4135            {
4136               mcnt--;
4137	       p += 2;
4138               STORE_NUMBER_AND_INCR (p, mcnt);
4139               DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p, mcnt);
4140            }
4141	  else if (mcnt == 0)
4142            {
4143              DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
4144	      p[2] = (unsigned char) no_op;
4145              p[3] = (unsigned char) no_op;
4146              goto on_failure;
4147            }
4148          break;
4149
4150        case jump_n:
4151          EXTRACT_NUMBER (mcnt, p + 2);
4152          DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
4153
4154          /* Originally, this is how many times we CAN jump.  */
4155          if (mcnt)
4156            {
4157               mcnt--;
4158               STORE_NUMBER (p + 2, mcnt);
4159	       goto unconditional_jump;
4160            }
4161          /* If don't have to jump any more, skip over the rest of command.  */
4162	  else
4163	    p += 4;
4164          break;
4165
4166	case set_number_at:
4167	  {
4168            DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
4169
4170            EXTRACT_NUMBER_AND_INCR (mcnt, p);
4171            p1 = p + mcnt;
4172            EXTRACT_NUMBER_AND_INCR (mcnt, p);
4173            DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
4174	    STORE_NUMBER (p1, mcnt);
4175            break;
4176          }
4177
4178        case wordbound:
4179          DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4180          if (AT_WORD_BOUNDARY (d))
4181	    break;
4182          goto fail;
4183
4184	case notwordbound:
4185          DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
4186	  if (AT_WORD_BOUNDARY (d))
4187	    goto fail;
4188          break;
4189
4190	case wordbeg:
4191          DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
4192	  if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
4193	    break;
4194          goto fail;
4195
4196	case wordend:
4197          DEBUG_PRINT1 ("EXECUTING wordend.\n");
4198	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
4199              && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
4200	    break;
4201          goto fail;
4202
4203#ifdef emacs
4204#ifdef emacs19
4205  	case before_dot:
4206          DEBUG_PRINT1 ("EXECUTING before_dot.\n");
4207 	  if (PTR_CHAR_POS ((unsigned char *) d) >= point)
4208  	    goto fail;
4209  	  break;
4210
4211  	case at_dot:
4212          DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4213 	  if (PTR_CHAR_POS ((unsigned char *) d) != point)
4214  	    goto fail;
4215  	  break;
4216
4217  	case after_dot:
4218          DEBUG_PRINT1 ("EXECUTING after_dot.\n");
4219          if (PTR_CHAR_POS ((unsigned char *) d) <= point)
4220  	    goto fail;
4221  	  break;
4222#else /* not emacs19 */
4223	case at_dot:
4224          DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4225	  if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point)
4226	    goto fail;
4227	  break;
4228#endif /* not emacs19 */
4229
4230	case syntaxspec:
4231          DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
4232	  mcnt = *p++;
4233	  goto matchsyntax;
4234
4235        case wordchar:
4236          DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
4237	  mcnt = (int) Sword;
4238        matchsyntax:
4239	  PREFETCH ();
4240	  if (SYNTAX (*d++) != (enum syntaxcode) mcnt)
4241            goto fail;
4242          SET_REGS_MATCHED ();
4243	  break;
4244
4245	case notsyntaxspec:
4246          DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
4247	  mcnt = *p++;
4248	  goto matchnotsyntax;
4249
4250        case notwordchar:
4251          DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
4252	  mcnt = (int) Sword;
4253        matchnotsyntax:
4254	  PREFETCH ();
4255	  if (SYNTAX (*d++) == (enum syntaxcode) mcnt)
4256            goto fail;
4257	  SET_REGS_MATCHED ();
4258          break;
4259
4260#else /* not emacs */
4261	case wordchar:
4262          DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
4263	  PREFETCH ();
4264          if (!WORDCHAR_P (d))
4265            goto fail;
4266	  SET_REGS_MATCHED ();
4267          d++;
4268	  break;
4269
4270	case notwordchar:
4271          DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
4272	  PREFETCH ();
4273	  if (WORDCHAR_P (d))
4274            goto fail;
4275          SET_REGS_MATCHED ();
4276          d++;
4277	  break;
4278#endif /* not emacs */
4279
4280        default:
4281          abort ();
4282	}
4283      continue;  /* Successfully executed one pattern command; keep going.  */
4284
4285
4286    /* We goto here if a matching operation fails. */
4287    fail:
4288      if (!FAIL_STACK_EMPTY ())
4289	{ /* A restart point is known.  Restore to that state.  */
4290          DEBUG_PRINT1 ("\nFAIL:\n");
4291          POP_FAILURE_POINT (d, p,
4292                             lowest_active_reg, highest_active_reg,
4293                             regstart, regend, reg_info);
4294
4295          /* If this failure point is a dummy, try the next one.  */
4296          if (!p)
4297	    goto fail;
4298
4299          /* If we failed to the end of the pattern, don't examine *p.  */
4300	  assert (p <= pend);
4301          if (p < pend)
4302            {
4303              boolean is_a_jump_n = false;
4304
4305              /* If failed to a backwards jump that's part of a repetition
4306                 loop, need to pop this failure point and use the next one.  */
4307              switch ((re_opcode_t) *p)
4308                {
4309                case jump_n:
4310                  is_a_jump_n = true;
4311                case maybe_pop_jump:
4312                case pop_failure_jump:
4313                case jump:
4314                  p1 = p + 1;
4315                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4316                  p1 += mcnt;
4317
4318                  if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
4319                      || (!is_a_jump_n
4320                          && (re_opcode_t) *p1 == on_failure_jump))
4321                    goto fail;
4322                  break;
4323                default:
4324                  /* do nothing */ ;
4325                }
4326            }
4327
4328          if (d >= string1 && d <= end1)
4329	    dend = end_match_1;
4330        }
4331      else
4332        break;   /* Matching at this starting point really fails.  */
4333    } /* for (;;) */
4334
4335  if (best_regs_set)
4336    goto restore_best_regs;
4337
4338  FREE_VARIABLES ();
4339
4340  return -1;         			/* Failure to match.  */
4341} /* re_match_2 */
4342
4343/* Subroutine definitions for re_match_2.  */
4344
4345
4346/* We are passed P pointing to a register number after a start_memory.
4347
4348   Return true if the pattern up to the corresponding stop_memory can
4349   match the empty string, and false otherwise.
4350
4351   If we find the matching stop_memory, sets P to point to one past its number.
4352   Otherwise, sets P to an undefined byte less than or equal to END.
4353
4354   We don't handle duplicates properly (yet).  */
4355
4356static boolean
4357group_match_null_string_p (p, end, reg_info)
4358    unsigned char **p, *end;
4359    register_info_type *reg_info;
4360{
4361  int mcnt;
4362  /* Point to after the args to the start_memory.  */
4363  unsigned char *p1 = *p + 2;
4364
4365  while (p1 < end)
4366    {
4367      /* Skip over opcodes that can match nothing, and return true or
4368	 false, as appropriate, when we get to one that can't, or to the
4369         matching stop_memory.  */
4370
4371      switch ((re_opcode_t) *p1)
4372        {
4373        /* Could be either a loop or a series of alternatives.  */
4374        case on_failure_jump:
4375          p1++;
4376          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4377
4378          /* If the next operation is not a jump backwards in the
4379	     pattern.  */
4380
4381	  if (mcnt >= 0)
4382	    {
4383              /* Go through the on_failure_jumps of the alternatives,
4384                 seeing if any of the alternatives cannot match nothing.
4385                 The last alternative starts with only a jump,
4386                 whereas the rest start with on_failure_jump and end
4387                 with a jump, e.g., here is the pattern for `a|b|c':
4388
4389                 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
4390                 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
4391                 /exactn/1/c
4392
4393                 So, we have to first go through the first (n-1)
4394                 alternatives and then deal with the last one separately.  */
4395
4396
4397              /* Deal with the first (n-1) alternatives, which start
4398                 with an on_failure_jump (see above) that jumps to right
4399                 past a jump_past_alt.  */
4400
4401              while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
4402                {
4403                  /* `mcnt' holds how many bytes long the alternative
4404                     is, including the ending `jump_past_alt' and
4405                     its number.  */
4406
4407                  if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
4408				                      reg_info))
4409                    return false;
4410
4411                  /* Move to right after this alternative, including the
4412		     jump_past_alt.  */
4413                  p1 += mcnt;
4414
4415                  /* Break if it's the beginning of an n-th alternative
4416                     that doesn't begin with an on_failure_jump.  */
4417                  if ((re_opcode_t) *p1 != on_failure_jump)
4418                    break;
4419
4420		  /* Still have to check that it's not an n-th
4421		     alternative that starts with an on_failure_jump.  */
4422		  p1++;
4423                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4424                  if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
4425                    {
4426		      /* Get to the beginning of the n-th alternative.  */
4427                      p1 -= 3;
4428                      break;
4429                    }
4430                }
4431
4432              /* Deal with the last alternative: go back and get number
4433                 of the `jump_past_alt' just before it.  `mcnt' contains
4434                 the length of the alternative.  */
4435              EXTRACT_NUMBER (mcnt, p1 - 2);
4436
4437              if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
4438                return false;
4439
4440              p1 += mcnt;	/* Get past the n-th alternative.  */
4441            } /* if mcnt > 0 */
4442          break;
4443
4444
4445        case stop_memory:
4446	  assert (p1[1] == **p);
4447          *p = p1 + 2;
4448          return true;
4449
4450
4451        default:
4452          if (!common_op_match_null_string_p (&p1, end, reg_info))
4453            return false;
4454        }
4455    } /* while p1 < end */
4456
4457  return false;
4458} /* group_match_null_string_p */
4459
4460
4461/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
4462   It expects P to be the first byte of a single alternative and END one
4463   byte past the last. The alternative can contain groups.  */
4464
4465static boolean
4466alt_match_null_string_p (p, end, reg_info)
4467    unsigned char *p, *end;
4468    register_info_type *reg_info;
4469{
4470  int mcnt;
4471  unsigned char *p1 = p;
4472
4473  while (p1 < end)
4474    {
4475      /* Skip over opcodes that can match nothing, and break when we get
4476         to one that can't.  */
4477
4478      switch ((re_opcode_t) *p1)
4479        {
4480	/* It's a loop.  */
4481        case on_failure_jump:
4482          p1++;
4483          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4484          p1 += mcnt;
4485          break;
4486
4487	default:
4488          if (!common_op_match_null_string_p (&p1, end, reg_info))
4489            return false;
4490        }
4491    }  /* while p1 < end */
4492
4493  return true;
4494} /* alt_match_null_string_p */
4495
4496
4497/* Deals with the ops common to group_match_null_string_p and
4498   alt_match_null_string_p.
4499
4500   Sets P to one after the op and its arguments, if any.  */
4501
4502static boolean
4503common_op_match_null_string_p (p, end, reg_info)
4504    unsigned char **p, *end;
4505    register_info_type *reg_info;
4506{
4507  int mcnt;
4508  boolean ret;
4509  int reg_no;
4510  unsigned char *p1 = *p;
4511
4512  switch ((re_opcode_t) *p1++)
4513    {
4514    case no_op:
4515    case begline:
4516    case endline:
4517    case begbuf:
4518    case endbuf:
4519    case wordbeg:
4520    case wordend:
4521    case wordbound:
4522    case notwordbound:
4523#ifdef emacs
4524    case before_dot:
4525    case at_dot:
4526    case after_dot:
4527#endif
4528      break;
4529
4530    case start_memory:
4531      reg_no = *p1;
4532      assert (reg_no > 0 && reg_no <= MAX_REGNUM);
4533      ret = group_match_null_string_p (&p1, end, reg_info);
4534
4535      /* Have to set this here in case we're checking a group which
4536         contains a group and a back reference to it.  */
4537
4538      if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
4539        REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
4540
4541      if (!ret)
4542        return false;
4543      break;
4544
4545    /* If this is an optimized succeed_n for zero times, make the jump.  */
4546    case jump:
4547      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4548      if (mcnt >= 0)
4549        p1 += mcnt;
4550      else
4551        return false;
4552      break;
4553
4554    case succeed_n:
4555      /* Get to the number of times to succeed.  */
4556      p1 += 2;
4557      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4558
4559      if (mcnt == 0)
4560        {
4561          p1 -= 4;
4562          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4563          p1 += mcnt;
4564        }
4565      else
4566        return false;
4567      break;
4568
4569    case duplicate:
4570      if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
4571        return false;
4572      break;
4573
4574    case set_number_at:
4575      p1 += 4;
4576
4577    default:
4578      /* All other opcodes mean we cannot match the empty string.  */
4579      return false;
4580  }
4581
4582  *p = p1;
4583  return true;
4584} /* common_op_match_null_string_p */
4585
4586
4587/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
4588   bytes; nonzero otherwise.  */
4589
4590static int
4591bcmp_translate (s1, s2, len, translate)
4592     unsigned char *s1, *s2;
4593     register int len;
4594     char *translate;
4595{
4596  register unsigned char *p1 = s1, *p2 = s2;
4597  while (len)
4598    {
4599      if (translate[*p1++] != translate[*p2++]) return 1;
4600      len--;
4601    }
4602  return 0;
4603}
4604
4605/* Entry points for GNU code.  */
4606
4607/* re_compile_pattern is the GNU regular expression compiler: it
4608   compiles PATTERN (of length SIZE) and puts the result in BUFP.
4609   Returns 0 if the pattern was valid, otherwise an error string.
4610
4611   Assumes the `allocated' (and perhaps `buffer') and `translate' fields
4612   are set in BUFP on entry.
4613
4614   We call regex_compile to do the actual compilation.  */
4615
4616const char *
4617re_compile_pattern (pattern, length, bufp)
4618     const char *pattern;
4619     int length;
4620     struct re_pattern_buffer *bufp;
4621{
4622  reg_errcode_t ret;
4623
4624  /* GNU code is written to assume at least RE_NREGS registers will be set
4625     (and at least one extra will be -1).  */
4626  bufp->regs_allocated = REGS_UNALLOCATED;
4627
4628  /* And GNU code determines whether or not to get register information
4629     by passing null for the REGS argument to re_match, etc., not by
4630     setting no_sub.  */
4631  bufp->no_sub = 0;
4632
4633  /* Match anchors at newline.  */
4634  bufp->newline_anchor = 1;
4635
4636  ret = regex_compile (pattern, length, re_syntax_options, bufp);
4637
4638  return re_error_msg[(int) ret];
4639}
4640
4641/* Entry points compatible with 4.2 BSD regex library.  We don't define
4642   them if this is an Emacs or POSIX compilation.  */
4643
4644#if !defined (emacs) && !defined (_POSIX_SOURCE)
4645
4646/* BSD has one and only one pattern buffer.  */
4647static struct re_pattern_buffer re_comp_buf;
4648
4649char *
4650re_comp (s)
4651    const char *s;
4652{
4653  reg_errcode_t ret;
4654
4655  if (!s)
4656    {
4657      if (!re_comp_buf.buffer)
4658	return "No previous regular expression";
4659      return 0;
4660    }
4661
4662  if (!re_comp_buf.buffer)
4663    {
4664      re_comp_buf.buffer = (unsigned char *) malloc (200);
4665      if (re_comp_buf.buffer == NULL)
4666        return "Memory exhausted";
4667      re_comp_buf.allocated = 200;
4668
4669      re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
4670      if (re_comp_buf.fastmap == NULL)
4671	return "Memory exhausted";
4672    }
4673
4674  /* Since `re_exec' always passes NULL for the `regs' argument, we
4675     don't need to initialize the pattern buffer fields which affect it.  */
4676
4677  /* Match anchors at newlines.  */
4678  re_comp_buf.newline_anchor = 1;
4679
4680  ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
4681
4682  /* Yes, we're discarding `const' here.  */
4683  return (char *) re_error_msg[(int) ret];
4684}
4685
4686
4687int
4688re_exec (s)
4689    const char *s;
4690{
4691  const int len = strlen (s);
4692  return
4693    0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
4694}
4695#endif /* not emacs and not _POSIX_SOURCE */
4696
4697/* POSIX.2 functions.  Don't define these for Emacs.  */
4698
4699#ifndef emacs
4700
4701/* regcomp takes a regular expression as a string and compiles it.
4702
4703   PREG is a regex_t *.  We do not expect any fields to be initialized,
4704   since POSIX says we shouldn't.  Thus, we set
4705
4706     `buffer' to the compiled pattern;
4707     `used' to the length of the compiled pattern;
4708     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
4709       REG_EXTENDED bit in CFLAGS is set; otherwise, to
4710       RE_SYNTAX_POSIX_BASIC;
4711     `newline_anchor' to REG_NEWLINE being set in CFLAGS;
4712     `fastmap' and `fastmap_accurate' to zero;
4713     `re_nsub' to the number of subexpressions in PATTERN.
4714
4715   PATTERN is the address of the pattern string.
4716
4717   CFLAGS is a series of bits which affect compilation.
4718
4719     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
4720     use POSIX basic syntax.
4721
4722     If REG_NEWLINE is set, then . and [^...] don't match newline.
4723     Also, regexec will try a match beginning after every newline.
4724
4725     If REG_ICASE is set, then we considers upper- and lowercase
4726     versions of letters to be equivalent when matching.
4727
4728     If REG_NOSUB is set, then when PREG is passed to regexec, that
4729     routine will report only success or failure, and nothing about the
4730     registers.
4731
4732   It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
4733   the return codes and their meanings.)  */
4734
4735int
4736regcomp (preg, pattern, cflags)
4737    regex_t *preg;
4738    const char *pattern;
4739    int cflags;
4740{
4741  reg_errcode_t ret;
4742  unsigned syntax
4743    = (cflags & REG_EXTENDED) ?
4744      RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
4745
4746  /* regex_compile will allocate the space for the compiled pattern.  */
4747  preg->buffer = 0;
4748  preg->allocated = 0;
4749
4750  /* Don't bother to use a fastmap when searching.  This simplifies the
4751     REG_NEWLINE case: if we used a fastmap, we'd have to put all the
4752     characters after newlines into the fastmap.  This way, we just try
4753     every character.  */
4754  preg->fastmap = 0;
4755
4756  if (cflags & REG_ICASE)
4757    {
4758      unsigned i;
4759
4760      preg->translate = (char *) malloc (CHAR_SET_SIZE);
4761      if (preg->translate == NULL)
4762        return (int) REG_ESPACE;
4763
4764      /* Map uppercase characters to corresponding lowercase ones.  */
4765      for (i = 0; i < CHAR_SET_SIZE; i++)
4766        preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
4767    }
4768  else
4769    preg->translate = NULL;
4770
4771  /* If REG_NEWLINE is set, newlines are treated differently.  */
4772  if (cflags & REG_NEWLINE)
4773    { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
4774      syntax &= ~RE_DOT_NEWLINE;
4775      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
4776      /* It also changes the matching behavior.  */
4777      preg->newline_anchor = 1;
4778    }
4779  else
4780    preg->newline_anchor = 0;
4781
4782  preg->no_sub = !!(cflags & REG_NOSUB);
4783
4784  /* POSIX says a null character in the pattern terminates it, so we
4785     can use strlen here in compiling the pattern.  */
4786  ret = regex_compile (pattern, strlen (pattern), syntax, preg);
4787
4788  /* POSIX doesn't distinguish between an unmatched open-group and an
4789     unmatched close-group: both are REG_EPAREN.  */
4790  if (ret == REG_ERPAREN) ret = REG_EPAREN;
4791
4792  return (int) ret;
4793}
4794
4795
4796/* regexec searches for a given pattern, specified by PREG, in the
4797   string STRING.
4798
4799   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
4800   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
4801   least NMATCH elements, and we set them to the offsets of the
4802   corresponding matched substrings.
4803
4804   EFLAGS specifies `execution flags' which affect matching: if
4805   REG_NOTBOL is set, then ^ does not match at the beginning of the
4806   string; if REG_NOTEOL is set, then $ does not match at the end.
4807
4808   We return 0 if we find a match and REG_NOMATCH if not.  */
4809
4810int
4811regexec (preg, string, nmatch, pmatch, eflags)
4812    const regex_t *preg;
4813    const char *string;
4814    size_t nmatch;
4815    regmatch_t pmatch[];
4816    int eflags;
4817{
4818  int ret;
4819  struct re_registers regs;
4820  regex_t private_preg;
4821  int len = strlen (string);
4822  boolean want_reg_info = !preg->no_sub && nmatch > 0;
4823
4824  private_preg = *preg;
4825
4826  private_preg.not_bol = !!(eflags & REG_NOTBOL);
4827  private_preg.not_eol = !!(eflags & REG_NOTEOL);
4828
4829  /* The user has told us exactly how many registers to return
4830     information about, via `nmatch'.  We have to pass that on to the
4831     matching routines.  */
4832  private_preg.regs_allocated = REGS_FIXED;
4833
4834  if (want_reg_info)
4835    {
4836      regs.num_regs = nmatch;
4837      regs.start = TALLOC (nmatch, regoff_t);
4838      regs.end = TALLOC (nmatch, regoff_t);
4839      if (regs.start == NULL || regs.end == NULL)
4840        return (int) REG_NOMATCH;
4841    }
4842
4843  /* Perform the searching operation.  */
4844  ret = re_search (&private_preg, string, len,
4845                   /* start: */ 0, /* range: */ len,
4846                   want_reg_info ? &regs : (struct re_registers *) 0);
4847
4848  /* Copy the register information to the POSIX structure.  */
4849  if (want_reg_info)
4850    {
4851      if (ret >= 0)
4852        {
4853          unsigned r;
4854
4855          for (r = 0; r < nmatch; r++)
4856            {
4857              pmatch[r].rm_so = regs.start[r];
4858              pmatch[r].rm_eo = regs.end[r];
4859            }
4860        }
4861
4862      /* If we needed the temporary register info, free the space now.  */
4863      free (regs.start);
4864      free (regs.end);
4865    }
4866
4867  /* We want zero return to mean success, unlike `re_search'.  */
4868  return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
4869}
4870
4871
4872/* Returns a message corresponding to an error code, ERRCODE, returned
4873   from either regcomp or regexec.   We don't use PREG here.  */
4874
4875size_t
4876regerror (errcode, preg, errbuf, errbuf_size)
4877    int errcode;
4878    const regex_t *preg;
4879    char *errbuf;
4880    size_t errbuf_size;
4881{
4882  const char *msg;
4883  size_t msg_size;
4884
4885  if (errcode < 0
4886      || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0])))
4887    /* Only error codes returned by the rest of the code should be passed
4888       to this routine.  If we are given anything else, or if other regex
4889       code generates an invalid error code, then the program has a bug.
4890       Dump core so we can fix it.  */
4891    abort ();
4892
4893  msg = re_error_msg[errcode];
4894
4895  /* POSIX doesn't require that we do anything in this case, but why
4896     not be nice.  */
4897  if (! msg)
4898    msg = "Success";
4899
4900  msg_size = strlen (msg) + 1; /* Includes the null.  */
4901
4902  if (errbuf_size != 0)
4903    {
4904      if (msg_size > errbuf_size)
4905        {
4906          strncpy (errbuf, msg, errbuf_size - 1);
4907          errbuf[errbuf_size - 1] = 0;
4908        }
4909      else
4910        strcpy (errbuf, msg);
4911    }
4912
4913  return msg_size;
4914}
4915
4916
4917/* Free dynamically allocated space used by PREG.  */
4918
4919void
4920regfree (preg)
4921    regex_t *preg;
4922{
4923  if (preg->buffer != NULL)
4924    free (preg->buffer);
4925  preg->buffer = NULL;
4926
4927  preg->allocated = 0;
4928  preg->used = 0;
4929
4930  if (preg->fastmap != NULL)
4931    free (preg->fastmap);
4932  preg->fastmap = NULL;
4933  preg->fastmap_accurate = 0;
4934
4935  if (preg->translate != NULL)
4936    free (preg->translate);
4937  preg->translate = NULL;
4938}
4939
4940#endif /* not emacs  */
4941
4942/*
4943Local variables:
4944make-backup-files: t
4945version-control: t
4946trim-versions-without-asking: nil
4947End:
4948*/
4949