1/*
2  tre-compile.c - TRE regex compiler
3
4  This software is released under a BSD-style license.
5  See the file LICENSE for details and copyright.
6
7*/
8
9/*
10  TODO:
11   - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
12     function calls.
13*/
14
15
16#ifdef HAVE_CONFIG_H
17#include <config.h>
18#endif /* HAVE_CONFIG_H */
19#include <stdio.h>
20#include <assert.h>
21#include <string.h>
22#include <limits.h>
23
24#include "tre-internal.h"
25#include "tre-mem.h"
26#include "tre-stack.h"
27#include "tre-ast.h"
28#include "tre-parse.h"
29#include "tre-compile.h"
30#include "tre.h"
31#include "tre-last-matched.h"
32#include "xmalloc.h"
33
34/*
35  The bit_ffs() macro in bitstring.h is flawed.  Replace it with a working one.
36*/
37#undef bit_ffs
38#define	bit_ffs(name, nbits, value) { \
39	register bitstr_t *_name = name; \
40	register int _byte, _nbits = nbits; \
41	register int _stopbyte = _bit_byte(_nbits), _value = -1; \
42	for (_byte = 0; _byte <= _stopbyte; ++_byte) \
43		if (_name[_byte]) { \
44			_value = _byte << 3; \
45			for (_stopbyte = _name[_byte]; !(_stopbyte&0x1); \
46			    ++_value, _stopbyte >>= 1); \
47			break; \
48		} \
49	*(value) = _value; \
50}
51
52/*
53  Algorithms to setup tags so that submatch addressing can be done.
54*/
55
56
57#ifdef TRE_DEBUG
58static const char *tag_dir_str[] = {
59  "minimize",
60  "maximize",
61  "left-maximize"
62};
63
64static const char _indent[] = "  ";
65
66static void
67print_indent(int indent)
68{
69  while (indent-- > 0)
70    DPRINT((_indent));
71}
72
73static void print_last_matched_pre(tre_last_matched_pre_t *lm, int indent,
74				   int num_tags);
75static void
76print_last_match_branch_pre(tre_last_matched_branch_pre_t *branch, int indent,
77			    int num_tags)
78{
79  tre_last_matched_pre_t *u = branch->last_matched;
80  int n_last_matched = 0;
81
82  while (u)
83    {
84      n_last_matched++;
85      u = u->next;
86    }
87
88  print_indent(indent);
89  DPRINT(("BRANCH: tot_branches=%d tot_last_matched=%d tot_tags=%d\n",
90	  branch->tot_branches, branch->tot_last_matched, branch->tot_tags));
91  print_indent(indent);
92  DPRINT(("..n_last_matched=%d last_matched=%d\n", branch->n_last_matched,
93	  n_last_matched));
94  if (branch->n_last_matched != n_last_matched)
95    DPRINT(("*** mismatch between n_last_matched and unions ***\n"));
96  if (branch->cmp_tag > 0)
97    {
98      int i;
99      const char *sep = " tags=";
100      print_indent(indent);
101      DPRINT(("..cmp_tag=%d n_tags=%d", branch->cmp_tag, branch->n_tags));
102      for (i = 0; i < num_tags; i++)
103	if (bit_test(branch->tags, i))
104	  {
105	    DPRINT(("%s%d", sep, i));
106	    sep = ",";
107	  }
108      DPRINT(("\n"));
109    }
110
111  u = branch->last_matched;
112  indent++;
113  while (u)
114    {
115      print_last_matched_pre(u, indent, num_tags);
116      u = u->next;
117    }
118}
119
120static void
121print_last_matched_pre(tre_last_matched_pre_t *lm, int indent, int num_tags)
122{
123  tre_last_matched_branch_pre_t *b = lm->branches;
124  int n_branches = 0;
125
126  while (b)
127    {
128      n_branches++;
129      b = b->next;
130    }
131
132  print_indent(indent);
133  DPRINT(("LAST_MATCHED: tot_branches=%d tot_last_matched=%d tot_tags=%d\n",
134	  lm->tot_branches, lm->tot_last_matched, lm->tot_tags));
135  print_indent(indent);
136  DPRINT(("..start_tag=%d n_branches=%d branches=%d\n", lm->start_tag,
137	  lm->n_branches, n_branches));
138  if (lm->n_branches != n_branches)
139    DPRINT(("*** mismatch between n and branches ***\n"));
140
141  b = lm->branches;
142  indent++;
143  while (b)
144    {
145      print_last_match_branch_pre(b, indent, num_tags);
146      b = b->next;
147    }
148}
149
150static void print_last_matched(tre_last_matched_t *lm, int indent);
151static void
152print_last_match_branch(tre_last_matched_branch_t *branch, int indent)
153{
154  tre_last_matched_t *u;
155  int i;
156
157  print_indent(indent);
158  DPRINT(("BRANCH: n_last_matched=%d\n", branch->n_last_matched));
159  if (branch->cmp_tag > 0)
160    {
161      print_indent(indent);
162      DPRINT(("..cmp_tag=%d n_tags=%d", branch->cmp_tag, branch->n_tags));
163      if (branch->n_tags > 0)
164	{
165	  const char *sep = " tags=";
166	  for (i = 0; i < branch->n_tags; i++)
167	    {
168	      DPRINT(("%s%d", sep, branch->tags[i]));
169	      sep = ",";
170	    }
171	}
172      DPRINT(("\n"));
173    }
174
175  u = branch->last_matched;
176  indent++;
177  for (i = branch->n_last_matched; i > 0; i--, u++)
178    print_last_matched(u, indent);
179}
180
181static void
182print_last_matched(tre_last_matched_t *lm, int indent)
183{
184  int i;
185  tre_last_matched_branch_t *b;
186
187  print_indent(indent);
188  DPRINT(("LAST_MATCHED: n_branches=%d start_tag=%d\n", lm->n_branches,
189	  lm->start_tag));
190
191  b = lm->branches;
192  indent++;
193  for (i = lm->n_branches; i > 0; i--, b++)
194    print_last_match_branch(b, indent);
195}
196#endif /* TRE_DEBUG */
197
198
199/* Merge the tre_last_matched_branch_pre_t of src into dst, creating a new
200   one if needed.  If tag_id > 0, add that tag as well (a negative tag_id will
201   create an unset tre_last_matched_branch_pre_t. */
202static reg_errcode_t
203tre_merge_branches(tre_mem_t mem, tre_ast_node_t *dst, tre_ast_node_t *src,
204		   int tag_id, int num_tags)
205{
206  tre_last_matched_branch_pre_t *db = dst->last_matched_branch;
207  tre_last_matched_branch_pre_t *sb = (src ? src->last_matched_branch : NULL);
208
209  if (db)
210    {
211      if (sb)
212	{
213	  bitstr_t *l = db->tags;
214	  bitstr_t *r = sb->tags;
215	  int i = bitstr_size(num_tags);
216
217	  while(i-- > 0)
218	    *l++ |= *r++;
219	  /* db and sb are the info from two parallel sub-trees, so the tags
220	     must be mutually exclusive, and we can just add their numbers */
221	  db->n_tags += sb->n_tags;
222	  db->tot_tags += sb->tot_tags;
223	  if (db->last_matched)
224	    {
225	      if (sb->last_matched)
226		{
227		  tre_last_matched_pre_t *u = db->last_matched;
228
229		  while(u->next)
230		    u = u->next;
231		  u->next = sb->last_matched;
232		  db->n_last_matched += sb->n_last_matched;
233		  db->tot_branches += sb->tot_branches;
234		  db->tot_last_matched += sb->tot_last_matched;
235		}
236	    }
237	    else if (sb->last_matched)
238	      {
239		db->last_matched = sb->last_matched;
240		db->n_last_matched = sb->n_last_matched;
241		db->tot_branches = sb->tot_branches;
242		db->tot_last_matched = sb->tot_last_matched;
243	      }
244	}
245    }
246  else
247    db = sb;
248
249  if (tag_id != 0)
250    {
251      if (!db)
252	{
253	  db = tre_mem_calloc(mem, sizeof(tre_last_matched_branch_pre_t)
254			      + bitstr_size(num_tags));
255	  if (db == NULL)
256	    return REG_ESPACE;
257	  db->tot_branches = 1;
258	}
259      if (tag_id > 0)
260	{
261	  /* tag_id is a new tag, and shouldn't exist in db's tags,
262	     so we can always increment n_tags */
263	  bit_set(db->tags, tag_id);
264	  db->n_tags++;
265	  db->tot_tags++;
266	}
267    }
268  dst->last_matched_branch = db;
269  return REG_OK;
270}
271
272
273/* Inserts a catenation node to the root of the tree given in `node'.
274   As the left child a new tag with number `tag_id' to `node' is added,
275   and the right child is the old root. */
276static reg_errcode_t
277tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
278{
279  tre_catenation_t *c;
280
281  DPRINT(("add_tag_left: tag %d\n", tag_id));
282
283  c = tre_mem_alloc(mem, sizeof(*c));
284  if (c == NULL)
285    return REG_ESPACE;
286  c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
287  if (c->left == NULL)
288    return REG_ESPACE;
289  c->right = tre_mem_calloc(mem, sizeof(tre_ast_node_t));
290  if (c->right == NULL)
291    return REG_ESPACE;
292
293  c->right->obj = node->obj;
294  c->right->type = node->type;
295  c->right->last_matched_branch = node->last_matched_branch;
296  c->right->nullable = -1;
297  c->right->submatch_id = -1;
298  node->obj = c;
299  node->type = CATENATION;
300  node->original = c->right;
301  return REG_OK;
302}
303
304/* Inserts a catenation node to the root of the tree given in `node'.
305   As the right child a new tag with number `tag_id' to `node' is added,
306   and the left child is the old root. */
307static reg_errcode_t
308tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
309{
310  tre_catenation_t *c;
311
312  DPRINT(("tre_add_tag_right: tag %d\n", tag_id));
313
314  c = tre_mem_alloc(mem, sizeof(*c));
315  if (c == NULL)
316    return REG_ESPACE;
317  c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
318  if (c->right == NULL)
319    return REG_ESPACE;
320  c->left = tre_mem_calloc(mem, sizeof(tre_ast_node_t));
321  if (c->left == NULL)
322    return REG_ESPACE;
323
324  c->left->obj = node->obj;
325  c->left->type = node->type;
326  c->left->last_matched_branch = node->last_matched_branch;
327  c->left->nullable = -1;
328  c->left->submatch_id = -1;
329  node->obj = c;
330  node->type = CATENATION;
331  node->original = c->left;
332  return REG_OK;
333}
334
335typedef enum {
336  ADDTAGS_RECURSE,
337  ADDTAGS_RECURSE_NOT_TOP_UNION,
338  ADDTAGS_AFTER_ITERATION,
339  ADDTAGS_AFTER_UNION_LEFT,
340  ADDTAGS_AFTER_UNION_RIGHT,
341  ADDTAGS_AFTER_CAT_LEFT,
342  ADDTAGS_AFTER_CAT_RIGHT,
343  ADDTAGS_SET_SUBMATCH_END,
344  ADDTAGS_UNION_RECURSE,
345  ADDTAGS_UNION_RIGHT_RECURSE,
346  ADDTAGS_AFTER_UNION_TOP,
347} tre_addtags_symbol_t;
348
349enum {
350  COPY_LAST_MATCHED_BRANCH,
351  COPY_LAST_MATCHED_BRANCH_NEXT,
352  COPY_LAST_MATCHED,
353  COPY_LAST_MATCHED_NEXT,
354};
355
356
357#define REGSET_UNSET		((unsigned)-1)
358
359/* Go through `regset' and set submatch data for submatches that are
360   using this tag. */
361static void
362tre_purge_regset(unsigned *regset, tre_tnfa_t *tnfa, int tag)
363{
364  int i;
365
366  for (i = 0; regset[i] != REGSET_UNSET; i++)
367    {
368      int id = regset[i] / 2;
369      int start = !(regset[i] % 2);
370      if (id >= SUBMATCH_ID_INVISIBLE_START)
371	continue;
372      DPRINT(("  Using tag %d for %s offset of "
373	      "submatch %d\n", tag,
374	      start ? "start" : "end", id));
375      if (start)
376	tnfa->submatch_data[id].so_tag = tag;
377      else
378	tnfa->submatch_data[id].eo_tag = tag;
379    }
380  regset[0] = -1;
381}
382
383
384#define REGSET_HAS_STARTS	0x1
385#define REGSET_HAS_ENDS		0x2
386
387
388/* Adds tags to appropriate locations in the parse tree in `tree', so that
389   subexpressions marked for submatch addressing can be traced. */
390static reg_errcode_t
391tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
392	     tre_tnfa_t *tnfa)
393{
394  reg_errcode_t status = REG_OK;
395  tre_addtags_symbol_t symbol;
396  tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
397  int bottom = tre_stack_num_objects(stack);
398  /* True for first pass (counting number of needed tags) */
399  int first_pass = (mem == NULL || tnfa == NULL);
400  unsigned *regset, *orig_regset;
401  int regset_contains = 0;
402  int num_tags = 0; /* Total number of tags. */
403  int num_minimals = 0;	 /* Number of special minimal tags. */
404  int tag = 0;	    /* The tag that is to be added next. */
405  int next_tag = 1; /* Next tag to use after this one. */
406  int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
407  int *reorder_tags = NULL; /* Tag reorder array: a pair for each reorder,
408		             * the first is the tag to reorder, the second
409		             * is the tag after which the first is reordered */
410  int *rtp;		    /* Pointer used to fill in reorder_tags and
411			     * tag_order */
412  int *to_reorder;          /* Transform array converting sequential order to
413		             * that specified by reorder_tags */
414  int id;
415
416  tre_tag_direction_t direction = TRE_TAG_LEFT_MAXIMIZE;
417  if (!first_pass)
418    {
419      DPRINT(("Initializing direction to %s\n", tag_dir_str[direction]));
420      tnfa->end_tag = 0;
421      tnfa->minimal_tags[0] = -1;
422    }
423
424  regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches
425		   + tnfa->num_submatches_invisible + 1) * 2));
426  if (regset == NULL)
427    {
428      status = REG_ESPACE;
429      goto error_regset;
430    }
431  regset[0] = REGSET_UNSET;
432  orig_regset = regset;
433
434  if (!first_pass)
435    {
436      /* Allocate all memory for reorder_tags, tag_order, to_seq_order and
437       * to_reorder in one batch (assuming all are the same type) */
438      rtp = reorder_tags = xmalloc(sizeof(*reorder_tags) *
439				   ((2 * tnfa->num_reorder_tags + 1) +
440				   tnfa->num_tags));
441      if (reorder_tags == NULL)
442	{
443	  status = REG_ESPACE;
444	  goto error_reorder_tags;
445	}
446      to_reorder = reorder_tags + (2 * tnfa->num_reorder_tags + 1);
447    }
448
449  STACK_PUSH(stack, voidptr, node);
450  STACK_PUSH(stack, int, ADDTAGS_RECURSE);
451
452  while (tre_stack_num_objects(stack) > bottom)
453    {
454      if (status != REG_OK)
455	break;
456
457      symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
458      switch (symbol)
459	{
460	  int top_union;
461
462	case ADDTAGS_SET_SUBMATCH_END:
463	  {
464	    int i;
465
466	    id = tre_stack_pop_int(stack);
467	    node = tre_stack_pop_voidptr(stack);
468	    /* Add end of this submatch to regset. */
469	    for (i = 0; regset[i] != REGSET_UNSET; i++);
470	    regset[i] = id * 2 + 1;
471	    regset[i + 1] = -1;
472	    regset_contains |= REGSET_HAS_ENDS;
473
474	    /* Always put a tag after a minimal iterator. */
475	    if (minimal_tag >= 0)
476	      {
477		if (first_pass)
478		  {
479		    node->num_tags++;
480		    DPRINT(("  ADDTAGS_SET_SUBMATCH_END: node->num_tags = %d\n",
481			    node->num_tags));
482		  }
483		else
484		  {
485		    int i;
486		    status = tre_merge_branches(mem, node, NULL, tag,
487						tnfa->num_tags);
488		    if (status != REG_OK)
489		      break;
490		    status = tre_add_tag_right(mem, node, tag);
491		    if (status != REG_OK)
492		      break;
493		    tnfa->tag_directions[tag] = TRE_TAG_MINIMIZE;
494		    DPRINT(("Setting t%d direction to %s\n", tag,
495			    tag_dir_str[tnfa->tag_directions[tag]]));
496		    DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
497		    for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
498		    tnfa->minimal_tags[i] = tag;
499		    tnfa->minimal_tags[i + 1] = minimal_tag;
500		    tnfa->minimal_tags[i + 2] = -1;
501
502		    DPRINT(("  Minimal end: t%d reordered to "
503			    "after t%d\n", tag, minimal_tag));
504		    /* Append to tag_order, move "tag" after
505		     * "minimal_tag" */
506		    *rtp++ = tag;
507		    *rtp++ = minimal_tag;
508
509		    num_minimals++;
510		    tre_purge_regset(regset, tnfa, tag);
511		  }
512
513		minimal_tag = -1;
514		DPRINT(("  ADDTAGS_SET_SUBMATCH_END num_tags++ tag=%d\n", tag));
515		regset[0] = REGSET_UNSET;
516		regset_contains = 0;
517		tag = next_tag;
518		num_tags++;
519		next_tag++;
520	      }
521	    break;
522	  }
523
524	case ADDTAGS_RECURSE_NOT_TOP_UNION:
525	  /* Like ADDTAGS_RECURSE, except that top_union is set to zero,
526	   * indicating that if a union is being processed, it is not the
527	   * top-most of a series */
528	  top_union = 0;
529	  goto do_addtags_recurse;
530
531	case ADDTAGS_RECURSE:
532	  /* Setting top_union to 1 means that if a union is begin processed,
533	   * it is the top-most of a series, and should recurse through the
534	   * series to set the left_tag and right_tag values */
535	  top_union = 1;
536
537do_addtags_recurse:
538	  node = tre_stack_pop_voidptr(stack);
539
540	  id = node->submatch_id;
541	  if (id >= 0)
542	    {
543	      int i;
544
545
546	      /* Add start of this submatch to regset. */
547	      for (i = 0; regset[i] != REGSET_UNSET; i++);
548	      regset[i] = id * 2;
549	      regset[i + 1] = -1;
550	      regset_contains |= REGSET_HAS_STARTS;
551
552	      /* Add end of this submatch to regset after processing this
553		 node. */
554	      STACK_PUSH(stack, voidptr, node);
555	      STACK_PUSHX(stack, int, id);
556	      STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
557	    }
558
559	  switch (node->type)
560	    {
561	    case LITERAL:
562	      {
563		tre_literal_t *lit = node->obj;
564
565		if (!IS_SPECIAL(lit) || IS_BACKREF(lit) || IS_EMPTY(lit) || IS_ASSERTION(lit))
566		  {
567		    DPRINT(("Literal %d-%d\n",
568			    (int)lit->code_min, (int)lit->code_max));
569		    if (regset_contains)
570		      {
571			/* Regset is not empty, so add a tag before the
572			   literal or backref. */
573			if (first_pass)
574			  {
575			    DPRINT(("  ADDTAGS_RECURSE:LITERAL node->num_tags = 1\n"));
576			    node->num_tags = 1;
577			  }
578			else
579			  {
580			    status = tre_merge_branches(mem, node, NULL, tag,
581							tnfa->num_tags);
582			    if (status != REG_OK)
583			      break;
584			    status = tre_add_tag_left(mem, node, tag);
585			    if (status != REG_OK)
586			      break;
587			    if (regset_contains == REGSET_HAS_STARTS)
588			      tnfa->tag_directions[tag] = TRE_TAG_LEFT_MAXIMIZE;
589			    else
590			      tnfa->tag_directions[tag] = direction;
591			    DPRINT(("Setting t%d direction to %s\n", tag,
592				    tag_dir_str[tnfa->tag_directions[tag]]));
593			    tre_purge_regset(regset, tnfa, tag);
594
595			    if (IS_BACKREF(lit))
596			      {
597				int b = lit->code_max;
598				int t = tnfa->submatch_data[b].so_tag;
599				/* Fail if the referenced submatch hasn't been
600				 * completed yet */
601				if (tnfa->submatch_data[b].eo_tag < 0)
602				  {
603				    status = REG_ESUBREG;
604				    break;
605				  }
606				if (t < tag)
607				  {
608				    DPRINT(("  Backref %d start: "
609					    "t%d reordered to before t%d\n",
610					    b, tag, t));
611				    if(t > 0)
612				      t--;
613				    /* Append to tag_order, move "tag" after
614				     * "t" */
615				    *rtp++ = tag;
616				    *rtp++ = t;
617				  }
618#if TRE_DEBUG
619				else
620				  DPRINT(("  Backref %d start: "
621					  "(t%d already before t%d)\n",
622					    b, tag, t));
623#endif /* TRE_DEBUG */
624			      }
625			  }
626
627			DPRINT(("  ADDTAGS_RECURSE:LITERAL num_tags++ tag=%d\n",
628				tag));
629			regset[0] = REGSET_UNSET;
630			regset_contains = 0;
631			tag = next_tag;
632			num_tags++;
633			next_tag++;
634		      }
635		  }
636		else
637		  {
638		    assert(!IS_TAG(lit));
639		  }
640		break;
641	      }
642	    case CATENATION:
643	      {
644		tre_catenation_t *cat = node->obj;
645		tre_ast_node_t *left = cat->left;
646		tre_ast_node_t *right = cat->right;
647		int reserved_tag = -1;
648		DPRINT(("Catenation, next_tag = %d\n", next_tag));
649
650
651		/* After processing right child. */
652		STACK_PUSHX(stack, voidptr, node);
653		STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
654
655		/* Process right child. */
656		STACK_PUSHX(stack, voidptr, right);
657		STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
658
659		/* After processing left child. */
660		STACK_PUSHX(stack, int, next_tag + left->num_tags);
661		DPRINT(("  Pushing %d for after left\n",
662			next_tag + left->num_tags));
663		if (left->num_tags > 0 && right->num_tags > 0)
664		  {
665		    /* Reserve the next tag to the right child. */
666		    DPRINT(("  ADDTAGS_RECURSE:CATENATION num_tags++ "
667			    "Reserving next_tag %d to right child\n",
668			    next_tag));
669		    reserved_tag = next_tag;
670		    next_tag++;
671		  }
672		STACK_PUSHX(stack, int, reserved_tag);
673		STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
674
675		/* Process left child. */
676		STACK_PUSHX(stack, voidptr, left);
677		STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
678
679		}
680	      break;
681	    case ITERATION:
682	      {
683		tre_iteration_t *iter = node->obj;
684		DPRINT(("Iteration\n"));
685
686		if (first_pass)
687		  STACK_PUSHX(stack, int, regset_contains != 0);
688		STACK_PUSHX(stack, int, tag);
689		STACK_PUSHX(stack, voidptr, node);
690		STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
691
692		STACK_PUSHX(stack, voidptr, iter->arg);
693		STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
694
695		/* Regset is not empty, so add a tag here (this always happens
696		   because iterators always get submatch id, even if in the
697		   invisible range) */
698		if (regset_contains)
699		  {
700		    if (!first_pass)
701		      {
702			status = tre_merge_branches(mem, node, NULL, tag,
703						    tnfa->num_tags);
704			if (status != REG_OK)
705			  break;
706			status = tre_add_tag_left(mem, node, tag);
707			if (status != REG_OK)
708			  break;
709			if (regset_contains == REGSET_HAS_STARTS && tag != 0)
710			  tnfa->tag_directions[tag] = iter->minimal ?
711						      TRE_TAG_MINIMIZE :
712						      TRE_TAG_LEFT_MAXIMIZE;
713			else
714			  tnfa->tag_directions[tag] = direction;
715			DPRINT(("Setting t%d direction to %s\n", tag,
716				tag_dir_str[tnfa->tag_directions[tag]]));
717			tre_purge_regset(regset, tnfa, tag);
718		      }
719
720		    DPRINT(("  ADDTAGS_RECURSE:ITERATION num_tags++ tag=%d\n",
721			    tag));
722		    regset[0] = REGSET_UNSET;
723		    regset_contains = 0;
724		    tag = next_tag;
725		    num_tags++;
726		    next_tag++;
727		  }
728		direction = TRE_TAG_LEFT_MAXIMIZE;
729		DPRINT(("  Setting direction to %s\n", tag_dir_str[direction]));
730	      }
731	      break;
732	    case UNION:
733	      {
734		tre_union_t *uni;
735		tre_ast_node_t *left;
736		tre_ast_node_t *right;
737		int front_tag = -1;
738
739		DPRINT(("Union\n"));
740
741		if (regset_contains)
742		  {
743		    DPRINT(("  UNION num_tags++ tag=%d\n", tag));
744		    front_tag = tag;
745		    tag = next_tag;
746		    num_tags++;
747		    next_tag++;
748		  }
749
750		/* For the top union, walk the tree of consecutive unions,
751		 * setting the left_tag and right_tag values in increasing
752		 * order (left to right priority) */
753		if (top_union &&
754		    (node->num_submatches -
755		    (node->submatch_id >= 0 &&
756		    node->submatch_id < SUBMATCH_ID_INVISIBLE_START)) > 0)
757		  {
758		    tre_ast_node_t *n;
759		    int last = tre_stack_num_objects(stack);
760
761		    STACK_PUSH(stack, voidptr, node);
762		    STACK_PUSH(stack, int, ADDTAGS_UNION_RECURSE);
763
764		    while (tre_stack_num_objects(stack) > last)
765		      {
766			symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
767			switch (symbol)
768			  {
769			  case ADDTAGS_UNION_RECURSE:
770			    n = tre_stack_pop_voidptr(stack);
771			    uni = n->obj;
772			    left = uni->left;
773
774			    /* Since the top union has num_submatches > 0,
775			     * we set all the consecutive union's
776			     * make_branches to 1 to force the generation
777			     * of end tags for each union branch. */
778			    n->make_branches = 1;
779
780			    STACK_PUSH(stack, voidptr, n);
781			    STACK_PUSH(stack, int,
782				       ADDTAGS_UNION_RIGHT_RECURSE);
783
784			    if (left->type == UNION)
785			      {
786				STACK_PUSH(stack, voidptr, left);
787				STACK_PUSH(stack, int,
788					   ADDTAGS_UNION_RECURSE);
789			      }
790			    else
791			      {
792				DPRINT(("  ADDTAGS_UNION_RECURSE "
793					"num_tags++ tag=%d\n", tag));
794				uni->left_tag = tag;
795				tag = next_tag;
796				num_tags++;
797				next_tag++;
798			      }
799			    break;
800
801			  case ADDTAGS_UNION_RIGHT_RECURSE:
802			    n = tre_stack_pop_voidptr(stack);
803			    uni = n->obj;
804			    right = uni->right;
805
806			    if (right->type == UNION)
807			      {
808				STACK_PUSH(stack, voidptr, right);
809				STACK_PUSH(stack, int,
810					   ADDTAGS_UNION_RECURSE);
811			      }
812			    else
813			      {
814				DPRINT(("  ADDTAGS_UNION_RIGHT_RECURSE "
815					"num_tags++ tag=%d\n", tag));
816				uni->right_tag = tag;
817				tag = next_tag;
818				num_tags++;
819				next_tag++;
820			      }
821
822			    break;
823
824			  default:
825			    assert(0);
826			    break;
827
828			  } /* end switch(symbol) */
829		      } /* end while(tre_stack_num_objects(stack) > last */
830		    if (!first_pass)
831		      {
832			STACK_PUSHX(stack, int, front_tag);
833			STACK_PUSHX(stack, voidptr, node);
834			STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_TOP);
835		      }
836		  } /* end if (top_union && ...) */
837
838		uni = node->obj;
839		left = uni->left;
840		right = uni->right;
841
842		/* After processing right child. */
843		STACK_PUSHX(stack, voidptr, regset);
844		STACK_PUSHX(stack, int, regset_contains != 0);
845		STACK_PUSHX(stack, voidptr, node);
846		STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
847
848		/* Process right child. */
849		STACK_PUSHX(stack, voidptr, right);
850		STACK_PUSHX(stack, int, ADDTAGS_RECURSE_NOT_TOP_UNION);
851
852		/* After processing left child. */
853		STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
854
855		/* Process left child. */
856		STACK_PUSHX(stack, voidptr, left);
857		STACK_PUSHX(stack, int, ADDTAGS_RECURSE_NOT_TOP_UNION);
858
859		/* Regset is not empty, so add a tag here. */
860		if (regset_contains)
861		  {
862		    if (!first_pass)
863		      {
864			status = tre_merge_branches(mem, node, NULL, front_tag,
865						    tnfa->num_tags);
866			if (status != REG_OK)
867			  break;
868			status = tre_add_tag_left(mem, node, front_tag);
869			if (status != REG_OK)
870			  break;
871			if (regset_contains == REGSET_HAS_STARTS)
872			  tnfa->tag_directions[front_tag] = TRE_TAG_LEFT_MAXIMIZE;
873			else
874			  tnfa->tag_directions[front_tag] = direction;
875			DPRINT(("Setting t%d direction to %s\n", front_tag,
876				tag_dir_str[tnfa->tag_directions[front_tag]]));
877			tre_purge_regset(regset, tnfa, front_tag);
878		      }
879
880		    regset[0] = REGSET_UNSET;
881		    regset_contains = 0;
882		  }
883
884		break;
885	      }
886	    } /* end switch (node->type) */
887
888	  break; /* end case: ADDTAGS_RECURSE */
889
890	case ADDTAGS_AFTER_ITERATION:
891	  {
892	    tre_iteration_t *iter;
893	    tre_ast_node_t *orig;
894	    int enter_tag;
895
896	    node = tre_stack_pop_voidptr(stack);
897	    orig = node->original ? node->original : node;
898	    iter = (tre_iteration_t *)orig->obj;
899	    enter_tag = tre_stack_pop_int(stack);
900	    if (iter->minimal)
901	      minimal_tag = enter_tag;
902
903	    DPRINT(("After iteration\n"));
904	    if (first_pass)
905	      {
906		node->num_tags = iter->arg->num_tags + tre_stack_pop_int(stack);
907		DPRINT(("  ADDTAGS_AFTER_ITERATION: node->num_tags = %d\n",
908			node->num_tags));
909	      }
910	    else
911	      {
912		/* node->last_matched_branch will have the start tag (the tag
913		   just *before* the iteration).  iter->arg->last_matched_branch
914		   will have the tag(s) inside the iteration, the ones that
915		   may need to be reset if the iteration doesn't match.  So
916		   before we merge iter->arg into node, we need to set up
917		   a new tre_last_matched_t and tre_last_matched_branch_t,
918		   using any of the inside tags as cmp_tag (we choose the first
919		   tag found by bit_ffs).  If there are no inside tags, we
920		   don't bother creating the extra structures. */
921		tre_last_matched_branch_pre_t *b =
922						iter->arg->last_matched_branch;
923
924		if (b && b->n_tags > 0)
925		  {
926		    tre_last_matched_pre_t *u;
927
928		    bit_ffs(b->tags, num_tags, &b->cmp_tag);
929		    DPRINT(("  ADDTAGS_AFTER_ITERATION: n_tags=%d "
930			    "cmp_tag = %d\n", b->n_tags, b->cmp_tag));
931
932		    u = tre_mem_calloc(mem, sizeof(tre_last_matched_pre_t) +
933				       sizeof(tre_last_matched_branch_pre_t)
934				       + bitstr_size(tnfa->num_tags));
935		    if (!u)
936		      {
937			status = REG_ESPACE;
938			break;
939		      }
940		    u->branches = b;
941		    u->n_branches = 1;
942		    u->start_tag = b->cmp_tag;
943		    u->tot_branches = b->tot_branches;
944		    u->tot_last_matched = 1 + b->tot_last_matched;
945		    u->tot_tags = b->tot_tags;
946
947		    b = (tre_last_matched_branch_pre_t *)(u + 1);
948		    b->last_matched = u;
949		    b->n_last_matched = 1;
950		    b->tot_branches = 1 + u->tot_branches;
951		    b->tot_last_matched = u->tot_last_matched;
952		    b->tot_tags = u->tot_tags;
953
954		    iter->arg->last_matched_branch = b;
955		  }
956		status = tre_merge_branches(mem, node, iter->arg, 0,
957					    tnfa->num_tags);
958		if (status != REG_OK)
959		  break;
960
961		if (iter->minimal)
962		  {
963		    /* Add a union with a left EMPTY literal and the right
964		       being iter->arg.  This should force the tags inside
965		       the minimal iteration to prefer being unset */
966		    if (iter->min == 0 && iter->max <= 1)
967		      {
968			tre_ast_node_t *u, *e;
969
970			e = tre_ast_new_literal(mem, EMPTY, -1, -1);
971			if (e == NULL)
972			  {
973			    status = REG_ESPACE;
974			    break;
975			  }
976			u = tre_ast_new_union(mem, e, iter->arg);
977			if (u == NULL)
978			  {
979			    status = REG_ESPACE;
980			    break;
981			  }
982			iter->arg = u;
983		      }
984
985		    direction = TRE_TAG_MINIMIZE;
986		  }
987		else
988		  direction = TRE_TAG_MAXIMIZE;
989		DPRINT(("  Setting direction to %s\n", tag_dir_str[direction]));
990	      }
991	    break;
992	  }
993
994	case ADDTAGS_AFTER_CAT_LEFT:
995	  {
996	    int new_tag = tre_stack_pop_int(stack);
997	    next_tag = tre_stack_pop_int(stack);
998	    DPRINT(("After cat left, tag = %d, next_tag = %d\n",
999		    tag, next_tag));
1000	    if (new_tag >= 0)
1001	      {
1002		DPRINT(("  Setting tag to %d\n", new_tag));
1003		tag = new_tag;
1004	      }
1005	    break;
1006	  }
1007
1008	case ADDTAGS_AFTER_CAT_RIGHT:
1009	  {
1010	    tre_catenation_t *cat;
1011
1012	    DPRINT(("After cat right\n"));
1013	    node = tre_stack_pop_voidptr(stack);
1014	    cat = node->obj;
1015	    if (first_pass)
1016	      {
1017		node->num_tags = cat->left->num_tags + cat->right->num_tags;
1018		DPRINT(("  ADDTAGS_AFTER_CAT_RIGHT: node->num_tags = %d\n",
1019			node->num_tags));
1020	      }
1021	    else
1022	      {
1023		status = tre_merge_branches(mem, cat->left, cat->right, 0,
1024					    tnfa->num_tags);
1025		if (status != REG_OK)
1026		  break;
1027		status = tre_merge_branches(mem, node, cat->left, 0,
1028					    tnfa->num_tags);
1029	      }
1030	    break;
1031	  }
1032
1033	case ADDTAGS_AFTER_UNION_LEFT:
1034	  DPRINT(("After union left\n"));
1035	  /* Lift the bottom of the `regset' array so that when processing
1036	     the right operand the items currently in the array are
1037	     invisible.	 The original bottom was saved at ADDTAGS_UNION and
1038	     will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
1039	  while (*regset != REGSET_UNSET)
1040	    regset++;
1041	  regset_contains = 0;
1042	  break;
1043
1044	case ADDTAGS_AFTER_UNION_RIGHT:
1045	  {
1046	    int added_tags;
1047	    tre_ast_node_t *orig;
1048	    tre_union_t *uni;
1049	    /* Note: node may not be a UNION, but a CATENATION with a left
1050	     * tag.  So that is why we pass the original node->obj on the
1051	     * stack, to get the union's true values. */
1052
1053	    DPRINT(("After union right\n"));
1054	    node = tre_stack_pop_voidptr(stack);
1055	    orig = node->original ? node->original : node;
1056	    uni = (tre_union_t *)orig->obj;
1057	    added_tags = tre_stack_pop_int(stack);
1058	    if (first_pass)
1059	      {
1060		node->num_tags = uni->left->num_tags + uni->right->num_tags
1061				 + added_tags;
1062		if (uni->left_tag > 0)
1063		  node->num_tags++;
1064		if (uni->right_tag > 0)
1065		  node->num_tags++;
1066		DPRINT(("  ADDTAGS_AFTER_UNION_RIGHT: node->num_tags = %d\n",
1067			node->num_tags));
1068	      }
1069	    regset = tre_stack_pop_voidptr(stack);
1070
1071	    /* Add tags after both children, the left child gets a smaller
1072	       tag than the right child.  This guarantees that we prefer
1073	       the left child over the right child. */
1074	    /* XXX - This is not always necessary (if the children have
1075	       tags which must be seen for every match of that child). */
1076	    if (!first_pass && node->make_branches)
1077	      {
1078		tre_last_matched_branch_pre_t *lb =
1079		  uni->left->last_matched_branch;
1080		tre_last_matched_branch_pre_t *rb =
1081		  uni->right->last_matched_branch;
1082		tre_last_matched_pre_t *lu =
1083		  uni->left->last_matched_in_progress;
1084		tre_last_matched_pre_t *ru =
1085		  uni->right->last_matched_in_progress;
1086		tre_last_matched_pre_t *u;
1087		/* We don't need to call tre_merge_branches because these
1088		 * tags don't participate in submatch ranges, so don't need
1089		 * to be recorded.  But we do set the cmp_tag entry of the
1090		 * tre_last_matched_branch_pre_t, so we might call
1091		 * tre_merge_branches if we need to create an empty
1092		 * tre_last_matched_branch_pre_t. */
1093		if (uni->left_tag > 0)
1094		  {
1095		    DPRINT(("Setting t%d direction to maximize\n",
1096			    uni->left_tag));
1097		    status = tre_add_tag_right(mem, uni->left, uni->left_tag);
1098		    if (status != REG_OK)
1099		      break;
1100		    tnfa->tag_directions[uni->left_tag] = TRE_TAG_MAXIMIZE;
1101		    if (!lb)
1102		      {
1103			status = tre_merge_branches(mem, uni->left, NULL, -1,
1104						    tnfa->num_tags);
1105			if (status != REG_OK)
1106			  break;
1107			lb = uni->left->last_matched_branch;
1108		      }
1109		    lb->cmp_tag = uni->left_tag;
1110		  }
1111		if (uni->right_tag > 0)
1112		  {
1113		    DPRINT(("Setting t%d direction to maximize\n",
1114			    uni->right_tag));
1115		    status = tre_add_tag_right(mem, uni->right, uni->right_tag);
1116		    if (status != REG_OK)
1117		      break;
1118		    tnfa->tag_directions[uni->right_tag] = TRE_TAG_MAXIMIZE;
1119		    if (!rb)
1120		      {
1121			status = tre_merge_branches(mem, uni->right, NULL, -1,
1122						    tnfa->num_tags);
1123			if (status != REG_OK)
1124			  break;
1125			rb = uni->right->last_matched_branch;
1126		      }
1127		    rb->cmp_tag = uni->right_tag;
1128		  }
1129		/* Now merge the tre_last_matched_branch_pre_t into a
1130		   tre_last_matched_pre_t */
1131		if (lu == NULL)
1132		  {
1133		    if (ru == NULL)
1134		      {
1135			/* Create a new tre_last_matched_pre_t */
1136			u = tre_mem_calloc(mem, sizeof(tre_last_matched_pre_t));
1137			if (!u)
1138			  {
1139			    status = REG_ESPACE;
1140			    break;
1141			  }
1142			u->tot_last_matched = 1;
1143
1144			if (lb)
1145			  {
1146			    u->branches = lb;
1147			    u->n_branches = 1;
1148			    u->tot_branches += lb->tot_branches;
1149			    u->tot_last_matched += lb->tot_last_matched;
1150			    u->tot_tags += lb->tot_tags;
1151			    if (rb)
1152			      {
1153				lb->next = rb;
1154				u->n_branches++;
1155				u->tot_branches += rb->tot_branches;
1156				u->tot_last_matched += rb->tot_last_matched;
1157				u->tot_tags += rb->tot_tags;
1158			      }
1159			  }
1160			else if (rb)
1161			  {
1162			    u->branches = rb;
1163			    u->n_branches = 1;
1164			    u->tot_branches += rb->tot_branches;
1165			    u->tot_last_matched += rb->tot_last_matched;
1166			    u->tot_tags += rb->tot_tags;
1167			  }
1168		      }
1169		    else
1170		      {
1171			/* Use ru, and add lb */
1172			u = ru;
1173			if (lb)
1174			  {
1175			    lb->next = u->branches;
1176			    u->branches = lb;
1177			    u->n_branches++;
1178			    u->tot_branches += lb->tot_branches;
1179			    u->tot_last_matched += lb->tot_last_matched;
1180			    u->tot_tags += lb->tot_tags;
1181			  }
1182		      }
1183		  }
1184		else if (ru == NULL)
1185		  {
1186		    /* Use lu, and add rb */
1187		    u = lu;
1188		    if (rb)
1189		      {
1190			rb->next = u->branches;
1191			u->branches = rb;
1192			u->n_branches++;
1193			u->tot_branches += rb->tot_branches;
1194			u->tot_last_matched += rb->tot_last_matched;
1195			u->tot_tags += rb->tot_tags;
1196		      }
1197		  }
1198		else
1199		  {
1200		    /* Merge lu and ru into lu */
1201		    if (lu->branches)
1202		      {
1203			if (ru->branches)
1204			  {
1205			    tre_last_matched_branch_pre_t *b = lu->branches;
1206			    while (b->next) b = b->next;
1207			    b->next = ru->branches;
1208			    lu->n_branches += ru->n_branches;
1209			  }
1210		      }
1211		    else if (ru->branches)
1212		      {
1213			lu->branches = ru->branches;
1214			lu->n_branches = ru->n_branches;
1215		      }
1216		    lu->tot_branches += ru->tot_branches;
1217		    lu->tot_last_matched += ru->tot_last_matched - 1;
1218		    lu->tot_tags += ru->tot_tags;
1219		    u = lu;
1220		  }
1221		node->last_matched_in_progress = u;
1222	      }
1223	    direction = TRE_TAG_MAXIMIZE;
1224	    break;
1225	  }
1226
1227	case ADDTAGS_AFTER_UNION_TOP: /* only called when not first_pass */
1228	  {
1229	    tre_last_matched_branch_pre_t *b;
1230	    tre_last_matched_pre_t *u;
1231	    int start_tag;
1232
1233	    DPRINT(("After union top\n"));
1234	    node = tre_stack_pop_voidptr(stack);
1235	    start_tag = tre_stack_pop_int(stack);
1236	    b = tre_mem_calloc(mem, sizeof(tre_last_matched_branch_pre_t)
1237			       + bitstr_size(tnfa->num_tags));
1238	    if (!b)
1239	      {
1240		status = REG_ESPACE;
1241		break;
1242	      }
1243
1244	    u = node->last_matched_in_progress;
1245	    u->start_tag = start_tag;
1246	    b->tot_branches = 1 + u->tot_branches;
1247	    b->tot_last_matched = u->tot_last_matched;
1248	    b->tot_tags = u->tot_tags;
1249	    b->last_matched = u;
1250	    b->n_last_matched = 1;
1251	    node->last_matched_branch = b;
1252	    node->last_matched_in_progress = NULL;
1253	    break;
1254	  }
1255
1256	default:
1257	  assert(0);
1258	  break;
1259
1260	} /* end switch(symbol) */
1261    } /* end while(tre_stack_num_objects(stack) > bottom) */
1262
1263  if (status != REG_OK)
1264    {
1265      DPRINT(("Error during %s pass\n", first_pass ? "first" : "second"));
1266      goto error_post_compile;
1267    }
1268
1269  if (!first_pass)
1270    {
1271      int i;
1272      if (num_tags != tnfa->num_tags)
1273	{
1274	  DPRINT(("num_tags(%d) != tnfa->num_tags(%d)\n", num_tags,
1275		  tnfa->num_tags));
1276	  status = REG_BADPAT;
1277	  goto error_post_compile;
1278	}
1279
1280      tre_purge_regset(regset, tnfa, tag);
1281      DPRINT(("Setting t%d to %s\n", num_tags,
1282	      tag_dir_str[direction]));
1283      tnfa->tag_directions[num_tags] = direction;
1284
1285      if (rtp > reorder_tags + 2 * tnfa->num_reorder_tags)
1286	{
1287	  DPRINT(("Processed %d reorder tags instead of %d\n",
1288		  (int)(rtp - reorder_tags) / 2, tnfa->num_reorder_tags));
1289	  status = REG_BADPAT;
1290	  goto error_post_compile;
1291	}
1292    *rtp = -1;
1293#if TRE_DEBUG
1294      if (reorder_tags[0] >= 0)
1295	{
1296	  DPRINT(("reorder_tags:\n"));
1297	  for (rtp = reorder_tags; *rtp >= 0;)
1298	    {
1299	      DPRINT(("%d after ", *rtp++));
1300	      DPRINT(("%d\n", *rtp++));
1301	    }
1302	}
1303	else
1304	  DPRINT(("No reorder_tags\n"));
1305#endif /* TRE_DEBUG */
1306
1307      /* Initialize to_reorder */
1308      for (i = 0; i < num_tags; i++)
1309	to_reorder[i] = i;
1310      /* Use to_seq_order to convert reorder_tags values, and use those to
1311       * reorder to_reorder */
1312      for (rtp = reorder_tags; *rtp >= 0;)
1313	{
1314	  int j, high, low;
1315	  int ti = *rtp++;
1316
1317	  /* Skip reordering the final tag */
1318	  if (ti >= num_tags)
1319	    {
1320	      DPRINT(("Skipping reorder of %d\n", ti));
1321	      rtp++;
1322	      continue;
1323	    }
1324	  /* The number of the tag to reorder */
1325	  high = to_reorder[ti];
1326	  /* Reorder after this tag */
1327	  low = to_reorder[*rtp++];
1328
1329	  DPRINT(("ti=%d high=%d low=%d\n", ti, high, low));
1330	  if (low > high)
1331	    {
1332	      DPRINT(("Tag %d already before %d\n", high, low));
1333	      continue;
1334	    }
1335	  for (j = 0; j < num_tags; j++)
1336	    if (to_reorder[j] > low && to_reorder[j] < high)
1337	      to_reorder[j]++;
1338	  to_reorder[ti] = low + 1;
1339#ifdef TRE_DEBUG
1340	  DPRINT(("to_reorder=("));
1341	  for (j = 0; j < num_tags; j++)
1342	    {
1343	      DPRINT(("%d", to_reorder[j]));
1344	      if (j < num_tags - 1)
1345		DPRINT((","));
1346	    }
1347	  DPRINT((")\n"));
1348#endif /* TRE_DEBUG */
1349	}
1350      /* Determine if reordering in really necessary */
1351      {
1352	int need_reorder = 0;
1353	for (i = 0; i < num_tags; i++)
1354	  if(to_reorder[i] != i)
1355	    {
1356	      need_reorder = 1;
1357	      break;
1358	    }
1359	/* If need_reorder is not set, free reorder_tags, and set to NULL,
1360	 * indicating no reordering is needed */
1361	if (!need_reorder)
1362	  {
1363	    DPRINT(("Don't need to reorder\n"));
1364	    xfree(reorder_tags);
1365	    reorder_tags = NULL;
1366	  }
1367      }
1368    }
1369
1370  if (reorder_tags)
1371    {
1372      int i;
1373      tre_tag_direction_t *new_tag_directions;
1374#if TRE_DEBUG
1375      DPRINT(("to_reorder:"));
1376      for (i = 0; i < num_tags; i++)
1377	DPRINT((" %d->%d", i, to_reorder[i]));
1378      DPRINT(("\n"));
1379#endif /* TRE_DEBUG */
1380
1381      DPRINT(("Reordering submatch_data\n"));
1382      for (i = 0; i < tnfa->num_submatches; i++)
1383	{
1384#if TRE_DEBUG
1385	  int so = tnfa->submatch_data[i].so_tag;
1386	  int eo = tnfa->submatch_data[i].eo_tag;
1387#endif /* TRE_DEBUG */
1388	  tnfa->submatch_data[i].so_tag =
1389	    to_reorder[tnfa->submatch_data[i].so_tag];
1390	  tnfa->submatch_data[i].eo_tag =
1391	    tnfa->submatch_data[i].eo_tag < num_tags ?
1392	    to_reorder[tnfa->submatch_data[i].eo_tag] :
1393	    tnfa->submatch_data[i].eo_tag;
1394	  DPRINT(("pmatch[%d]: {%d, %d}->{%d, %d}\n", i, so, eo,
1395		  tnfa->submatch_data[i].so_tag,
1396		  tnfa->submatch_data[i].eo_tag));
1397	}
1398
1399      DPRINT(("Reordering tag_directions\n"));
1400      /* We only allocate num_tags directions and reorder them.  The
1401       * num_tags-th direction (end tag) is left unchanged. */
1402      new_tag_directions = xmalloc(sizeof(*new_tag_directions) * num_tags);
1403      if (new_tag_directions == NULL)
1404	{
1405	  status = REG_ESPACE;
1406	  goto error_post_compile;
1407	}
1408      for (i = 0; i < num_tags; i++)
1409	{
1410	  new_tag_directions[to_reorder[i]] = tnfa->tag_directions[i];
1411	}
1412#if TRE_DEBUG
1413      for (i = 0; i < num_tags; i++)
1414	{
1415	  DPRINT(("t%d %s->%s\n", i,
1416		  tag_dir_str[tnfa->tag_directions[i]],
1417		  tag_dir_str[new_tag_directions[i]]));
1418	}
1419	DPRINT(("t%d %s->%s\n", num_tags,
1420		tag_dir_str[tnfa->tag_directions[num_tags]],
1421		tag_dir_str[tnfa->tag_directions[num_tags]]));
1422#endif /* TRE_DEBUG */
1423      memcpy(tnfa->tag_directions, new_tag_directions, sizeof(*new_tag_directions) * num_tags);
1424      xfree(new_tag_directions);
1425
1426      DPRINT(("Reordering minimal_tags\n"));
1427      for (i = 0; tnfa->minimal_tags[i] >= 0; i++)
1428	tnfa->minimal_tags[i] = tnfa->minimal_tags[i] < num_tags ?
1429				to_reorder[tnfa->minimal_tags[i]] :
1430				tnfa->minimal_tags[i];
1431
1432      DPRINT(("Reordering AST tags\n"));
1433      STACK_PUSH(stack, voidptr, tree);
1434      while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1435	{
1436	  node = tre_stack_pop_voidptr(stack);
1437
1438	  switch (node->type)
1439	    {
1440	    case LITERAL:
1441	      {
1442		tre_literal_t *lit = (tre_literal_t *)node->obj;
1443		if (IS_TAG(lit))
1444		  lit->code_max = to_reorder[lit->code_max];
1445		break;
1446	      }
1447
1448	    case UNION:
1449	      {
1450		tre_union_t *uni = (tre_union_t *)node->obj;
1451		STACK_PUSHX(stack, voidptr, uni->right);
1452		STACK_PUSHX(stack, voidptr, uni->left);
1453		break;
1454	      }
1455
1456	    case CATENATION:
1457	      {
1458		tre_catenation_t *cat = (tre_catenation_t *)node->obj;
1459		STACK_PUSHX(stack, voidptr, cat->right);
1460		STACK_PUSHX(stack, voidptr, cat->left);
1461		break;
1462	      }
1463
1464	    case ITERATION:
1465	      {
1466		tre_iteration_t *iter = (tre_iteration_t *)node->obj;
1467		STACK_PUSHX(stack, voidptr, iter->arg);
1468		break;
1469	      }
1470
1471	    default:
1472	      assert(0);
1473	      break;
1474	    }
1475	}
1476	if (status != REG_OK)
1477	  {
1478	    DPRINT(("Error while reordering tags\n"));
1479	    goto error_post_compile;
1480	  }
1481    }
1482
1483
1484  if (!first_pass)
1485    {
1486      if (tree->last_matched_branch)
1487	{
1488	  tre_last_matched_branch_t *buf, *b, *bb;
1489	  tre_last_matched_branch_pre_t *bp;
1490	  tre_last_matched_t *u, *uu;
1491	  tre_last_matched_pre_t *up;
1492	  int *t;
1493	  int i;
1494#ifdef TRE_DEBUG
1495	  tre_last_matched_branch_t *_b;
1496	  tre_last_matched_t *_u;
1497	  int *_t;
1498
1499	  DPRINT(("last_match_branch_pre:\n"));
1500	  print_last_match_branch_pre(tree->last_matched_branch, 0, num_tags);
1501#endif /* TRE_DEBUG */
1502	  buf = (tre_last_matched_branch_t *)xcalloc(1,
1503				     tree->last_matched_branch->tot_branches
1504				     * sizeof(tre_last_matched_branch_t) +
1505				     tree->last_matched_branch->tot_last_matched
1506				     * sizeof(tre_last_matched_t) +
1507				     tree->last_matched_branch->tot_tags *
1508				     sizeof(int));
1509	  if (!buf)
1510	    {
1511	      status = REG_ESPACE;
1512	      goto error_post_compile;
1513	    }
1514
1515	  b = buf;
1516	  u = (tre_last_matched_t *)(b +
1517	      tree->last_matched_branch->tot_branches);
1518	  t = (int *)(u + tree->last_matched_branch->tot_last_matched);
1519#ifdef TRE_DEBUG
1520	  _b = b;
1521	  _u = u;
1522	  _t = t;
1523#endif /* TRE_DEBUG */
1524	  DPRINT(("Copying info_pre to info\n"));
1525	  STACK_PUSH(stack, voidptr, tree->last_matched_branch);
1526	  STACK_PUSH(stack, int, 1);
1527	  STACK_PUSH(stack, int, COPY_LAST_MATCHED_BRANCH);
1528
1529	  while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1530	    {
1531	      switch (tre_stack_pop_int(stack))
1532		{
1533		case COPY_LAST_MATCHED_BRANCH:
1534		  i = tre_stack_pop_int(stack);
1535		  /* The tre_last_matched_branch_pre_t * is still on the
1536		     stack */
1537		  STACK_PUSHX(stack, voidptr, b);
1538		  STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH_NEXT);
1539		  b += i;
1540		  break;
1541
1542		case COPY_LAST_MATCHED_BRANCH_NEXT:
1543		  bb = tre_stack_pop_voidptr(stack);
1544		  bp = tre_stack_pop_voidptr(stack);
1545		  bb->n_last_matched = bp->n_last_matched;
1546		  bb->cmp_tag = bp->cmp_tag;
1547		  if (bp->n_tags > 0)
1548		    {
1549		      int n;
1550		      n = bb->n_tags = bp->n_tags;
1551		      bb->tags = t;
1552		      for (i = 0; i < num_tags; i++)
1553			if (bit_test(bp->tags, i))
1554			  {
1555			    *t++ = i;
1556			    if (--n <= 0)
1557			      break;
1558			  }
1559		    }
1560		  if (bp->next)
1561		    {
1562		      STACK_PUSHX(stack, voidptr, bp->next);
1563		      STACK_PUSHX(stack, voidptr, bb + 1);
1564		      STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH_NEXT);
1565		    }
1566		  if (bp->n_last_matched > 0)
1567		    {
1568		      bb->last_matched = u;
1569		      STACK_PUSHX(stack, voidptr, bp->last_matched);
1570		      STACK_PUSHX(stack, int, bp->n_last_matched);
1571		      STACK_PUSHX(stack, int, COPY_LAST_MATCHED);
1572		    }
1573		  break;
1574
1575		case COPY_LAST_MATCHED:
1576		  i = tre_stack_pop_int(stack);
1577		  /* The tre_last_matched_pre_t * is still on the stack */
1578		  STACK_PUSHX(stack, voidptr, u);
1579		  STACK_PUSHX(stack, int, COPY_LAST_MATCHED_NEXT);
1580		  u += i;
1581		  break;
1582
1583		case COPY_LAST_MATCHED_NEXT:
1584		  uu = tre_stack_pop_voidptr(stack);
1585		  up = tre_stack_pop_voidptr(stack);
1586		  uu->n_branches = up->n_branches;
1587		  uu->branches = b;
1588		  uu->start_tag = up->start_tag;
1589		  if (up->next)
1590		    {
1591		      STACK_PUSHX(stack, voidptr, up->next);
1592		      STACK_PUSHX(stack, voidptr, uu + 1);
1593		      STACK_PUSHX(stack, int, COPY_LAST_MATCHED_NEXT);
1594		    }
1595		  STACK_PUSHX(stack, voidptr, up->branches);
1596		  STACK_PUSHX(stack, int, up->n_branches);
1597		  STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH);
1598		  break;
1599		}
1600	    }
1601	  if (status != REG_OK)
1602	    goto error_post_compile;
1603#ifdef TRE_DEBUG
1604	  DPRINT(("last_matched_branch:\n"));
1605	  print_last_match_branch(buf, 0);
1606	  if (b != _b + tree->last_matched_branch->tot_branches)
1607	    DPRINT(("b/%p != _b + tree->last_matched_branch->tot_branches/%p\n",
1608		    b, _b + tree->last_matched_branch->tot_branches));
1609	  if (u != _u + tree->last_matched_branch->tot_last_matched)
1610	    DPRINT(("u/%p != _u + "
1611		    "tree->last_matched_branch->tot_last_matched/%p\n",
1612		    u, _u + tree->last_matched_branch->tot_last_matched));
1613	  if (t != _t + tree->last_matched_branch->tot_tags)
1614	    DPRINT(("t/%p != _t + tree->last_matched_branch->tot_tags/%p\n",
1615		    t, _t + tree->last_matched_branch->tot_tags));
1616#endif /* TRE_DEBUG */
1617	  tnfa->last_matched_branch = buf;
1618	}
1619#ifdef TRE_DEBUG
1620      else
1621	DPRINT(("No last_match_branch_pre\n"));
1622#endif /* TRE_DEBUG */
1623    }
1624
1625  DPRINT(("tre_add_tags: %s complete.  Number of tags %d.\n",
1626	  first_pass? "First pass" : "Second pass", num_tags));
1627#ifdef TRE_DEBUG
1628  tre_ast_print(tree);
1629#endif /* TRE_DEBUG */
1630  DPRINT(("tre_add_tags: tree->num_tags=%d num_tags=%d\n", tree->num_tags,
1631	  num_tags));
1632  assert(tree->num_tags == num_tags);
1633  tnfa->end_tag = num_tags;
1634  tnfa->num_tags = num_tags;
1635  tnfa->num_minimals = num_minimals;
1636error_post_compile:
1637  xfree(reorder_tags);
1638error_reorder_tags:
1639  xfree(orig_regset);
1640error_regset:
1641  return status;
1642}
1643
1644
1645
1646/*
1647  AST to TNFA compilation routines.
1648*/
1649
1650typedef enum {
1651  COPY_RECURSE,
1652  COPY_SET_RESULT_PTR
1653} tre_copyast_symbol_t;
1654
1655/* Flags for tre_copy_ast(). */
1656#define COPY_REMOVE_TAGS	 1
1657#define COPY_MAXIMIZE_FIRST_TAG	 2
1658
1659static reg_errcode_t
1660tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1661	     int flags, int *pos_add, tre_tag_direction_t *tag_directions,
1662	     tre_ast_node_t **copy, int *max_pos)
1663{
1664  reg_errcode_t status = REG_OK;
1665  int bottom = tre_stack_num_objects(stack);
1666  int num_copied = 0;
1667  int first_tag = 1;
1668  tre_ast_node_t **result = copy;
1669  tre_copyast_symbol_t symbol;
1670
1671  STACK_PUSH(stack, voidptr, ast);
1672  STACK_PUSH(stack, int, COPY_RECURSE);
1673
1674  while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1675    {
1676      tre_ast_node_t *node;
1677      if (status != REG_OK)
1678	break;
1679
1680      symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
1681      switch (symbol)
1682	{
1683	case COPY_SET_RESULT_PTR:
1684	  result = tre_stack_pop_voidptr(stack);
1685	  break;
1686	case COPY_RECURSE:
1687	  node = tre_stack_pop_voidptr(stack);
1688	  switch (node->type)
1689	    {
1690	    case LITERAL:
1691	      {
1692		tre_literal_t *lit = node->obj;
1693		int pos = lit->position;
1694		int min = lit->code_min;
1695		int max = lit->code_max;
1696		tre_bracket_match_list_t *list = !IS_SPECIAL(lit) ?
1697						  lit->u.bracket_match_list :
1698						  NULL;
1699		if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1700		  {
1701		    /* XXX - e.g. [ab] has only one position but two
1702		       nodes, so we are creating holes in the state space
1703		       here.  Not fatal, just wastes memory. */
1704		    pos += *pos_add;
1705		    num_copied++;
1706		  }
1707		else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
1708		  {
1709		    /* Change this tag to empty. */
1710		    min = EMPTY;
1711		    max = pos = -1;
1712		  }
1713		else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
1714			 && first_tag)
1715		  {
1716		    /* Maximize the first tag. */
1717		    if (tag_directions[max] == TRE_TAG_LEFT_MAXIMIZE)
1718		      tag_directions[max] = TRE_TAG_MAXIMIZE;
1719		    first_tag = 0;
1720		  }
1721		*result = tre_ast_new_literal(mem, min, max, pos);
1722		if (*result == NULL)
1723		  status = REG_ESPACE;
1724
1725		if (pos > *max_pos)
1726		  *max_pos = pos;
1727
1728		if (!IS_SPECIAL(lit))
1729		  ((tre_literal_t *)(*result)->obj)->u.bracket_match_list
1730		      = list;
1731		break;
1732	      }
1733	    case UNION:
1734	      {
1735		tre_union_t *uni = node->obj;
1736		tre_union_t *tmp;
1737		*result = tre_ast_new_union(mem, uni->left, uni->right);
1738		if (*result == NULL)
1739		  {
1740		    status = REG_ESPACE;
1741		    break;
1742		  }
1743		tmp = (*result)->obj;
1744		result = &tmp->left;
1745		STACK_PUSHX(stack, voidptr, uni->right);
1746		STACK_PUSHX(stack, int, COPY_RECURSE);
1747		STACK_PUSHX(stack, voidptr, &tmp->right);
1748		STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1749		STACK_PUSHX(stack, voidptr, uni->left);
1750		STACK_PUSHX(stack, int, COPY_RECURSE);
1751		break;
1752	      }
1753	    case CATENATION:
1754	      {
1755		tre_catenation_t *cat = node->obj;
1756		tre_catenation_t *tmp;
1757		*result = tre_ast_new_catenation(mem, cat->left, cat->right);
1758		if (*result == NULL)
1759		  {
1760		    status = REG_ESPACE;
1761		    break;
1762		  }
1763		tmp = (*result)->obj;
1764		tmp->left = NULL;
1765		tmp->right = NULL;
1766		result = &tmp->left;
1767
1768		STACK_PUSHX(stack, voidptr, cat->right);
1769		STACK_PUSHX(stack, int, COPY_RECURSE);
1770		STACK_PUSHX(stack, voidptr, &tmp->right);
1771		STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1772		STACK_PUSHX(stack, voidptr, cat->left);
1773		STACK_PUSHX(stack, int, COPY_RECURSE);
1774		break;
1775	      }
1776	    case ITERATION:
1777	      {
1778		tre_iteration_t *iter = node->obj;
1779		STACK_PUSHX(stack, voidptr, iter->arg);
1780		STACK_PUSHX(stack, int, COPY_RECURSE);
1781		*result = tre_ast_new_iter(mem, iter->arg, iter->min,
1782					   iter->max, iter->minimal);
1783		if (*result == NULL)
1784		  {
1785		    status = REG_ESPACE;
1786		    break;
1787		  }
1788		iter = (*result)->obj;
1789		result = &iter->arg;
1790		break;
1791	      }
1792	    default:
1793	      assert(0);
1794	      break;
1795	    }
1796	  break;
1797	}
1798    }
1799  *pos_add += num_copied;
1800  return status;
1801}
1802
1803typedef enum {
1804  EXPAND_RECURSE,
1805  EXPAND_AFTER_ITER
1806} tre_expand_ast_symbol_t;
1807
1808/* Expands each iteration node that has a finite nonzero minimum or maximum
1809   iteration count to a catenated sequence of copies of the node. */
1810static reg_errcode_t
1811tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1812	       int *position, tre_tag_direction_t *tag_directions,
1813	       int *max_depth)
1814{
1815  reg_errcode_t status = REG_OK;
1816  int bottom = tre_stack_num_objects(stack);
1817  int pos_add = 0;
1818  int pos_add_total = 0;
1819  int max_pos = 0;
1820#ifdef TRE_APPROX
1821  /* Current approximate matching parameters. */
1822  int params[TRE_PARAM_LAST];
1823  /* Approximate parameter nesting level. */
1824  int params_depth = 0;
1825#endif /* TRE_APPROX */
1826  int iter_depth = 0;
1827#ifdef TRE_APPROX
1828  int i;
1829#endif /* TRE_APPROX */
1830
1831#ifdef TRE_APPROX
1832  for (i = 0; i < TRE_PARAM_LAST; i++)
1833    params[i] = TRE_PARAM_DEFAULT;
1834#endif /* TRE_APPROX */
1835
1836  STACK_PUSHR(stack, voidptr, ast);
1837  STACK_PUSHR(stack, int, EXPAND_RECURSE);
1838  while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1839    {
1840      tre_ast_node_t *node;
1841      tre_expand_ast_symbol_t symbol;
1842
1843      if (status != REG_OK)
1844	break;
1845
1846      DPRINT(("pos_add %d\n", pos_add));
1847
1848      symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
1849      node = tre_stack_pop_voidptr(stack);
1850      switch (symbol)
1851	{
1852	case EXPAND_RECURSE:
1853	  switch (node->type)
1854	    {
1855	    case LITERAL:
1856	      {
1857		tre_literal_t *lit= node->obj;
1858		if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1859		  {
1860		    lit->position += pos_add;
1861		    if (lit->position > max_pos)
1862		      max_pos = lit->position;
1863		  }
1864		break;
1865	      }
1866	    case UNION:
1867	      {
1868		tre_union_t *uni = node->obj;
1869		STACK_PUSHX(stack, voidptr, uni->right);
1870		STACK_PUSHX(stack, int, EXPAND_RECURSE);
1871		STACK_PUSHX(stack, voidptr, uni->left);
1872		STACK_PUSHX(stack, int, EXPAND_RECURSE);
1873		break;
1874	      }
1875	    case CATENATION:
1876	      {
1877		tre_catenation_t *cat = node->obj;
1878		STACK_PUSHX(stack, voidptr, cat->right);
1879		STACK_PUSHX(stack, int, EXPAND_RECURSE);
1880		STACK_PUSHX(stack, voidptr, cat->left);
1881		STACK_PUSHX(stack, int, EXPAND_RECURSE);
1882		break;
1883	      }
1884	    case ITERATION:
1885	      {
1886		tre_iteration_t *iter = node->obj;
1887		STACK_PUSHX(stack, int, pos_add);
1888		STACK_PUSHX(stack, voidptr, node);
1889		STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
1890		STACK_PUSHX(stack, voidptr, iter->arg);
1891		STACK_PUSHX(stack, int, EXPAND_RECURSE);
1892		/* If we are going to expand this node at EXPAND_AFTER_ITER
1893		   then don't increase the `pos' fields of the nodes now, it
1894		   will get done when expanding. */
1895		if (iter->min > 1 || iter->max > 1)
1896		  pos_add = 0;
1897		iter_depth++;
1898		DPRINT(("iter\n"));
1899		break;
1900	      }
1901	    default:
1902	      assert(0);
1903	      break;
1904	    }
1905	  break;
1906	case EXPAND_AFTER_ITER:
1907	  {
1908	    tre_iteration_t *iter = node->obj;
1909	    int pos_add_last;
1910	    pos_add = tre_stack_pop_int(stack);
1911	    pos_add_last = pos_add;
1912	    /* Originally (in tre_parse_bound), if min == 0 && max == 0, we
1913	       immediate replace the whole iteration with EMPTY.  This
1914	       unfortunately drops any submatches, and messes up setting the
1915	       pmatch values (we can get tags of -1, and tag values in the
1916	       billions).  So we left it there and replace with EMPTY here. */
1917	    if (iter->min == 0 && iter->max == 0)
1918	      {
1919		tre_ast_node_t *empty = tre_ast_new_literal(mem, EMPTY, -1, -1);
1920		if (empty == NULL)
1921		  return REG_ESPACE;
1922		node->obj = empty->obj;
1923		node->type = empty->type;
1924	      }
1925	    else if (iter->min > 1 || iter->max > 1)
1926	      {
1927		tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
1928		int j;
1929		int pos_add_save = pos_add;
1930
1931		/* Create a catenated sequence of copies of the node. */
1932		for (j = 0; j < iter->min; j++)
1933		  {
1934		    tre_ast_node_t *copy;
1935		    /* Remove tags from all but the last copy. */
1936		    int flags = ((j + 1 < iter->min)
1937				 ? COPY_REMOVE_TAGS
1938				 : COPY_MAXIMIZE_FIRST_TAG);
1939		    DPRINT(("  pos_add %d\n", pos_add));
1940		    pos_add_save = pos_add;
1941		    status = tre_copy_ast(mem, stack, iter->arg, flags,
1942					  &pos_add, tag_directions, &copy,
1943					  &max_pos);
1944		    if (status != REG_OK)
1945		      return status;
1946		    if (seq1 != NULL)
1947		      seq1 = tre_ast_new_catenation(mem, seq1, copy);
1948		    else
1949		      seq1 = copy;
1950		    if (seq1 == NULL)
1951		      return REG_ESPACE;
1952		  }
1953
1954		if (iter->max == -1)
1955		  {
1956		    /* No upper limit. */
1957		    pos_add_save = pos_add;
1958		    status = tre_copy_ast(mem, stack, iter->arg, 0,
1959					  &pos_add, NULL, &seq2, &max_pos);
1960		    if (status != REG_OK)
1961		      return status;
1962		    seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
1963		    if (seq2 == NULL)
1964		      return REG_ESPACE;
1965		  }
1966		else
1967		  {
1968		    for (j = iter->min; j < iter->max; j++)
1969		      {
1970			tre_ast_node_t *tmp, *copy;
1971			pos_add_save = pos_add;
1972			status = tre_copy_ast(mem, stack, iter->arg, 0,
1973					      &pos_add, NULL, &copy, &max_pos);
1974			if (status != REG_OK)
1975			  return status;
1976			if (seq2 != NULL)
1977			  seq2 = tre_ast_new_catenation(mem, copy, seq2);
1978			else
1979			  seq2 = copy;
1980			if (seq2 == NULL)
1981			  return REG_ESPACE;
1982			tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
1983			if (tmp == NULL)
1984			  return REG_ESPACE;
1985			seq2 = tre_ast_new_union(mem, tmp, seq2);
1986			if (seq2 == NULL)
1987			  return REG_ESPACE;
1988		      }
1989		  }
1990
1991		pos_add = pos_add_save;
1992		if (seq1 == NULL)
1993		  seq1 = seq2;
1994		else if (seq2 != NULL)
1995		  seq1 = tre_ast_new_catenation(mem, seq1, seq2);
1996		if (seq1 == NULL)
1997		  return REG_ESPACE;
1998		node->obj = seq1->obj;
1999		node->type = seq1->type;
2000	      }
2001
2002	    iter_depth--;
2003	    pos_add_total += pos_add - pos_add_last;
2004	    if (iter_depth == 0)
2005	      pos_add = pos_add_total;
2006
2007#ifdef TRE_APPROX
2008	    /* If approximate parameters are specified, surround the result
2009	       with two parameter setting nodes.  The one on the left sets
2010	       the specified parameters, and the one on the right restores
2011	       the old parameters. */
2012	    if (iter->params)
2013	      {
2014		tre_ast_node_t *tmp_l, *tmp_r, *tmp_node, *node_copy;
2015		int *old_params;
2016
2017		tmp_l = tre_ast_new_literal(mem, PARAMETER, 0, -1);
2018		if (!tmp_l)
2019		  return REG_ESPACE;
2020		((tre_literal_t *)tmp_l->obj)->u.params = iter->params;
2021		iter->params[TRE_PARAM_DEPTH] = params_depth + 1;
2022		tmp_r = tre_ast_new_literal(mem, PARAMETER, 0, -1);
2023		if (!tmp_r)
2024		  return REG_ESPACE;
2025		old_params = tre_mem_alloc(mem, sizeof(*old_params)
2026					   * TRE_PARAM_LAST);
2027		if (!old_params)
2028		  return REG_ESPACE;
2029		for (i = 0; i < TRE_PARAM_LAST; i++)
2030		  old_params[i] = params[i];
2031		((tre_literal_t *)tmp_r->obj)->u.params = old_params;
2032		old_params[TRE_PARAM_DEPTH] = params_depth;
2033		/* XXX - this is the only place where ast_new_node is
2034		   needed -- should be moved inside AST module. */
2035		node_copy = tre_ast_new_node(mem, ITERATION,
2036					     sizeof(tre_iteration_t));
2037		if (!node_copy)
2038		  return REG_ESPACE;
2039		node_copy->obj = node->obj;
2040		tmp_node = tre_ast_new_catenation(mem, tmp_l, node_copy);
2041		if (!tmp_node)
2042		  return REG_ESPACE;
2043		tmp_node = tre_ast_new_catenation(mem, tmp_node, tmp_r);
2044		if (!tmp_node)
2045		  return REG_ESPACE;
2046		/* Replace the contents of `node' with `tmp_node'. */
2047		memcpy(node, tmp_node, sizeof(*node));
2048		node->obj = tmp_node->obj;
2049		node->type = tmp_node->type;
2050		params_depth++;
2051		if (params_depth > *max_depth)
2052		  *max_depth = params_depth;
2053	      }
2054#endif /* TRE_APPROX */
2055	    break;
2056	  }
2057	default:
2058	  assert(0);
2059	  break;
2060	}
2061    }
2062
2063  *position += pos_add_total;
2064
2065  /* `max_pos' should never be larger than `*position' if the above
2066     code works, but just an extra safeguard let's make sure
2067     `*position' is set large enough so enough memory will be
2068     allocated for the transition table. */
2069  if (max_pos > *position)
2070    *position = max_pos;
2071
2072#ifdef TRE_DEBUG
2073  DPRINT(("Expanded AST:\n"));
2074  tre_ast_print(ast);
2075  DPRINT(("*position %d, max_pos %d\n", *position, max_pos));
2076#endif
2077
2078  return status;
2079}
2080
2081static tre_pos_and_tags_t *
2082tre_set_empty(tre_mem_t mem)
2083{
2084  tre_pos_and_tags_t *new_set;
2085
2086  new_set = tre_mem_calloc(mem, sizeof(*new_set));
2087  if (new_set == NULL)
2088    return NULL;
2089
2090  new_set[0].position = -1;
2091  new_set[0].code_min = -1;
2092  new_set[0].code_max = -1;
2093
2094  return new_set;
2095}
2096
2097static tre_pos_and_tags_t *
2098tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
2099	    tre_bracket_match_list_t *bracket_match_list, int backref)
2100{
2101  tre_pos_and_tags_t *new_set;
2102
2103  new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
2104  if (new_set == NULL)
2105    return NULL;
2106
2107  new_set[0].position = position;
2108  new_set[0].code_min = code_min;
2109  new_set[0].code_max = code_max;
2110  new_set[0].bracket_match_list = bracket_match_list;
2111  new_set[0].backref = backref;
2112  new_set[1].position = -1;
2113  new_set[1].code_min = -1;
2114  new_set[1].code_max = -1;
2115
2116  return new_set;
2117}
2118
2119static tre_pos_and_tags_t *
2120tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
2121	      int *tags, int assertions, int *params)
2122{
2123  int s1, s2, i, j;
2124  tre_pos_and_tags_t *new_set;
2125  int *new_tags;
2126  int num_tags;
2127
2128  for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
2129  for (s1 = 0; set1[s1].position >= 0; s1++);
2130  for (s2 = 0; set2[s2].position >= 0; s2++);
2131  new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
2132  if (!new_set )
2133    return NULL;
2134
2135  for (s1 = 0; set1[s1].position >= 0; s1++)
2136    {
2137      new_set[s1].position = set1[s1].position;
2138      new_set[s1].code_min = set1[s1].code_min;
2139      new_set[s1].code_max = set1[s1].code_max;
2140      new_set[s1].assertions = set1[s1].assertions | assertions;
2141      new_set[s1].bracket_match_list = set1[s1].bracket_match_list;
2142      new_set[s1].backref = set1[s1].backref;
2143      if (set1[s1].tags == NULL && tags == NULL)
2144	new_set[s1].tags = NULL;
2145      else
2146	{
2147	  for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
2148	  new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
2149					 * (i + num_tags + 1)));
2150	  if (new_tags == NULL)
2151	    return NULL;
2152	  for (j = 0; j < i; j++)
2153	    new_tags[j] = set1[s1].tags[j];
2154	  for (i = 0; i < num_tags; i++)
2155	    new_tags[j + i] = tags[i];
2156	  new_tags[j + i] = -1;
2157	  new_set[s1].tags = new_tags;
2158	}
2159      if (set1[s1].params)
2160	new_set[s1].params = set1[s1].params;
2161      if (params)
2162	{
2163	  if (!new_set[s1].params)
2164	    new_set[s1].params = params;
2165	  else
2166	    {
2167	      new_set[s1].params = tre_mem_alloc(mem, sizeof(*params) *
2168						 TRE_PARAM_LAST);
2169	      if (!new_set[s1].params)
2170		return NULL;
2171	      for (i = 0; i < TRE_PARAM_LAST; i++)
2172		if (params[i] != TRE_PARAM_UNSET)
2173		  new_set[s1].params[i] = params[i];
2174	    }
2175	}
2176    }
2177
2178  for (s2 = 0; set2[s2].position >= 0; s2++)
2179    {
2180      new_set[s1 + s2].position = set2[s2].position;
2181      new_set[s1 + s2].code_min = set2[s2].code_min;
2182      new_set[s1 + s2].code_max = set2[s2].code_max;
2183      /* XXX - why not | assertions here as well? */
2184      new_set[s1 + s2].assertions = set2[s2].assertions;
2185      new_set[s1 + s2].bracket_match_list = set2[s2].bracket_match_list;
2186      new_set[s1 + s2].backref = set2[s2].backref;
2187      if (set2[s2].tags == NULL)
2188	new_set[s1 + s2].tags = NULL;
2189      else
2190	{
2191	  for (i = 0; set2[s2].tags[i] >= 0; i++);
2192	  new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
2193	  if (new_tags == NULL)
2194	    return NULL;
2195	  for (j = 0; j < i; j++)
2196	    new_tags[j] = set2[s2].tags[j];
2197	  new_tags[j] = -1;
2198	  new_set[s1 + s2].tags = new_tags;
2199	}
2200      if (set2[s2].params)
2201	new_set[s1 + s2].params = set2[s2].params;
2202      if (params)
2203	{
2204	  if (!new_set[s1 + s2].params)
2205	    new_set[s1 + s2].params = params;
2206	  else
2207	    {
2208	      new_set[s1 + s2].params = tre_mem_alloc(mem, sizeof(*params) *
2209						      TRE_PARAM_LAST);
2210	      if (!new_set[s1 + s2].params)
2211		return NULL;
2212	      for (i = 0; i < TRE_PARAM_LAST; i++)
2213		if (params[i] != TRE_PARAM_UNSET)
2214		  new_set[s1 + s2].params[i] = params[i];
2215	    }
2216	}
2217    }
2218  new_set[s1 + s2].position = -1;
2219  return new_set;
2220}
2221
2222/* Finds the empty path through `node' which is the one that should be
2223   taken according to POSIX.2 rules, and adds the tags on that path to
2224   `tags'.   `tags' may be NULL.  If `num_tags_seen' is not NULL, it is
2225   set to the number of tags seen on the path. */
2226static reg_errcode_t
2227tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
2228		int *assertions, int *params, int *num_tags_seen,
2229		int *params_seen)
2230{
2231  tre_literal_t *lit;
2232  tre_union_t *uni;
2233  tre_catenation_t *cat;
2234  tre_iteration_t *iter;
2235  int i;
2236  int bottom = tre_stack_num_objects(stack);
2237  reg_errcode_t status = REG_OK;
2238  if (num_tags_seen)
2239    *num_tags_seen = 0;
2240  if (params_seen)
2241    *params_seen = 0;
2242
2243  status = tre_stack_push_voidptr(stack, node);
2244
2245  /* Walk through the tree recursively. */
2246  while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
2247    {
2248      node = tre_stack_pop_voidptr(stack);
2249
2250      switch (node->type)
2251	{
2252	case LITERAL:
2253	  lit = (tre_literal_t *)node->obj;
2254	  switch (lit->code_min)
2255	    {
2256	    case TAG:
2257	      if (lit->code_max >= 0)
2258		{
2259		  if (tags != NULL)
2260		    {
2261		      /* Add the tag to `tags'. */
2262		      for (i = 0; tags[i] >= 0; i++)
2263			if (tags[i] == lit->code_max)
2264			  break;
2265		      if (tags[i] < 0)
2266			{
2267			  tags[i] = lit->code_max;
2268			  tags[i + 1] = -1;
2269			}
2270		    }
2271		  if (num_tags_seen)
2272		    (*num_tags_seen)++;
2273		}
2274	      break;
2275	    case ASSERTION:
2276	      assert(lit->code_max >= 1
2277		     || lit->code_max <= ASSERT_LAST);
2278	      if (assertions != NULL)
2279		*assertions |= lit->code_max;
2280	      break;
2281	    case PARAMETER:
2282	      if (params != NULL)
2283		for (i = 0; i < TRE_PARAM_LAST; i++)
2284		  params[i] = lit->u.params[i];
2285	      if (params_seen != NULL)
2286		*params_seen = 1;
2287	      break;
2288	    case EMPTY:
2289	      break;
2290	    default:
2291	      assert(0);
2292	      break;
2293	    }
2294	  break;
2295
2296	case UNION:
2297	  /* Subexpressions starting earlier take priority over ones
2298	     starting later, so we prefer the left subexpression over the
2299	     right subexpression. */
2300	  uni = (tre_union_t *)node->obj;
2301	  if (uni->left->nullable)
2302	    STACK_PUSHX(stack, voidptr, uni->left)
2303	  else if (uni->right->nullable)
2304	    STACK_PUSHX(stack, voidptr, uni->right)
2305	  else
2306	    assert(0);
2307	  break;
2308
2309	case CATENATION:
2310	  /* The path must go through both children. */
2311	  cat = (tre_catenation_t *)node->obj;
2312	  assert(cat->left->nullable);
2313	  assert(cat->right->nullable);
2314	  STACK_PUSHX(stack, voidptr, cat->left);
2315	  STACK_PUSHX(stack, voidptr, cat->right);
2316	  break;
2317
2318	case ITERATION:
2319	  /* A match with an empty string is preferred over no match at
2320	     all, so we go through the argument if possible. */
2321	  iter = (tre_iteration_t *)node->obj;
2322	  if (iter->arg->nullable)
2323	    STACK_PUSHX(stack, voidptr, iter->arg);
2324	  break;
2325
2326	default:
2327	  assert(0);
2328	  break;
2329	}
2330    }
2331
2332  return status;
2333}
2334
2335
2336typedef enum {
2337  NFL_RECURSE,
2338  NFL_POST_UNION,
2339  NFL_POST_CATENATION,
2340  NFL_POST_ITERATION
2341} tre_nfl_stack_symbol_t;
2342
2343
2344/* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
2345   the nodes of the AST `tree'. */
2346static reg_errcode_t
2347tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
2348{
2349  int bottom = tre_stack_num_objects(stack);
2350
2351  STACK_PUSHR(stack, voidptr, tree);
2352  STACK_PUSHR(stack, int, NFL_RECURSE);
2353
2354  while (tre_stack_num_objects(stack) > bottom)
2355    {
2356      tre_nfl_stack_symbol_t symbol;
2357      tre_ast_node_t *node;
2358
2359      symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
2360      node = tre_stack_pop_voidptr(stack);
2361      switch (symbol)
2362	{
2363	case NFL_RECURSE:
2364	  switch (node->type)
2365	    {
2366	    case LITERAL:
2367	      {
2368		tre_literal_t *lit = (tre_literal_t *)node->obj;
2369		if (IS_BACKREF(lit))
2370		  {
2371		    /* Back references: nullable = false, firstpos = {i},
2372		       lastpos = {i}. */
2373		    node->nullable = 0;
2374		    node->firstpos = tre_set_one(mem, lit->position, 0,
2375					     TRE_CHAR_MAX, NULL, -1);
2376		    if (!node->firstpos)
2377		      return REG_ESPACE;
2378		    node->lastpos = tre_set_one(mem, lit->position, 0,
2379						TRE_CHAR_MAX, NULL,
2380						(int)lit->code_max);
2381		    if (!node->lastpos)
2382		      return REG_ESPACE;
2383		  }
2384		else if (lit->code_min < 0)
2385		  {
2386		    /* Tags, empty strings, params, and zero width assertions:
2387		       nullable = true, firstpos = {}, and lastpos = {}. */
2388		    node->nullable = 1;
2389		    node->firstpos = tre_set_empty(mem);
2390		    if (!node->firstpos)
2391		      return REG_ESPACE;
2392		    node->lastpos = tre_set_empty(mem);
2393		    if (!node->lastpos)
2394		      return REG_ESPACE;
2395		  }
2396		else
2397		  {
2398		    /* Literal at position i: nullable = false, firstpos = {i},
2399		       lastpos = {i}. */
2400		    node->nullable = 0;
2401		    node->firstpos =
2402		      tre_set_one(mem, lit->position, (int)lit->code_min,
2403				  (int)lit->code_max, NULL, -1);
2404		    if (!node->firstpos)
2405		      return REG_ESPACE;
2406		    node->lastpos = tre_set_one(mem, lit->position,
2407						(int)lit->code_min,
2408						(int)lit->code_max,
2409						lit->u.bracket_match_list,
2410						-1);
2411		    if (!node->lastpos)
2412		      return REG_ESPACE;
2413		  }
2414		break;
2415	      }
2416
2417	    case UNION:
2418	      /* Compute the attributes for the two subtrees, and after that
2419		 for this node. */
2420	      STACK_PUSHR(stack, voidptr, node);
2421	      STACK_PUSHR(stack, int, NFL_POST_UNION);
2422	      STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
2423	      STACK_PUSHR(stack, int, NFL_RECURSE);
2424	      STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
2425	      STACK_PUSHR(stack, int, NFL_RECURSE);
2426	      break;
2427
2428	    case CATENATION:
2429	      /* Compute the attributes for the two subtrees, and after that
2430		 for this node. */
2431	      STACK_PUSHR(stack, voidptr, node);
2432	      STACK_PUSHR(stack, int, NFL_POST_CATENATION);
2433	      STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
2434	      STACK_PUSHR(stack, int, NFL_RECURSE);
2435	      STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
2436	      STACK_PUSHR(stack, int, NFL_RECURSE);
2437	      break;
2438
2439	    case ITERATION:
2440	      /* Compute the attributes for the subtree, and after that for
2441		 this node. */
2442	      STACK_PUSHR(stack, voidptr, node);
2443	      STACK_PUSHR(stack, int, NFL_POST_ITERATION);
2444	      STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
2445	      STACK_PUSHR(stack, int, NFL_RECURSE);
2446	      break;
2447	    }
2448	  break; /* end case: NFL_RECURSE */
2449
2450	case NFL_POST_UNION:
2451	  {
2452	    tre_union_t *uni = (tre_union_t *)node->obj;
2453	    node->nullable = uni->left->nullable || uni->right->nullable;
2454	    node->firstpos = tre_set_union(mem, uni->left->firstpos,
2455					   uni->right->firstpos, NULL, 0, NULL);
2456	    if (!node->firstpos)
2457	      return REG_ESPACE;
2458	    node->lastpos = tre_set_union(mem, uni->left->lastpos,
2459					  uni->right->lastpos, NULL, 0, NULL);
2460	    if (!node->lastpos)
2461	      return REG_ESPACE;
2462	    break;
2463	  }
2464
2465	case NFL_POST_ITERATION:
2466	  {
2467	    int num_tags, *tags, assertions, params_seen;
2468	    int *params;
2469	    reg_errcode_t status;
2470	    tre_iteration_t *iter = (tre_iteration_t *)node->obj;
2471
2472	    /* From Ville Laurikari's original 2001 Master's thesis, the
2473	       firstpos(n) and lastpos(n) of an iteration is just the
2474	       corresponding values of the iteration's argument.  Unfortunately,
2475	       this isn't sufficient for the following BRE:
2476
2477	           \(a*\)*b\(\1\)    matched against    ab
2478
2479	       The backreference wants to force the first subexpression to
2480	       be the empty string, but there is no transition for this.  So
2481	       we need to modify the lastpos(n) of an iteration to be the
2482	       equivalent of that of catentation.  Using the same notation as
2483	       in the thesis, lastpos(n) is redefined as:
2484
2485	           if nullable(c1) then
2486		       lastpos(c1) U
2487		       addtags(lastpos(c1),
2488		               emptymatch(c1))
2489		   else
2490		       lastpos(c1)
2491
2492	       where c1 is the argument node.  firstpos(n) remains the same. */
2493
2494	    /* Compute lastpos. */
2495	    if (iter->min == 0 || iter->arg->nullable)
2496	      {
2497		node->nullable = 1;
2498		if (iter->arg->nullable)
2499		  {
2500		    /* The arg matches the empty string.  Make a first pass
2501		       with tre_match_empty() to get the number of tags and
2502		       parameters. */
2503		    status = tre_match_empty(stack, iter->arg,
2504					     NULL, NULL, NULL, &num_tags,
2505					     &params_seen);
2506		    if (status != REG_OK)
2507		      return status;
2508		    /* Allocate arrays for the tags and parameters. */
2509		    tags = xmalloc(sizeof(int) * (num_tags + 1));
2510		    if (!tags)
2511		      return REG_ESPACE;
2512		    tags[0] = -1;
2513		    assertions = 0;
2514		    params = NULL;
2515		    if (params_seen)
2516		      {
2517			params = tre_mem_alloc(mem, sizeof(*params)
2518					       * TRE_PARAM_LAST);
2519			if (!params)
2520			  {
2521			    xfree(tags);
2522			    return REG_ESPACE;
2523			  }
2524		      }
2525		    /* Second pass with tre_mach_empty() to get the list of
2526		       tags and parameters. */
2527		    status = tre_match_empty(stack, iter->arg, tags,
2528					     &assertions, params, NULL, NULL);
2529		    if (status != REG_OK)
2530		      {
2531			xfree(tags);
2532			return status;
2533		      }
2534		    node->lastpos =
2535		      tre_set_union(mem, iter->arg->lastpos, iter->arg->lastpos,
2536				    tags, assertions, params);
2537		    xfree(tags);
2538		    if (!node->lastpos)
2539		      return REG_ESPACE;
2540		  }
2541		else
2542		  node->lastpos = iter->arg->lastpos;
2543	      }
2544	    else
2545	      {
2546		node->nullable = 0;
2547		node->lastpos = iter->arg->lastpos;
2548	      }
2549	    node->firstpos = iter->arg->firstpos;
2550	    break;
2551	  }
2552
2553	case NFL_POST_CATENATION:
2554	  {
2555	    int num_tags, *tags, assertions, params_seen;
2556	    int *params;
2557	    reg_errcode_t status;
2558	    tre_catenation_t *cat = node->obj;
2559	    node->nullable = cat->left->nullable && cat->right->nullable;
2560
2561	    /* Compute firstpos. */
2562	    if (cat->left->nullable)
2563	      {
2564		/* The left side matches the empty string.  Make a first pass
2565		   with tre_match_empty() to get the number of tags and
2566		   parameters. */
2567		status = tre_match_empty(stack, cat->left,
2568					 NULL, NULL, NULL, &num_tags,
2569					 &params_seen);
2570		if (status != REG_OK)
2571		  return status;
2572		/* Allocate arrays for the tags and parameters. */
2573		tags = xmalloc(sizeof(*tags) * (num_tags + 1));
2574		if (!tags)
2575		  return REG_ESPACE;
2576		tags[0] = -1;
2577		assertions = 0;
2578		params = NULL;
2579		if (params_seen)
2580		  {
2581		    params = tre_mem_alloc(mem, sizeof(*params)
2582					   * TRE_PARAM_LAST);
2583		    if (!params)
2584		      {
2585			xfree(tags);
2586			return REG_ESPACE;
2587		      }
2588		  }
2589		/* Second pass with tre_mach_empty() to get the list of
2590		   tags and parameters. */
2591		status = tre_match_empty(stack, cat->left, tags,
2592					 &assertions, params, NULL, NULL);
2593		if (status != REG_OK)
2594		  {
2595		    xfree(tags);
2596		    return status;
2597		  }
2598		node->firstpos =
2599		  tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
2600				tags, assertions, params);
2601		xfree(tags);
2602		if (!node->firstpos)
2603		  return REG_ESPACE;
2604	      }
2605	    else
2606	      {
2607		node->firstpos = cat->left->firstpos;
2608	      }
2609
2610	    /* Compute lastpos. */
2611	    if (cat->right->nullable)
2612	      {
2613		/* The right side matches the empty string.  Make a first pass
2614		   with tre_match_empty() to get the number of tags and
2615		   parameters. */
2616		status = tre_match_empty(stack, cat->right,
2617					 NULL, NULL, NULL, &num_tags,
2618					 &params_seen);
2619		if (status != REG_OK)
2620		  return status;
2621		/* Allocate arrays for the tags and parameters. */
2622		tags = xmalloc(sizeof(int) * (num_tags + 1));
2623		if (!tags)
2624		  return REG_ESPACE;
2625		tags[0] = -1;
2626		assertions = 0;
2627		params = NULL;
2628		if (params_seen)
2629		  {
2630		    params = tre_mem_alloc(mem, sizeof(*params)
2631					   * TRE_PARAM_LAST);
2632		    if (!params)
2633		      {
2634			xfree(tags);
2635			return REG_ESPACE;
2636		      }
2637		  }
2638		/* Second pass with tre_mach_empty() to get the list of
2639		   tags and parameters. */
2640		status = tre_match_empty(stack, cat->right, tags,
2641					 &assertions, params, NULL, NULL);
2642		if (status != REG_OK)
2643		  {
2644		    xfree(tags);
2645		    return status;
2646		  }
2647		node->lastpos =
2648		  tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
2649				tags, assertions, params);
2650		xfree(tags);
2651		if (!node->lastpos)
2652		  return REG_ESPACE;
2653	      }
2654	    else
2655	      {
2656		node->lastpos = cat->right->lastpos;
2657	      }
2658	    break;
2659	  }
2660
2661	default:
2662	  assert(0);
2663	  break;
2664	}
2665    }
2666
2667  return REG_OK;
2668}
2669
2670
2671/* Adds a transition from each position in `p1' to each position in `p2'. */
2672static reg_errcode_t
2673tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
2674	       tre_tnfa_transition_t *transitions,
2675	       int *counts, int *offs)
2676{
2677  tre_pos_and_tags_t *orig_p2 = p2;
2678  tre_tnfa_transition_t *trans;
2679  int i, j, k, l, dup, prev_p2_pos;
2680
2681  if (transitions != NULL)
2682    while (p1->position >= 0)
2683      {
2684	p2 = orig_p2;
2685	prev_p2_pos = -1;
2686	while (p2->position >= 0)
2687	  {
2688	    /* Optimization: if this position was already handled, skip it. */
2689	    if (p2->position == prev_p2_pos)
2690	      {
2691		p2++;
2692		continue;
2693	      }
2694	    prev_p2_pos = p2->position;
2695	    /* Set `trans' to point to the next unused transition from
2696	       position `p1->position'. */
2697	    trans = transitions + offs[p1->position];
2698	    while (trans->state != NULL)
2699	      {
2700#if 0
2701		/* If we find a previous transition from `p1->position' to
2702		   `p2->position', it is overwritten.  This can happen only
2703		   if there are nested loops in the regexp, like in "((a)*)*".
2704		   In POSIX.2 repetition using the outer loop is always
2705		   preferred over using the inner loop.	 Therefore the
2706		   transition for the inner loop is useless and can be thrown
2707		   away. */
2708		/* XXX - The same position is used for all nodes in a bracket
2709		   expression, so this optimization cannot be used (it will
2710		   break bracket expressions) unless I figure out a way to
2711		   detect it here. */
2712		if (trans->state_id == p2->position)
2713		  {
2714		    DPRINT(("*"));
2715		    break;
2716		  }
2717#endif
2718		trans++;
2719	      }
2720
2721	    if (trans->state == NULL)
2722	      (trans + 1)->state = NULL;
2723	    /* Use the character ranges, assertions, etc. from `p1' for
2724	       the transition from `p1' to `p2'. */
2725	    trans->code_min = p1->code_min;
2726	    trans->code_max = p1->code_max;
2727	    trans->state = transitions + offs[p2->position];
2728	    trans->state_id = p2->position;
2729	    trans->assertions = p1->assertions | p2->assertions
2730	      | (p1->bracket_match_list != NULL ? ASSERT_BRACKET_MATCH : 0);
2731	    if (p1->backref >= 0)
2732	      {
2733		assert((trans->assertions & ASSERT_BRACKET_MATCH) == 0);
2734		assert(p2->backref < 0);
2735		trans->u.backref = p1->backref;
2736		trans->assertions |= ASSERT_BACKREF;
2737	      }
2738	    if (p1->bracket_match_list != NULL)
2739	      {
2740		trans->u.bracket_match_list =
2741		  xmalloc(SIZEOF_BRACKET_MATCH_LIST(p1->bracket_match_list));
2742		if (trans->u.bracket_match_list == NULL)
2743		  return REG_ESPACE;
2744		memcpy(trans->u.bracket_match_list, p1->bracket_match_list,
2745		       SIZEOF_BRACKET_MATCH_LIST(p1->bracket_match_list));
2746	      }
2747
2748	    /* Find out how many tags this transition has. */
2749	    i = 0;
2750	    if (p1->tags != NULL)
2751	      while(p1->tags[i] >= 0)
2752		i++;
2753	    j = 0;
2754	    if (p2->tags != NULL)
2755	      while(p2->tags[j] >= 0)
2756		j++;
2757
2758	    /* If we are overwriting a transition, free the old tag array. */
2759	    if (trans->tags != NULL)
2760	      xfree(trans->tags);
2761	    trans->tags = NULL;
2762
2763	    /* If there were any tags, allocate an array and fill it. */
2764	    if (i + j > 0)
2765	      {
2766		trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
2767		if (!trans->tags)
2768		  return REG_ESPACE;
2769		i = 0;
2770		if (p1->tags != NULL)
2771		  while(p1->tags[i] >= 0)
2772		    {
2773		      trans->tags[i] = p1->tags[i];
2774		      i++;
2775		    }
2776		l = i;
2777		j = 0;
2778		if (p2->tags != NULL)
2779		  while (p2->tags[j] >= 0)
2780		    {
2781		      /* Don't add duplicates. */
2782		      dup = 0;
2783		      for (k = 0; k < i; k++)
2784			if (trans->tags[k] == p2->tags[j])
2785			  {
2786			    dup = 1;
2787			    break;
2788			  }
2789		      if (!dup)
2790			trans->tags[l++] = p2->tags[j];
2791		      j++;
2792		    }
2793		trans->tags[l] = -1;
2794	      }
2795
2796	    /* Set the parameter array.	 If both `p2' and `p1' have same
2797	       parameters, the values in `p2' override those in `p1'. */
2798	    if (p1->params || p2->params)
2799	      {
2800		if (!trans->params)
2801		  trans->params = xmalloc(sizeof(*trans->params)
2802					  * TRE_PARAM_LAST);
2803		if (!trans->params)
2804		  return REG_ESPACE;
2805		for (i = 0; i < TRE_PARAM_LAST; i++)
2806		  {
2807		    trans->params[i] = TRE_PARAM_UNSET;
2808		    if (p1->params && p1->params[i] != TRE_PARAM_UNSET)
2809		      trans->params[i] = p1->params[i];
2810		    if (p2->params && p2->params[i] != TRE_PARAM_UNSET)
2811		      trans->params[i] = p2->params[i];
2812		  }
2813	      }
2814	    else
2815	      {
2816		if (trans->params)
2817		  xfree(trans->params);
2818		trans->params = NULL;
2819	      }
2820
2821
2822#ifdef TRE_DEBUG
2823	    {
2824	      int *tags;
2825
2826	      DPRINT(("	 %2d -> %2d on %3d", p1->position, p2->position,
2827		      p1->code_min));
2828	      if (p1->code_max != p1->code_min)
2829		DPRINT(("-%3d", p1->code_max));
2830	      tags = trans->tags;
2831	      if (tags)
2832		{
2833		  DPRINT((", tags ["));
2834		  while (*tags >= 0)
2835		    {
2836		      DPRINT(("%d", *tags));
2837		      tags++;
2838		      if (*tags >= 0)
2839			DPRINT((","));
2840		    }
2841		  DPRINT(("]"));
2842		}
2843	      if (trans->assertions)
2844		DPRINT((", assert %d", trans->assertions));
2845	      if (trans->assertions & ASSERT_BACKREF)
2846		DPRINT((", backref %d", trans->u.backref));
2847	      else if (trans->assertions & ASSERT_BRACKET_MATCH)
2848		DPRINT((", bracket_match_list %p",
2849			trans->u.bracket_match_list));
2850	      if (trans->params)
2851		{
2852		  DPRINT((", "));
2853		  tre_print_params(trans->params);
2854		}
2855	      DPRINT(("\n"));
2856	    }
2857#endif /* TRE_DEBUG */
2858	    p2++;
2859	  }
2860	p1++;
2861      }
2862  else
2863    /* Compute a maximum limit for the number of transitions leaving
2864       from each state. */
2865    while (p1->position >= 0)
2866      {
2867	p2 = orig_p2;
2868	while (p2->position >= 0)
2869	  {
2870	    counts[p1->position]++;
2871	    p2++;
2872	  }
2873	p1++;
2874      }
2875  return REG_OK;
2876}
2877
2878/* Converts the syntax tree to a TNFA.	All the transitions in the TNFA are
2879   labelled with one character range (there are no transitions on empty
2880   strings).  The TNFA takes O(n^2) space in the worst case, `n' is size of
2881   the regexp. */
2882static reg_errcode_t
2883tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
2884		int *counts, int *offs)
2885{
2886  tre_union_t *uni;
2887  tre_catenation_t *cat;
2888  tre_iteration_t *iter;
2889  reg_errcode_t errcode = REG_OK;
2890
2891  /* XXX - recurse using a stack!. */
2892  switch (node->type)
2893    {
2894    case LITERAL:
2895      break;
2896    case UNION:
2897      uni = (tre_union_t *)node->obj;
2898      errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
2899      if (errcode != REG_OK)
2900	return errcode;
2901      errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
2902      break;
2903
2904    case CATENATION:
2905      cat = (tre_catenation_t *)node->obj;
2906      /* Add a transition from each position in cat->left->lastpos
2907	 to each position in cat->right->firstpos. */
2908      errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
2909			       transitions, counts, offs);
2910      if (errcode != REG_OK)
2911	return errcode;
2912      errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
2913      if (errcode != REG_OK)
2914	return errcode;
2915      errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
2916      break;
2917
2918    case ITERATION:
2919      iter = (tre_iteration_t *)node->obj;
2920      assert(iter->max == -1 || iter->max == 1);
2921
2922      if (iter->max == -1)
2923	{
2924	  assert(iter->min == 0 || iter->min == 1);
2925	  /* Add a transition from each last position in the iterated
2926	     expression to each first position. */
2927	  errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
2928				   transitions, counts, offs);
2929	  if (errcode != REG_OK)
2930	    return errcode;
2931	}
2932      errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
2933      break;
2934    }
2935  return errcode;
2936}
2937
2938
2939#define ERROR_EXIT(err)		  \
2940  do				  \
2941    {				  \
2942      errcode = err;		  \
2943      if (/*CONSTCOND*/1)	  \
2944      	goto error_exit;	  \
2945    }				  \
2946 while (/*CONSTCOND*/0)
2947
2948
2949int
2950tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags,
2951	    locale_t loc)
2952{
2953  tre_stack_t *stack;
2954  tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
2955  tre_pos_and_tags_t *p;
2956  int *counts = NULL, *offs = NULL;
2957  int i, add = 0;
2958  tre_tnfa_transition_t *transitions, *initial;
2959  tre_tnfa_t *tnfa = NULL;
2960  tre_submatch_data_t *submatch_data = NULL;
2961  tre_tag_direction_t *tag_directions = NULL;
2962  reg_errcode_t errcode;
2963  tre_mem_t mem;
2964
2965  /* Parse context. */
2966  tre_parse_ctx_t parse_ctx;
2967
2968  /* Allocate a stack used throughout the compilation process for various
2969     purposes. */
2970  stack = tre_stack_new(512, 10240, 128);
2971  if (!stack)
2972    return REG_ESPACE;
2973  /* Allocate a fast memory allocator. */
2974  mem = tre_mem_new();
2975  if (!mem)
2976    {
2977      tre_stack_destroy(stack);
2978      return REG_ESPACE;
2979    }
2980
2981  /* Parse the regexp. */
2982  memset(&parse_ctx, 0, sizeof(parse_ctx));
2983  parse_ctx.mem = mem;
2984  parse_ctx.stack = stack;
2985  parse_ctx.re = regex;
2986  parse_ctx.len = n;
2987  /* Only allow REG_UNGREEDY to be set if both REG_ENHANCED and REG_EXTENDED
2988     are also set */
2989  if ((cflags & (REG_ENHANCED | REG_EXTENDED)) != (REG_ENHANCED | REG_EXTENDED))
2990    cflags &= ~REG_UNGREEDY;
2991  parse_ctx.cflags = cflags;
2992  parse_ctx.max_backref = -1;
2993  parse_ctx.loc = loc;
2994  parse_ctx.submatch_id_invisible = SUBMATCH_ID_INVISIBLE_START;
2995
2996  DPRINT(("tre_compile: parsing '%.*" STRF "'\n", (int)n, regex));
2997  errcode = tre_parse(&parse_ctx);
2998  if (errcode != REG_OK)
2999    ERROR_EXIT(errcode);
3000  preg->re_nsub = parse_ctx.submatch_id - 1;
3001  tree = parse_ctx.result;
3002
3003  /* Back references and approximate matching cannot currently be used
3004     in the same regexp. */
3005  if (parse_ctx.max_backref >= 0 && parse_ctx.have_approx)
3006    ERROR_EXIT(REG_BADPAT);
3007
3008#ifdef TRE_DEBUG
3009  tre_ast_print(tree);
3010#endif /* TRE_DEBUG */
3011
3012  /* Referring to nonexistent subexpressions is illegal. */
3013  if (parse_ctx.max_backref > (int)preg->re_nsub)
3014    ERROR_EXIT(REG_ESUBREG);
3015
3016  /* Allocate the TNFA struct. */
3017  tnfa = xcalloc(1, sizeof(tre_tnfa_t));
3018  if (tnfa == NULL)
3019    ERROR_EXIT(REG_ESPACE);
3020  tnfa->have_backrefs = parse_ctx.max_backref >= 0;
3021  tnfa->have_approx = parse_ctx.have_approx;
3022  tnfa->num_submatches = parse_ctx.submatch_id;
3023  tnfa->num_submatches_invisible = parse_ctx.submatch_id_invisible
3024				   - SUBMATCH_ID_INVISIBLE_START;
3025  tnfa->num_reorder_tags = parse_ctx.num_reorder_tags;
3026  tnfa->loc = parse_ctx.loc;
3027
3028  /* Set up tags for submatch addressing.  If REG_NOSUB is set and the
3029     regexp does not have back references, this can be skipped. */
3030  if (tnfa->num_reorder_tags > 0 || !(cflags & REG_NOSUB))
3031    {
3032      DPRINT(("tre_compile: setting up tags\n"));
3033
3034      /* Figure out how many tags we will need. */
3035      errcode = tre_add_tags(NULL, stack, tree, tnfa);
3036      if (errcode != REG_OK)
3037	ERROR_EXIT(errcode);
3038#ifdef TRE_DEBUG
3039      tre_ast_print(tree);
3040#endif /* TRE_DEBUG */
3041
3042      if (tnfa->num_tags > 0)
3043	{
3044	  tag_directions = xmalloc(sizeof(*tag_directions)
3045				   * (tnfa->num_tags + 1));
3046	  if (tag_directions == NULL)
3047	    ERROR_EXIT(REG_ESPACE);
3048	  tnfa->tag_directions = tag_directions;
3049	  memset(tag_directions, -1,
3050		 sizeof(*tag_directions) * (tnfa->num_tags + 1));
3051	}
3052      tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 3,
3053				   sizeof(tnfa->minimal_tags));
3054      if (tnfa->minimal_tags == NULL)
3055	ERROR_EXIT(REG_ESPACE);
3056
3057      submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
3058			      sizeof(*submatch_data));
3059      if (submatch_data == NULL)
3060	ERROR_EXIT(REG_ESPACE);
3061      /* Set the eo_tag value to -1 to indicate that that corresponding
3062       * submatch has not be completed yet */
3063      for (i = 0; i < parse_ctx.submatch_id; i++)
3064	{
3065	  submatch_data[i].eo_tag = -1;
3066	}
3067      tnfa->submatch_data = submatch_data;
3068
3069      errcode = tre_add_tags(mem, stack, tree, tnfa);
3070      if (errcode != REG_OK)
3071	ERROR_EXIT(errcode);
3072
3073#ifdef TRE_DEBUG
3074      for (i = 0; i < parse_ctx.submatch_id; i++)
3075	DPRINT(("pmatch[%d] = {t%d, t%d}\n",
3076		i, submatch_data[i].so_tag, submatch_data[i].eo_tag));
3077      for (i = 0; i <= tnfa->num_tags; i++)
3078	DPRINT(("t%d is %s\n", i, tag_dir_str[tag_directions[i]]));
3079#endif /* TRE_DEBUG */
3080    }
3081
3082  /* Expand iteration nodes. */
3083  errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
3084			   tag_directions, &tnfa->params_depth);
3085  if (errcode != REG_OK)
3086    ERROR_EXIT(errcode);
3087
3088  /* Add a dummy node for the final state.
3089     XXX - For certain patterns this dummy node can be optimized away,
3090	   for example "a*" or "ab*".	Figure out a simple way to detect
3091	   this possibility. */
3092  tmp_ast_l = tree;
3093  tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
3094  if (tmp_ast_r == NULL)
3095    ERROR_EXIT(REG_ESPACE);
3096
3097  tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
3098  if (tree == NULL)
3099    ERROR_EXIT(REG_ESPACE);
3100
3101#ifdef TRE_DEBUG
3102  tre_ast_print(tree);
3103  DPRINT(("Number of states: %d\n", parse_ctx.position));
3104  if (submatch_data)
3105    for (i = 0; i < parse_ctx.submatch_id; i++)
3106      DPRINT(("pmatch[%d] = {t%d, t%d}\n",
3107	      i, submatch_data[i].so_tag, submatch_data[i].eo_tag));
3108  if (tag_directions)
3109    for (i = 0; i <= tnfa->num_tags; i++)
3110      DPRINT(("t%d is %s\n", i, tag_dir_str[tag_directions[i]]));
3111#endif /* TRE_DEBUG */
3112
3113  errcode = tre_compute_nfl(mem, stack, tree);
3114  if (errcode != REG_OK)
3115    ERROR_EXIT(errcode);
3116
3117  counts = xmalloc(sizeof(int) * parse_ctx.position);
3118  if (counts == NULL)
3119    ERROR_EXIT(REG_ESPACE);
3120
3121  offs = xmalloc(sizeof(int) * parse_ctx.position);
3122  if (offs == NULL)
3123    ERROR_EXIT(REG_ESPACE);
3124
3125  for (i = 0; i < parse_ctx.position; i++)
3126    counts[i] = 0;
3127  tre_ast_to_tnfa(tree, NULL, counts, NULL);
3128
3129  add = 0;
3130  for (i = 0; i < parse_ctx.position; i++)
3131    {
3132      offs[i] = add;
3133      add += counts[i] + 1;
3134      counts[i] = 0;
3135    }
3136  transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
3137  if (transitions == NULL)
3138    ERROR_EXIT(REG_ESPACE);
3139  tnfa->transitions = transitions;
3140  tnfa->num_transitions = add;
3141
3142  DPRINT(("Converting to TNFA:\n"));
3143  errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
3144  if (errcode != REG_OK)
3145    ERROR_EXIT(errcode);
3146
3147#ifdef USE_FIRSTPOS_CHARS /* not defined */
3148  /* If in eight bit mode, compute a table of characters that can be the
3149     first character of a match. */
3150  tnfa->first_char = -1;
3151  if (TRE_MB_CUR_MAX_L(tnfa->loc) == 1 && !tmp_ast_l->nullable)
3152    {
3153      int count = 0;
3154      tre_cint_t k;
3155      DPRINT(("Characters that can start a match:"));
3156      tnfa->firstpos_chars = xcalloc(256, sizeof(char));
3157      if (tnfa->firstpos_chars == NULL)
3158	ERROR_EXIT(REG_ESPACE);
3159      for (p = tree->firstpos; p->position >= 0; p++)
3160	{
3161	  tre_tnfa_transition_t *j = transitions + offs[p->position];
3162	  while (j->state != NULL)
3163	    {
3164	      for (k = j->code_min; k <= j->code_max && k < 256; k++)
3165		{
3166		  DPRINT((" %d", k));
3167		  tnfa->firstpos_chars[k] = 1;
3168		  count++;
3169		}
3170	      j++;
3171	    }
3172	}
3173      DPRINT(("\n"));
3174#define TRE_OPTIMIZE_FIRST_CHAR 1
3175#if TRE_OPTIMIZE_FIRST_CHAR
3176      if (count == 1)
3177	{
3178	  for (k = 0; k < 256; k++)
3179	    if (tnfa->firstpos_chars[k])
3180	      {
3181		DPRINT(("first char must be %d\n", k));
3182		tnfa->first_char = k;
3183		xfree(tnfa->firstpos_chars);
3184		tnfa->firstpos_chars = NULL;
3185		break;
3186	      }
3187	}
3188#endif
3189
3190    }
3191  else
3192    tnfa->firstpos_chars = NULL;
3193#else /* !USE_FIRSTPOS_CHARS */
3194
3195  /* Set first_char only if there is only one character that can be the
3196     first character of a match */
3197  tnfa->first_char = -1;
3198  if (!tmp_ast_l->nullable)
3199    {
3200      int scanning = 1;
3201      for (p = tree->firstpos; scanning && p->position >= 0; p++)
3202	{
3203	  tre_tnfa_transition_t *j = transitions + offs[p->position];
3204	  while (j->state != NULL)
3205	    {
3206	      if (j->code_min <= j->code_max)
3207		{
3208		  if (j->code_max != j->code_min || j->code_min == -1 || tnfa->first_char != -1)
3209		    {
3210		      tnfa->first_char = -1;
3211		      scanning = 0;
3212		      break;
3213		    }
3214		  tnfa->first_char = j->code_min;
3215		}
3216	      j++;
3217	    }
3218	}
3219#ifdef TRE_DEBUG
3220      if (tnfa->first_char >= 0)
3221	DPRINT(("first char must be %d\n", tnfa->first_char));
3222#endif /* TRE_DEBUG */
3223    }
3224#endif /* !USE_FIRSTPOS_CHARS */
3225
3226  p = tree->firstpos;
3227  i = 0;
3228  while (p->position >= 0)
3229    {
3230      i++;
3231
3232#ifdef TRE_DEBUG
3233      {
3234	int *tags;
3235	DPRINT(("initial: %d", p->position));
3236	tags = p->tags;
3237	if (tags != NULL)
3238	  {
3239	    if (*tags >= 0)
3240	      DPRINT(("/"));
3241	    while (*tags >= 0)
3242	      {
3243		DPRINT(("%d", *tags));
3244		tags++;
3245		if (*tags >= 0)
3246		  DPRINT((","));
3247	      }
3248	  }
3249	DPRINT((", assert %d", p->assertions));
3250	if (p->params)
3251	  {
3252	    DPRINT((", "));
3253	    tre_print_params(p->params);
3254	  }
3255	DPRINT(("\n"));
3256      }
3257#endif /* TRE_DEBUG */
3258
3259      p++;
3260    }
3261
3262  initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
3263  if (initial == NULL)
3264    ERROR_EXIT(REG_ESPACE);
3265  tnfa->initial = initial;
3266
3267  i = 0;
3268  for (p = tree->firstpos; p->position >= 0; p++)
3269    {
3270      initial[i].state = transitions + offs[p->position];
3271      initial[i].state_id = p->position;
3272      initial[i].tags = NULL;
3273      /* Copy the arrays p->tags, and p->params, they are allocated
3274	 from a tre_mem object. */
3275      if (p->tags)
3276	{
3277	  int j;
3278	  for (j = 0; p->tags[j] >= 0; j++);
3279	  initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
3280	  if (!initial[i].tags)
3281	    ERROR_EXIT(REG_ESPACE);
3282	  memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
3283	}
3284      initial[i].params = NULL;
3285      if (p->params)
3286	{
3287	  initial[i].params = xmalloc(sizeof(*p->params) * TRE_PARAM_LAST);
3288	  if (!initial[i].params)
3289	    ERROR_EXIT(REG_ESPACE);
3290	  memcpy(initial[i].params, p->params,
3291		 sizeof(*p->params) * TRE_PARAM_LAST);
3292	}
3293      initial[i].assertions = p->assertions;
3294      i++;
3295    }
3296  initial[i].state = NULL;
3297
3298  tnfa->num_transitions = add;
3299  tnfa->final = transitions + offs[tree->lastpos[0].position];
3300  tnfa->num_states = parse_ctx.position;
3301  tnfa->cflags = cflags;
3302
3303  DPRINT(("final state %d (%p)\n", tree->lastpos[0].position,
3304	  (void *)tnfa->final));
3305
3306  tre_mem_destroy(mem);
3307  tre_stack_destroy(stack);
3308  xfree(counts);
3309  xfree(offs);
3310
3311#ifdef TRE_USE_SYSTEM_REGEX_H
3312  preg->re_magic = RE_MAGIC;
3313#endif /* TRE_USE_SYSTEM_REGEX_H */
3314  preg->TRE_REGEX_T_FIELD = (void *)tnfa;
3315#ifdef __LIBC__
3316  /* In Libc, we need to retain the locale.  Outside Libc, we already called
3317     duplocale() which does the retaining. */
3318  XL_RETAIN(tnfa->loc);
3319#endif /* __LIBC__ */
3320  return REG_OK;
3321
3322 error_exit:
3323  /* Free everything that was allocated and return the error code. */
3324  tre_mem_destroy(mem);
3325  if (stack != NULL)
3326    tre_stack_destroy(stack);
3327  if (counts != NULL)
3328    xfree(counts);
3329  if (offs != NULL)
3330    xfree(offs);
3331
3332  /* Set tnfa into preg, so that calling tre_free() will free the contents
3333     of tnfa.  But in Libc, NULL out the loc field since we never retained
3334     the locale.  Outside Libc, we let tre_free() call freelocale(). */
3335  preg->TRE_REGEX_T_FIELD = (void *)tnfa;
3336#ifdef __LIBC__
3337  if(tnfa) tnfa->loc = NULL;
3338#endif /* __LIBC__ */
3339
3340  tre_free(preg);
3341  return errcode;
3342}
3343
3344
3345
3346
3347void
3348tre_free(regex_t *preg)
3349{
3350  tre_tnfa_t *tnfa;
3351  unsigned int i;
3352  tre_tnfa_transition_t *trans;
3353
3354#ifdef TRE_USE_SYSTEM_REGEX_H
3355  preg->re_magic = 0;
3356#endif /* TRE_USE_SYSTEM_REGEX_H */
3357  tnfa = (void *)preg->TRE_REGEX_T_FIELD;
3358  if (!tnfa)
3359    return;
3360  preg->TRE_REGEX_T_FIELD = NULL;
3361
3362  for (i = 0; i < tnfa->num_transitions; i++)
3363    if (tnfa->transitions[i].state)
3364      {
3365	if (tnfa->transitions[i].tags)
3366	  xfree(tnfa->transitions[i].tags);
3367	if (tnfa->transitions[i].assertions & ASSERT_BRACKET_MATCH)
3368	  xfree(tnfa->transitions[i].u.bracket_match_list);
3369	if (tnfa->transitions[i].params)
3370	  xfree(tnfa->transitions[i].params);
3371      }
3372  if (tnfa->transitions)
3373    xfree(tnfa->transitions);
3374
3375  if (tnfa->initial)
3376    {
3377      for (trans = tnfa->initial; trans->state; trans++)
3378	{
3379	  if (trans->tags)
3380	    xfree(trans->tags);
3381	  if (trans->params)
3382	    xfree(trans->params);
3383	}
3384      xfree(tnfa->initial);
3385    }
3386
3387  if (tnfa->submatch_data)
3388    {
3389      xfree(tnfa->submatch_data);
3390    }
3391
3392  if (tnfa->tag_directions)
3393    xfree(tnfa->tag_directions);
3394#ifdef USE_FIRSTPOS_CHARS /* not defined */
3395  if (tnfa->firstpos_chars)
3396    xfree(tnfa->firstpos_chars);
3397#endif /* USE_FIRSTPOS_CHARS */
3398  if (tnfa->minimal_tags)
3399    xfree(tnfa->minimal_tags);
3400
3401  if (tnfa->loc)
3402#ifdef __LIBC__
3403    XL_RELEASE(tnfa->loc);
3404#else /* !__LIBC__ */
3405    freelocale(tnfa->loc);
3406#endif /* !__LIBC__ */
3407
3408  if (tnfa->last_matched_branch)
3409    xfree(tnfa->last_matched_branch);
3410
3411  xfree(tnfa);
3412}
3413
3414#ifndef __LIBC__
3415char *
3416tre_version(void)
3417{
3418  static char str[256];
3419  char *version;
3420
3421  if (str[0] == 0)
3422    {
3423      (void) tre_config(TRE_CONFIG_VERSION, &version);
3424      (void) snprintf(str, sizeof(str), "TRE %s (BSD)", version);
3425    }
3426  return str;
3427}
3428
3429int
3430tre_config(int query, void *result)
3431{
3432  int *int_result = result;
3433  const char **string_result = result;
3434
3435  switch (query)
3436    {
3437    case TRE_CONFIG_APPROX:
3438#ifdef TRE_APPROX
3439      *int_result = 1;
3440#else /* !TRE_APPROX */
3441      *int_result = 0;
3442#endif /* !TRE_APPROX */
3443      return REG_OK;
3444
3445    case TRE_CONFIG_WCHAR:
3446#ifdef TRE_WCHAR
3447      *int_result = 1;
3448#else /* !TRE_WCHAR */
3449      *int_result = 0;
3450#endif /* !TRE_WCHAR */
3451      return REG_OK;
3452
3453    case TRE_CONFIG_MULTIBYTE:
3454#ifdef TRE_MULTIBYTE
3455      *int_result = 1;
3456#else /* !TRE_MULTIBYTE */
3457      *int_result = 0;
3458#endif /* !TRE_MULTIBYTE */
3459      return REG_OK;
3460
3461    case TRE_CONFIG_SYSTEM_ABI:
3462#ifdef TRE_CONFIG_SYSTEM_ABI
3463      *int_result = 1;
3464#else /* !TRE_CONFIG_SYSTEM_ABI */
3465      *int_result = 0;
3466#endif /* !TRE_CONFIG_SYSTEM_ABI */
3467      return REG_OK;
3468
3469    case TRE_CONFIG_VERSION:
3470      *string_result = TRE_VERSION;
3471      return REG_OK;
3472    }
3473
3474  return REG_NOMATCH;
3475}
3476#endif /* !__LIBC__ */
3477
3478
3479/* EOF */
3480