1/*
2  tre-compile.c - TRE regex compiler
3
4  This software is released under a BSD-style license.
5  See the file LICENSE for details and copyright.
6
7*/
8
9/*
10  TODO:
11   - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
12     function calls.
13*/
14
15
16#ifdef HAVE_CONFIG_H
17#include <config.h>
18#endif /* HAVE_CONFIG_H */
19#include <stdio.h>
20#include <assert.h>
21#include <string.h>
22
23#include "tre-internal.h"
24#include "tre-mem.h"
25#include "tre-stack.h"
26#include "tre-ast.h"
27#include "tre-parse.h"
28#include "tre-compile.h"
29#include "tre.h"
30#include "xmalloc.h"
31
32/*
33  Algorithms to setup tags so that submatch addressing can be done.
34*/
35
36
37/* Inserts a catenation node to the root of the tree given in `node'.
38   As the left child a new tag with number `tag_id' to `node' is added,
39   and the right child is the old root. */
40static reg_errcode_t
41tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
42{
43  tre_catenation_t *c;
44
45  DPRINT(("add_tag_left: tag %d\n", tag_id));
46
47  c = tre_mem_alloc(mem, sizeof(*c));
48  if (c == NULL)
49    return REG_ESPACE;
50  c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
51  if (c->left == NULL)
52    return REG_ESPACE;
53  c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
54  if (c->right == NULL)
55    return REG_ESPACE;
56
57  c->right->obj = node->obj;
58  c->right->type = node->type;
59  c->right->nullable = -1;
60  c->right->submatch_id = -1;
61  c->right->firstpos = NULL;
62  c->right->lastpos = NULL;
63  c->right->num_tags = 0;
64  node->obj = c;
65  node->type = CATENATION;
66  return REG_OK;
67}
68
69/* Inserts a catenation node to the root of the tree given in `node'.
70   As the right child a new tag with number `tag_id' to `node' is added,
71   and the left child is the old root. */
72static reg_errcode_t
73tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
74{
75  tre_catenation_t *c;
76
77  DPRINT(("tre_add_tag_right: tag %d\n", tag_id));
78
79  c = tre_mem_alloc(mem, sizeof(*c));
80  if (c == NULL)
81    return REG_ESPACE;
82  c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
83  if (c->right == NULL)
84    return REG_ESPACE;
85  c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
86  if (c->left == NULL)
87    return REG_ESPACE;
88
89  c->left->obj = node->obj;
90  c->left->type = node->type;
91  c->left->nullable = -1;
92  c->left->submatch_id = -1;
93  c->left->firstpos = NULL;
94  c->left->lastpos = NULL;
95  c->left->num_tags = 0;
96  node->obj = c;
97  node->type = CATENATION;
98  return REG_OK;
99}
100
101typedef enum {
102  ADDTAGS_RECURSE,
103  ADDTAGS_AFTER_ITERATION,
104  ADDTAGS_AFTER_UNION_LEFT,
105  ADDTAGS_AFTER_UNION_RIGHT,
106  ADDTAGS_AFTER_CAT_LEFT,
107  ADDTAGS_AFTER_CAT_RIGHT,
108  ADDTAGS_SET_SUBMATCH_END
109} tre_addtags_symbol_t;
110
111
112typedef struct {
113  int tag;
114  int next_tag;
115} tre_tag_states_t;
116
117
118/* Go through `regset' and set submatch data for submatches that are
119   using this tag. */
120static void
121tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag)
122{
123  int i;
124
125  for (i = 0; regset[i] >= 0; i++)
126    {
127      int id = regset[i] / 2;
128      int start = !(regset[i] % 2);
129      DPRINT(("  Using tag %d for %s offset of "
130	      "submatch %d\n", tag,
131	      start ? "start" : "end", id));
132      if (start)
133	tnfa->submatch_data[id].so_tag = tag;
134      else
135	tnfa->submatch_data[id].eo_tag = tag;
136    }
137  regset[0] = -1;
138}
139
140
141/* Adds tags to appropriate locations in the parse tree in `tree', so that
142   subexpressions marked for submatch addressing can be traced. */
143static reg_errcode_t
144tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
145	     tre_tnfa_t *tnfa)
146{
147  reg_errcode_t status = REG_OK;
148  tre_addtags_symbol_t symbol;
149  tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
150  int bottom = tre_stack_num_objects(stack);
151  /* True for first pass (counting number of needed tags) */
152  int first_pass = (mem == NULL || tnfa == NULL);
153  int *regset, *orig_regset;
154  int num_tags = 0; /* Total number of tags. */
155  int num_minimals = 0;	 /* Number of special minimal tags. */
156  int tag = 0;	    /* The tag that is to be added next. */
157  int next_tag = 1; /* Next tag to use after this one. */
158  int *parents;	    /* Stack of submatches the current submatch is
159		       contained in. */
160  int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
161  tre_tag_states_t *saved_states;
162
163  tre_tag_direction_t direction = TRE_TAG_MINIMIZE;
164  if (!first_pass)
165    {
166      tnfa->end_tag = 0;
167      tnfa->minimal_tags[0] = -1;
168    }
169
170  regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2));
171  if (regset == NULL)
172    return REG_ESPACE;
173  regset[0] = -1;
174  orig_regset = regset;
175
176  parents = xmalloc(sizeof(*parents) * (tnfa->num_submatches + 1));
177  if (parents == NULL)
178    {
179      xfree(regset);
180      return REG_ESPACE;
181    }
182  parents[0] = -1;
183
184  saved_states = xmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1));
185  if (saved_states == NULL)
186    {
187      xfree(regset);
188      xfree(parents);
189      return REG_ESPACE;
190    }
191  else
192    {
193      unsigned int i;
194      for (i = 0; i <= tnfa->num_submatches; i++)
195	saved_states[i].tag = -1;
196    }
197
198  STACK_PUSH(stack, voidptr, node);
199  STACK_PUSH(stack, long, ADDTAGS_RECURSE);
200
201  while (tre_stack_num_objects(stack) > bottom)
202    {
203      if (status != REG_OK)
204	break;
205
206      symbol = (tre_addtags_symbol_t)tre_stack_pop_long(stack);
207      switch (symbol)
208	{
209
210	case ADDTAGS_SET_SUBMATCH_END:
211	  {
212	    int id = (int)tre_stack_pop_long(stack);
213	    int i;
214
215	    /* Add end of this submatch to regset. */
216	    for (i = 0; regset[i] >= 0; i++);
217	    regset[i] = id * 2 + 1;
218	    regset[i + 1] = -1;
219
220	    /* Pop this submatch from the parents stack. */
221	    for (i = 0; parents[i] >= 0; i++);
222	    parents[i - 1] = -1;
223	    break;
224	  }
225
226	case ADDTAGS_RECURSE:
227	  node = tre_stack_pop_voidptr(stack);
228
229	  if (node->submatch_id >= 0)
230	    {
231	      int id = node->submatch_id;
232	      int i;
233
234
235	      /* Add start of this submatch to regset. */
236	      for (i = 0; regset[i] >= 0; i++);
237	      regset[i] = id * 2;
238	      regset[i + 1] = -1;
239
240	      if (!first_pass)
241		{
242		  for (i = 0; parents[i] >= 0; i++);
243		  tnfa->submatch_data[id].parents = NULL;
244		  if (i > 0)
245		    {
246		      int *p = xmalloc(sizeof(*p) * (i + 1));
247		      if (p == NULL)
248			{
249			  status = REG_ESPACE;
250			  break;
251			}
252		      assert(tnfa->submatch_data[id].parents == NULL);
253		      tnfa->submatch_data[id].parents = p;
254		      for (i = 0; parents[i] >= 0; i++)
255			p[i] = parents[i];
256		      p[i] = -1;
257		    }
258		}
259
260	      /* Add end of this submatch to regset after processing this
261		 node. */
262	      STACK_PUSHX(stack, long, node->submatch_id);
263	      STACK_PUSHX(stack, long, ADDTAGS_SET_SUBMATCH_END);
264	    }
265
266	  switch (node->type)
267	    {
268	    case LITERAL:
269	      {
270		tre_literal_t *lit = node->obj;
271
272		if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
273		  {
274		    int i;
275		    DPRINT(("Literal %d-%d\n",
276			    (int)lit->code_min, (int)lit->code_max));
277		    if (regset[0] >= 0)
278		      {
279			/* Regset is not empty, so add a tag before the
280			   literal or backref. */
281			if (!first_pass)
282			  {
283			    status = tre_add_tag_left(mem, node, tag);
284			    tnfa->tag_directions[tag] = direction;
285			    if (minimal_tag >= 0)
286			      {
287				DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
288				for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
289				tnfa->minimal_tags[i] = tag;
290				tnfa->minimal_tags[i + 1] = minimal_tag;
291				tnfa->minimal_tags[i + 2] = -1;
292				minimal_tag = -1;
293				num_minimals++;
294			      }
295			    tre_purge_regset(regset, tnfa, tag);
296			  }
297			else
298			  {
299			    DPRINT(("  num_tags = 1\n"));
300			    node->num_tags = 1;
301			  }
302
303			DPRINT(("  num_tags++\n"));
304			regset[0] = -1;
305			tag = next_tag;
306			num_tags++;
307			next_tag++;
308		      }
309		  }
310		else
311		  {
312		    assert(!IS_TAG(lit));
313		  }
314		break;
315	      }
316	    case CATENATION:
317	      {
318		tre_catenation_t *cat = node->obj;
319		tre_ast_node_t *left = cat->left;
320		tre_ast_node_t *right = cat->right;
321		int reserved_tag = -1;
322		DPRINT(("Catenation, next_tag = %d\n", next_tag));
323
324
325		/* After processing right child. */
326		STACK_PUSHX(stack, voidptr, node);
327		STACK_PUSHX(stack, long, ADDTAGS_AFTER_CAT_RIGHT);
328
329		/* Process right child. */
330		STACK_PUSHX(stack, voidptr, right);
331		STACK_PUSHX(stack, long, ADDTAGS_RECURSE);
332
333		/* After processing left child. */
334		STACK_PUSHX(stack, long, (long)(next_tag + left->num_tags));
335		DPRINT(("  Pushing %d for after left\n",
336			next_tag + left->num_tags));
337		if (left->num_tags > 0 && right->num_tags > 0)
338		  {
339		    /* Reserve the next tag to the right child. */
340		    DPRINT(("  Reserving next_tag %d to right child\n",
341			    next_tag));
342		    reserved_tag = next_tag;
343		    next_tag++;
344		  }
345		STACK_PUSHX(stack, long, reserved_tag);
346		STACK_PUSHX(stack, long, ADDTAGS_AFTER_CAT_LEFT);
347
348		/* Process left child. */
349		STACK_PUSHX(stack, voidptr, left);
350		STACK_PUSHX(stack, long, ADDTAGS_RECURSE);
351
352		}
353	      break;
354	    case ITERATION:
355	      {
356		tre_iteration_t *iter = node->obj;
357		DPRINT(("Iteration\n"));
358
359		if (first_pass)
360		  {
361		    STACK_PUSHX(stack, long, regset[0] >= 0 || iter->minimal);
362		  }
363		else
364		  {
365		    STACK_PUSHX(stack, long, tag);
366		    STACK_PUSHX(stack, long, iter->minimal);
367		  }
368		STACK_PUSHX(stack, voidptr, node);
369		STACK_PUSHX(stack, long, ADDTAGS_AFTER_ITERATION);
370
371		STACK_PUSHX(stack, voidptr, iter->arg);
372		STACK_PUSHX(stack, long, ADDTAGS_RECURSE);
373
374		/* Regset is not empty, so add a tag here. */
375		if (regset[0] >= 0 || iter->minimal)
376		  {
377		    if (!first_pass)
378		      {
379			int i;
380			status = tre_add_tag_left(mem, node, tag);
381			if (iter->minimal)
382			  tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE;
383			else
384			  tnfa->tag_directions[tag] = direction;
385			if (minimal_tag >= 0)
386			  {
387			    DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
388			    for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
389			    tnfa->minimal_tags[i] = tag;
390			    tnfa->minimal_tags[i + 1] = minimal_tag;
391			    tnfa->minimal_tags[i + 2] = -1;
392			    minimal_tag = -1;
393			    num_minimals++;
394			  }
395			tre_purge_regset(regset, tnfa, tag);
396		      }
397
398		    DPRINT(("  num_tags++\n"));
399		    regset[0] = -1;
400		    tag = next_tag;
401		    num_tags++;
402		    next_tag++;
403		  }
404		direction = TRE_TAG_MINIMIZE;
405	      }
406	      break;
407	    case UNION:
408	      {
409		tre_union_t *uni = node->obj;
410		tre_ast_node_t *left = uni->left;
411		tre_ast_node_t *right = uni->right;
412		int left_tag;
413		int right_tag;
414
415		if (regset[0] >= 0)
416		  {
417		    left_tag = next_tag;
418		    right_tag = next_tag + 1;
419		  }
420		else
421		  {
422		    left_tag = tag;
423		    right_tag = next_tag;
424		  }
425
426		DPRINT(("Union\n"));
427
428		/* After processing right child. */
429		STACK_PUSHX(stack, long, right_tag);
430		STACK_PUSHX(stack, long, left_tag);
431		STACK_PUSHX(stack, voidptr, regset);
432		STACK_PUSHX(stack, long, regset[0] >= 0);
433		STACK_PUSHX(stack, voidptr, node);
434		STACK_PUSHX(stack, voidptr, right);
435		STACK_PUSHX(stack, voidptr, left);
436		STACK_PUSHX(stack, long, ADDTAGS_AFTER_UNION_RIGHT);
437
438		/* Process right child. */
439		STACK_PUSHX(stack, voidptr, right);
440		STACK_PUSHX(stack, long, ADDTAGS_RECURSE);
441
442		/* After processing left child. */
443		STACK_PUSHX(stack, long, ADDTAGS_AFTER_UNION_LEFT);
444
445		/* Process left child. */
446		STACK_PUSHX(stack, voidptr, left);
447		STACK_PUSHX(stack, long, ADDTAGS_RECURSE);
448
449		/* Regset is not empty, so add a tag here. */
450		if (regset[0] >= 0)
451		  {
452		    if (!first_pass)
453		      {
454			int i;
455			status = tre_add_tag_left(mem, node, tag);
456			tnfa->tag_directions[tag] = direction;
457			if (minimal_tag >= 0)
458			  {
459			    DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
460			    for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
461			    tnfa->minimal_tags[i] = tag;
462			    tnfa->minimal_tags[i + 1] = minimal_tag;
463			    tnfa->minimal_tags[i + 2] = -1;
464			    minimal_tag = -1;
465			    num_minimals++;
466			  }
467			tre_purge_regset(regset, tnfa, tag);
468		      }
469
470		    DPRINT(("  num_tags++\n"));
471		    regset[0] = -1;
472		    tag = next_tag;
473		    num_tags++;
474		    next_tag++;
475		  }
476
477		if (node->num_submatches > 0)
478		  {
479		    /* The next two tags are reserved for markers. */
480		    next_tag++;
481		    tag = next_tag;
482		    next_tag++;
483		  }
484
485		break;
486	      }
487	    }
488
489	  if (node->submatch_id >= 0)
490	    {
491	      int i;
492	      /* Push this submatch on the parents stack. */
493	      for (i = 0; parents[i] >= 0; i++);
494	      parents[i] = node->submatch_id;
495	      parents[i + 1] = -1;
496	    }
497
498	  break; /* end case: ADDTAGS_RECURSE */
499
500	case ADDTAGS_AFTER_ITERATION:
501	  {
502	    int minimal = 0;
503	    int enter_tag;
504	    node = tre_stack_pop_voidptr(stack);
505	    if (first_pass)
506	      {
507		node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags
508		  + tre_stack_pop_long(stack);
509		minimal_tag = -1;
510	      }
511	    else
512	      {
513		minimal = tre_stack_pop_int(stack);
514		enter_tag = tre_stack_pop_int(stack);
515		if (minimal)
516		  minimal_tag = enter_tag;
517	      }
518
519	    DPRINT(("After iteration\n"));
520	    if (!first_pass)
521	      {
522		DPRINT(("  Setting direction to %s\n",
523			minimal ? "minimize" : "maximize"));
524		if (minimal)
525		  direction = TRE_TAG_MINIMIZE;
526		else
527		  direction = TRE_TAG_MAXIMIZE;
528	      }
529	    break;
530	  }
531
532	case ADDTAGS_AFTER_CAT_LEFT:
533	  {
534	    int new_tag = tre_stack_pop_int(stack);
535	    next_tag = tre_stack_pop_int(stack);
536	    DPRINT(("After cat left, tag = %d, next_tag = %d\n",
537		    tag, next_tag));
538	    if (new_tag >= 0)
539	      {
540		DPRINT(("  Setting tag to %d\n", new_tag));
541		tag = new_tag;
542	      }
543	    break;
544	  }
545
546	case ADDTAGS_AFTER_CAT_RIGHT:
547	  DPRINT(("After cat right\n"));
548	  node = tre_stack_pop_voidptr(stack);
549	  if (first_pass)
550	    node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags
551	      + ((tre_catenation_t *)node->obj)->right->num_tags;
552	  break;
553
554	case ADDTAGS_AFTER_UNION_LEFT:
555	  DPRINT(("After union left\n"));
556	  /* Lift the bottom of the `regset' array so that when processing
557	     the right operand the items currently in the array are
558	     invisible.	 The original bottom was saved at ADDTAGS_UNION and
559	     will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
560	  while (*regset >= 0)
561	    regset++;
562	  break;
563
564	case ADDTAGS_AFTER_UNION_RIGHT:
565	  {
566	    int added_tags, tag_left, tag_right;
567	    tre_ast_node_t *left = tre_stack_pop_voidptr(stack);
568	    tre_ast_node_t *right = tre_stack_pop_voidptr(stack);
569	    DPRINT(("After union right\n"));
570	    node = tre_stack_pop_voidptr(stack);
571	    added_tags = tre_stack_pop_int(stack);
572	    if (first_pass)
573	      {
574		node->num_tags = ((tre_union_t *)node->obj)->left->num_tags
575		  + ((tre_union_t *)node->obj)->right->num_tags + added_tags
576		  + ((node->num_submatches > 0) ? 2 : 0);
577	      }
578	    regset = tre_stack_pop_voidptr(stack);
579	    tag_left = tre_stack_pop_int(stack);
580	    tag_right = tre_stack_pop_int(stack);
581
582	    /* Add tags after both children, the left child gets a smaller
583	       tag than the right child.  This guarantees that we prefer
584	       the left child over the right child. */
585	    /* XXX - This is not always necessary (if the children have
586	       tags which must be seen for every match of that child). */
587	    /* XXX - Check if this is the only place where tre_add_tag_right
588	       is used.	 If so, use tre_add_tag_left (putting the tag before
589	       the child as opposed after the child) and throw away
590	       tre_add_tag_right. */
591	    if (node->num_submatches > 0)
592	      {
593		if (!first_pass)
594		  {
595		    status = tre_add_tag_right(mem, left, tag_left);
596		    tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;
597		    status = tre_add_tag_right(mem, right, tag_right);
598		    tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;
599		  }
600		DPRINT(("  num_tags += 2\n"));
601		num_tags += 2;
602	      }
603	    direction = TRE_TAG_MAXIMIZE;
604	    break;
605	  }
606
607	default:
608	  assert(0);
609	  break;
610
611	} /* end switch(symbol) */
612    } /* end while(tre_stack_num_objects(stack) > bottom) */
613
614  if (!first_pass)
615    tre_purge_regset(regset, tnfa, tag);
616
617  if (!first_pass && minimal_tag >= 0)
618    {
619      int i;
620      DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
621      for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
622      tnfa->minimal_tags[i] = tag;
623      tnfa->minimal_tags[i + 1] = minimal_tag;
624      tnfa->minimal_tags[i + 2] = -1;
625      minimal_tag = -1;
626      num_minimals++;
627    }
628
629  DPRINT(("tre_add_tags: %s complete.  Number of tags %d.\n",
630	  first_pass? "First pass" : "Second pass", num_tags));
631
632  assert(tree->num_tags == num_tags);
633  tnfa->end_tag = num_tags;
634  tnfa->num_tags = num_tags;
635  tnfa->num_minimals = num_minimals;
636  xfree(orig_regset);
637  xfree(parents);
638  xfree(saved_states);
639  return status;
640}
641
642
643
644/*
645  AST to TNFA compilation routines.
646*/
647
648typedef enum {
649  COPY_RECURSE,
650  COPY_SET_RESULT_PTR
651} tre_copyast_symbol_t;
652
653/* Flags for tre_copy_ast(). */
654#define COPY_REMOVE_TAGS	 1
655#define COPY_MAXIMIZE_FIRST_TAG	 2
656
657static reg_errcode_t
658tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
659	     int flags, int *pos_add, tre_tag_direction_t *tag_directions,
660	     tre_ast_node_t **copy, int *max_pos)
661{
662  reg_errcode_t status = REG_OK;
663  int bottom = tre_stack_num_objects(stack);
664  int num_copied = 0;
665  int first_tag = 1;
666  tre_ast_node_t **result = copy;
667  tre_copyast_symbol_t symbol;
668
669  STACK_PUSH(stack, voidptr, ast);
670  STACK_PUSH(stack, long, COPY_RECURSE);
671
672  while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
673    {
674      tre_ast_node_t *node;
675      if (status != REG_OK)
676	break;
677
678      symbol = (tre_copyast_symbol_t)tre_stack_pop_long(stack);
679      switch (symbol)
680	{
681	case COPY_SET_RESULT_PTR:
682	  result = tre_stack_pop_voidptr(stack);
683	  break;
684	case COPY_RECURSE:
685	  node = tre_stack_pop_voidptr(stack);
686	  switch (node->type)
687	    {
688	    case LITERAL:
689	      {
690		tre_literal_t *lit = node->obj;
691		int pos = lit->position;
692		int min = lit->code_min;
693		int max = lit->code_max;
694		if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
695		  {
696		    /* XXX - e.g. [ab] has only one position but two
697		       nodes, so we are creating holes in the state space
698		       here.  Not fatal, just wastes memory. */
699		    pos += *pos_add;
700		    num_copied++;
701		  }
702		else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
703		  {
704		    /* Change this tag to empty. */
705		    min = EMPTY;
706		    max = pos = -1;
707		  }
708		else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
709			 && first_tag)
710		  {
711		    /* Maximize the first tag. */
712		    tag_directions[max] = TRE_TAG_MAXIMIZE;
713		    first_tag = 0;
714		  }
715		*result = tre_ast_new_literal(mem, min, max, pos);
716		if (*result == NULL)
717		  status = REG_ESPACE;
718
719		if (pos > *max_pos)
720		  *max_pos = pos;
721		break;
722	      }
723	    case UNION:
724	      {
725		tre_union_t *uni = node->obj;
726		tre_union_t *tmp;
727		*result = tre_ast_new_union(mem, uni->left, uni->right);
728		if (*result == NULL)
729		  {
730		    status = REG_ESPACE;
731		    break;
732		  }
733		tmp = (*result)->obj;
734		result = &tmp->left;
735		STACK_PUSHX(stack, voidptr, uni->right);
736		STACK_PUSHX(stack, long, COPY_RECURSE);
737		STACK_PUSHX(stack, voidptr, &tmp->right);
738		STACK_PUSHX(stack, long, COPY_SET_RESULT_PTR);
739		STACK_PUSHX(stack, voidptr, uni->left);
740		STACK_PUSHX(stack, long, COPY_RECURSE);
741		break;
742	      }
743	    case CATENATION:
744	      {
745		tre_catenation_t *cat = node->obj;
746		tre_catenation_t *tmp;
747		*result = tre_ast_new_catenation(mem, cat->left, cat->right);
748		if (*result == NULL)
749		  {
750		    status = REG_ESPACE;
751		    break;
752		  }
753		tmp = (*result)->obj;
754		tmp->left = NULL;
755		tmp->right = NULL;
756		result = &tmp->left;
757
758		STACK_PUSHX(stack, voidptr, cat->right);
759		STACK_PUSHX(stack, long, COPY_RECURSE);
760		STACK_PUSHX(stack, voidptr, &tmp->right);
761		STACK_PUSHX(stack, long, COPY_SET_RESULT_PTR);
762		STACK_PUSHX(stack, voidptr, cat->left);
763		STACK_PUSHX(stack, long, COPY_RECURSE);
764		break;
765	      }
766	    case ITERATION:
767	      {
768		tre_iteration_t *iter = node->obj;
769		STACK_PUSHX(stack, voidptr, iter->arg);
770		STACK_PUSHX(stack, long, COPY_RECURSE);
771		*result = tre_ast_new_iter(mem, iter->arg, iter->min,
772					   iter->max, iter->minimal);
773		if (*result == NULL)
774		  {
775		    status = REG_ESPACE;
776		    break;
777		  }
778		iter = (*result)->obj;
779		result = &iter->arg;
780		break;
781	      }
782	    default:
783	      assert(0);
784	      break;
785	    }
786	  break;
787	}
788    }
789  *pos_add += num_copied;
790  return status;
791}
792
793typedef enum {
794  EXPAND_RECURSE,
795  EXPAND_AFTER_ITER
796} tre_expand_ast_symbol_t;
797
798/* Expands each iteration node that has a finite nonzero minimum or maximum
799   iteration count to a catenated sequence of copies of the node. */
800static reg_errcode_t
801tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
802	       int *position, tre_tag_direction_t *tag_directions,
803	       int *max_depth)
804{
805  reg_errcode_t status = REG_OK;
806  int bottom = tre_stack_num_objects(stack);
807  int pos_add = 0;
808  int pos_add_total = 0;
809  int max_pos = 0;
810  /* Current approximate matching parameters. */
811  int params[TRE_PARAM_LAST];
812  /* Approximate parameter nesting level. */
813  int params_depth = 0;
814  int iter_depth = 0;
815  int i;
816
817  for (i = 0; i < TRE_PARAM_LAST; i++)
818    params[i] = TRE_PARAM_DEFAULT;
819
820  STACK_PUSHR(stack, voidptr, ast);
821  STACK_PUSHR(stack, long, EXPAND_RECURSE);
822  while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
823    {
824      tre_ast_node_t *node;
825      tre_expand_ast_symbol_t symbol;
826
827      if (status != REG_OK)
828	break;
829
830      DPRINT(("pos_add %d\n", pos_add));
831
832      symbol = (tre_expand_ast_symbol_t)tre_stack_pop_long(stack);
833      node = tre_stack_pop_voidptr(stack);
834      switch (symbol)
835	{
836	case EXPAND_RECURSE:
837	  switch (node->type)
838	    {
839	    case LITERAL:
840	      {
841		tre_literal_t *lit= node->obj;
842		if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
843		  {
844		    lit->position += pos_add;
845		    if (lit->position > max_pos)
846		      max_pos = lit->position;
847		  }
848		break;
849	      }
850	    case UNION:
851	      {
852		tre_union_t *uni = node->obj;
853		STACK_PUSHX(stack, voidptr, uni->right);
854		STACK_PUSHX(stack, long, EXPAND_RECURSE);
855		STACK_PUSHX(stack, voidptr, uni->left);
856		STACK_PUSHX(stack, long, EXPAND_RECURSE);
857		break;
858	      }
859	    case CATENATION:
860	      {
861		tre_catenation_t *cat = node->obj;
862		STACK_PUSHX(stack, voidptr, cat->right);
863		STACK_PUSHX(stack, long, EXPAND_RECURSE);
864		STACK_PUSHX(stack, voidptr, cat->left);
865		STACK_PUSHX(stack, long, EXPAND_RECURSE);
866		break;
867	      }
868	    case ITERATION:
869	      {
870		tre_iteration_t *iter = node->obj;
871		STACK_PUSHX(stack, long, pos_add);
872		STACK_PUSHX(stack, voidptr, node);
873		STACK_PUSHX(stack, long, EXPAND_AFTER_ITER);
874		STACK_PUSHX(stack, voidptr, iter->arg);
875		STACK_PUSHX(stack, long, EXPAND_RECURSE);
876		/* If we are going to expand this node at EXPAND_AFTER_ITER
877		   then don't increase the `pos' fields of the nodes now, it
878		   will get done when expanding. */
879		if (iter->min > 1 || iter->max > 1)
880		  pos_add = 0;
881		iter_depth++;
882		DPRINT(("iter\n"));
883		break;
884	      }
885	    default:
886	      assert(0);
887	      break;
888	    }
889	  break;
890	case EXPAND_AFTER_ITER:
891	  {
892	    tre_iteration_t *iter = node->obj;
893	    int pos_add_last;
894	    pos_add = tre_stack_pop_int(stack);
895	    pos_add_last = pos_add;
896	    if (iter->min > 1 || iter->max > 1)
897	      {
898		tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
899		int j;
900		int pos_add_save = pos_add;
901
902		/* Create a catenated sequence of copies of the node. */
903		for (j = 0; j < iter->min; j++)
904		  {
905		    tre_ast_node_t *copy;
906		    /* Remove tags from all but the last copy. */
907		    int flags = ((j + 1 < iter->min)
908				 ? COPY_REMOVE_TAGS
909				 : COPY_MAXIMIZE_FIRST_TAG);
910		    DPRINT(("  pos_add %d\n", pos_add));
911		    pos_add_save = pos_add;
912		    status = tre_copy_ast(mem, stack, iter->arg, flags,
913					  &pos_add, tag_directions, &copy,
914					  &max_pos);
915		    if (status != REG_OK)
916		      return status;
917		    if (seq1 != NULL)
918		      seq1 = tre_ast_new_catenation(mem, seq1, copy);
919		    else
920		      seq1 = copy;
921		    if (seq1 == NULL)
922		      return REG_ESPACE;
923		  }
924
925		if (iter->max == -1)
926		  {
927		    /* No upper limit. */
928		    pos_add_save = pos_add;
929		    status = tre_copy_ast(mem, stack, iter->arg, 0,
930					  &pos_add, NULL, &seq2, &max_pos);
931		    if (status != REG_OK)
932		      return status;
933		    seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
934		    if (seq2 == NULL)
935		      return REG_ESPACE;
936		  }
937		else
938		  {
939		    for (j = iter->min; j < iter->max; j++)
940		      {
941			tre_ast_node_t *tmp, *copy;
942			pos_add_save = pos_add;
943			status = tre_copy_ast(mem, stack, iter->arg, 0,
944					      &pos_add, NULL, &copy, &max_pos);
945			if (status != REG_OK)
946			  return status;
947			if (seq2 != NULL)
948			  seq2 = tre_ast_new_catenation(mem, copy, seq2);
949			else
950			  seq2 = copy;
951			if (seq2 == NULL)
952			  return REG_ESPACE;
953			tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
954			if (tmp == NULL)
955			  return REG_ESPACE;
956			seq2 = tre_ast_new_union(mem, tmp, seq2);
957			if (seq2 == NULL)
958			  return REG_ESPACE;
959		      }
960		  }
961
962		pos_add = pos_add_save;
963		if (seq1 == NULL)
964		  seq1 = seq2;
965		else if (seq2 != NULL)
966		  seq1 = tre_ast_new_catenation(mem, seq1, seq2);
967		if (seq1 == NULL)
968		  return REG_ESPACE;
969		node->obj = seq1->obj;
970		node->type = seq1->type;
971	      }
972
973	    iter_depth--;
974	    pos_add_total += pos_add - pos_add_last;
975	    if (iter_depth == 0)
976	      pos_add = pos_add_total;
977
978	    /* If approximate parameters are specified, surround the result
979	       with two parameter setting nodes.  The one on the left sets
980	       the specified parameters, and the one on the right restores
981	       the old parameters. */
982	    if (iter->params)
983	      {
984		tre_ast_node_t *tmp_l, *tmp_r, *tmp_node, *node_copy;
985		int *old_params;
986
987		tmp_l = tre_ast_new_literal(mem, PARAMETER, 0, -1);
988		if (!tmp_l)
989		  return REG_ESPACE;
990		((tre_literal_t *)tmp_l->obj)->u.params = iter->params;
991		iter->params[TRE_PARAM_DEPTH] = params_depth + 1;
992		tmp_r = tre_ast_new_literal(mem, PARAMETER, 0, -1);
993		if (!tmp_r)
994		  return REG_ESPACE;
995		old_params = tre_mem_alloc(mem, sizeof(*old_params)
996					   * TRE_PARAM_LAST);
997		if (!old_params)
998		  return REG_ESPACE;
999		for (i = 0; i < TRE_PARAM_LAST; i++)
1000		  old_params[i] = params[i];
1001		((tre_literal_t *)tmp_r->obj)->u.params = old_params;
1002		old_params[TRE_PARAM_DEPTH] = params_depth;
1003		/* XXX - this is the only place where ast_new_node is
1004		   needed -- should be moved inside AST module. */
1005		node_copy = tre_ast_new_node(mem, ITERATION,
1006					     sizeof(tre_iteration_t));
1007		if (!node_copy)
1008		  return REG_ESPACE;
1009		node_copy->obj = node->obj;
1010		tmp_node = tre_ast_new_catenation(mem, tmp_l, node_copy);
1011		if (!tmp_node)
1012		  return REG_ESPACE;
1013		tmp_node = tre_ast_new_catenation(mem, tmp_node, tmp_r);
1014		if (!tmp_node)
1015		  return REG_ESPACE;
1016		/* Replace the contents of `node' with `tmp_node'. */
1017		memcpy(node, tmp_node, sizeof(*node));
1018		node->obj = tmp_node->obj;
1019		node->type = tmp_node->type;
1020		params_depth++;
1021		if (params_depth > *max_depth)
1022		  *max_depth = params_depth;
1023	      }
1024	    break;
1025	  }
1026	default:
1027	  assert(0);
1028	  break;
1029	}
1030    }
1031
1032  *position += pos_add_total;
1033
1034  /* `max_pos' should never be larger than `*position' if the above
1035     code works, but just an extra safeguard let's make sure
1036     `*position' is set large enough so enough memory will be
1037     allocated for the transition table. */
1038  if (max_pos > *position)
1039    *position = max_pos;
1040
1041#ifdef TRE_DEBUG
1042  DPRINT(("Expanded AST:\n"));
1043  tre_ast_print(ast);
1044  DPRINT(("*position %d, max_pos %d\n", *position, max_pos));
1045#endif
1046
1047  return status;
1048}
1049
1050static tre_pos_and_tags_t *
1051tre_set_empty(tre_mem_t mem)
1052{
1053  tre_pos_and_tags_t *new_set;
1054
1055  new_set = tre_mem_calloc(mem, sizeof(*new_set));
1056  if (new_set == NULL)
1057    return NULL;
1058
1059  new_set[0].position = -1;
1060  new_set[0].code_min = -1;
1061  new_set[0].code_max = -1;
1062
1063  return new_set;
1064}
1065
1066static tre_pos_and_tags_t *
1067tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
1068	    tre_ctype_t class, tre_ctype_t *neg_classes, int backref)
1069{
1070  tre_pos_and_tags_t *new_set;
1071
1072  new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
1073  if (new_set == NULL)
1074    return NULL;
1075
1076  new_set[0].position = position;
1077  new_set[0].code_min = code_min;
1078  new_set[0].code_max = code_max;
1079  new_set[0].class = class;
1080  new_set[0].neg_classes = neg_classes;
1081  new_set[0].backref = backref;
1082  new_set[1].position = -1;
1083  new_set[1].code_min = -1;
1084  new_set[1].code_max = -1;
1085
1086  return new_set;
1087}
1088
1089static tre_pos_and_tags_t *
1090tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
1091	      int *tags, int assertions, int *params)
1092{
1093  int s1, s2, i, j;
1094  tre_pos_and_tags_t *new_set;
1095  int *new_tags;
1096  int num_tags;
1097
1098  for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
1099  for (s1 = 0; set1[s1].position >= 0; s1++);
1100  for (s2 = 0; set2[s2].position >= 0; s2++);
1101  new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
1102  if (!new_set )
1103    return NULL;
1104
1105  for (s1 = 0; set1[s1].position >= 0; s1++)
1106    {
1107      new_set[s1].position = set1[s1].position;
1108      new_set[s1].code_min = set1[s1].code_min;
1109      new_set[s1].code_max = set1[s1].code_max;
1110      new_set[s1].assertions = set1[s1].assertions | assertions;
1111      new_set[s1].class = set1[s1].class;
1112      new_set[s1].neg_classes = set1[s1].neg_classes;
1113      new_set[s1].backref = set1[s1].backref;
1114      if (set1[s1].tags == NULL && tags == NULL)
1115	new_set[s1].tags = NULL;
1116      else
1117	{
1118	  for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
1119	  new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
1120					 * (i + num_tags + 1)));
1121	  if (new_tags == NULL)
1122	    return NULL;
1123	  for (j = 0; j < i; j++)
1124	    new_tags[j] = set1[s1].tags[j];
1125	  for (i = 0; i < num_tags; i++)
1126	    new_tags[j + i] = tags[i];
1127	  new_tags[j + i] = -1;
1128	  new_set[s1].tags = new_tags;
1129	}
1130      if (set1[s1].params)
1131	new_set[s1].params = set1[s1].params;
1132      if (params)
1133	{
1134	  if (!new_set[s1].params)
1135	    new_set[s1].params = params;
1136	  else
1137	    {
1138	      new_set[s1].params = tre_mem_alloc(mem, sizeof(*params) *
1139						 TRE_PARAM_LAST);
1140	      if (!new_set[s1].params)
1141		return NULL;
1142	      for (i = 0; i < TRE_PARAM_LAST; i++)
1143		if (params[i] != TRE_PARAM_UNSET)
1144		  new_set[s1].params[i] = params[i];
1145	    }
1146	}
1147    }
1148
1149  for (s2 = 0; set2[s2].position >= 0; s2++)
1150    {
1151      new_set[s1 + s2].position = set2[s2].position;
1152      new_set[s1 + s2].code_min = set2[s2].code_min;
1153      new_set[s1 + s2].code_max = set2[s2].code_max;
1154      /* XXX - why not | assertions here as well? */
1155      new_set[s1 + s2].assertions = set2[s2].assertions;
1156      new_set[s1 + s2].class = set2[s2].class;
1157      new_set[s1 + s2].neg_classes = set2[s2].neg_classes;
1158      new_set[s1 + s2].backref = set2[s2].backref;
1159      if (set2[s2].tags == NULL)
1160	new_set[s1 + s2].tags = NULL;
1161      else
1162	{
1163	  for (i = 0; set2[s2].tags[i] >= 0; i++);
1164	  new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
1165	  if (new_tags == NULL)
1166	    return NULL;
1167	  for (j = 0; j < i; j++)
1168	    new_tags[j] = set2[s2].tags[j];
1169	  new_tags[j] = -1;
1170	  new_set[s1 + s2].tags = new_tags;
1171	}
1172      if (set2[s2].params)
1173	new_set[s1 + s2].params = set2[s2].params;
1174      if (params)
1175	{
1176	  if (!new_set[s1 + s2].params)
1177	    new_set[s1 + s2].params = params;
1178	  else
1179	    {
1180	      new_set[s1 + s2].params = tre_mem_alloc(mem, sizeof(*params) *
1181						      TRE_PARAM_LAST);
1182	      if (!new_set[s1 + s2].params)
1183		return NULL;
1184	      for (i = 0; i < TRE_PARAM_LAST; i++)
1185		if (params[i] != TRE_PARAM_UNSET)
1186		  new_set[s1 + s2].params[i] = params[i];
1187	    }
1188	}
1189    }
1190  new_set[s1 + s2].position = -1;
1191  return new_set;
1192}
1193
1194/* Finds the empty path through `node' which is the one that should be
1195   taken according to POSIX.2 rules, and adds the tags on that path to
1196   `tags'.   `tags' may be NULL.  If `num_tags_seen' is not NULL, it is
1197   set to the number of tags seen on the path. */
1198static reg_errcode_t
1199tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
1200		int *assertions, int *params, int *num_tags_seen,
1201		int *params_seen)
1202{
1203  tre_literal_t *lit;
1204  tre_union_t *uni;
1205  tre_catenation_t *cat;
1206  tre_iteration_t *iter;
1207  int i;
1208  int bottom = tre_stack_num_objects(stack);
1209  reg_errcode_t status = REG_OK;
1210  if (num_tags_seen)
1211    *num_tags_seen = 0;
1212  if (params_seen)
1213    *params_seen = 0;
1214
1215  status = tre_stack_push_voidptr(stack, node);
1216
1217  /* Walk through the tree recursively. */
1218  while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1219    {
1220      node = tre_stack_pop_voidptr(stack);
1221
1222      switch (node->type)
1223	{
1224	case LITERAL:
1225	  lit = (tre_literal_t *)node->obj;
1226	  switch (lit->code_min)
1227	    {
1228	    case TAG:
1229	      if (lit->code_max >= 0)
1230		{
1231		  if (tags != NULL)
1232		    {
1233		      /* Add the tag to `tags'. */
1234		      for (i = 0; tags[i] >= 0; i++)
1235			if (tags[i] == lit->code_max)
1236			  break;
1237		      if (tags[i] < 0)
1238			{
1239			  tags[i] = lit->code_max;
1240			  tags[i + 1] = -1;
1241			}
1242		    }
1243		  if (num_tags_seen)
1244		    (*num_tags_seen)++;
1245		}
1246	      break;
1247	    case ASSERTION:
1248	      assert(lit->code_max >= 1
1249		     || lit->code_max <= ASSERT_LAST);
1250	      if (assertions != NULL)
1251		*assertions |= lit->code_max;
1252	      break;
1253	    case PARAMETER:
1254	      if (params != NULL)
1255		for (i = 0; i < TRE_PARAM_LAST; i++)
1256		  params[i] = lit->u.params[i];
1257	      if (params_seen != NULL)
1258		*params_seen = 1;
1259	      break;
1260	    case EMPTY:
1261	      break;
1262	    default:
1263	      assert(0);
1264	      break;
1265	    }
1266	  break;
1267
1268	case UNION:
1269	  /* Subexpressions starting earlier take priority over ones
1270	     starting later, so we prefer the left subexpression over the
1271	     right subexpression. */
1272	  uni = (tre_union_t *)node->obj;
1273	  if (uni->left->nullable)
1274	    STACK_PUSHX(stack, voidptr, uni->left)
1275	  else if (uni->right->nullable)
1276	    STACK_PUSHX(stack, voidptr, uni->right)
1277	  else
1278	    assert(0);
1279	  break;
1280
1281	case CATENATION:
1282	  /* The path must go through both children. */
1283	  cat = (tre_catenation_t *)node->obj;
1284	  assert(cat->left->nullable);
1285	  assert(cat->right->nullable);
1286	  STACK_PUSHX(stack, voidptr, cat->left);
1287	  STACK_PUSHX(stack, voidptr, cat->right);
1288	  break;
1289
1290	case ITERATION:
1291	  /* A match with an empty string is preferred over no match at
1292	     all, so we go through the argument if possible. */
1293	  iter = (tre_iteration_t *)node->obj;
1294	  if (iter->arg->nullable)
1295	    STACK_PUSHX(stack, voidptr, iter->arg);
1296	  break;
1297
1298	default:
1299	  assert(0);
1300	  break;
1301	}
1302    }
1303
1304  return status;
1305}
1306
1307
1308typedef enum {
1309  NFL_RECURSE,
1310  NFL_POST_UNION,
1311  NFL_POST_CATENATION,
1312  NFL_POST_ITERATION
1313} tre_nfl_stack_symbol_t;
1314
1315
1316/* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
1317   the nodes of the AST `tree'. */
1318static reg_errcode_t
1319tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
1320{
1321  int bottom = tre_stack_num_objects(stack);
1322
1323  STACK_PUSHR(stack, voidptr, tree);
1324  STACK_PUSHR(stack, long, NFL_RECURSE);
1325
1326  while (tre_stack_num_objects(stack) > bottom)
1327    {
1328      tre_nfl_stack_symbol_t symbol;
1329      tre_ast_node_t *node;
1330
1331      symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_long(stack);
1332      node = tre_stack_pop_voidptr(stack);
1333      switch (symbol)
1334	{
1335	case NFL_RECURSE:
1336	  switch (node->type)
1337	    {
1338	    case LITERAL:
1339	      {
1340		tre_literal_t *lit = (tre_literal_t *)node->obj;
1341		if (IS_BACKREF(lit))
1342		  {
1343		    /* Back references: nullable = false, firstpos = {i},
1344		       lastpos = {i}. */
1345		    node->nullable = 0;
1346		    node->firstpos = tre_set_one(mem, lit->position, 0,
1347					     TRE_CHAR_MAX, 0, NULL, -1);
1348		    if (!node->firstpos)
1349		      return REG_ESPACE;
1350		    node->lastpos = tre_set_one(mem, lit->position, 0,
1351						TRE_CHAR_MAX, 0, NULL,
1352						(int)lit->code_max);
1353		    if (!node->lastpos)
1354		      return REG_ESPACE;
1355		  }
1356		else if (lit->code_min < 0)
1357		  {
1358		    /* Tags, empty strings, params, and zero width assertions:
1359		       nullable = true, firstpos = {}, and lastpos = {}. */
1360		    node->nullable = 1;
1361		    node->firstpos = tre_set_empty(mem);
1362		    if (!node->firstpos)
1363		      return REG_ESPACE;
1364		    node->lastpos = tre_set_empty(mem);
1365		    if (!node->lastpos)
1366		      return REG_ESPACE;
1367		  }
1368		else
1369		  {
1370		    /* Literal at position i: nullable = false, firstpos = {i},
1371		       lastpos = {i}. */
1372		    node->nullable = 0;
1373		    node->firstpos =
1374		      tre_set_one(mem, lit->position, (int)lit->code_min,
1375				  (int)lit->code_max, 0, NULL, -1);
1376		    if (!node->firstpos)
1377		      return REG_ESPACE;
1378		    node->lastpos = tre_set_one(mem, lit->position,
1379						(int)lit->code_min,
1380						(int)lit->code_max,
1381						lit->u.class, lit->neg_classes,
1382						-1);
1383		    if (!node->lastpos)
1384		      return REG_ESPACE;
1385		  }
1386		break;
1387	      }
1388
1389	    case UNION:
1390	      /* Compute the attributes for the two subtrees, and after that
1391		 for this node. */
1392	      STACK_PUSHR(stack, voidptr, node);
1393	      STACK_PUSHR(stack, long, NFL_POST_UNION);
1394	      STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
1395	      STACK_PUSHR(stack, long, NFL_RECURSE);
1396	      STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
1397	      STACK_PUSHR(stack, long, NFL_RECURSE);
1398	      break;
1399
1400	    case CATENATION:
1401	      /* Compute the attributes for the two subtrees, and after that
1402		 for this node. */
1403	      STACK_PUSHR(stack, voidptr, node);
1404	      STACK_PUSHR(stack, long, NFL_POST_CATENATION);
1405	      STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
1406	      STACK_PUSHR(stack, long, NFL_RECURSE);
1407	      STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
1408	      STACK_PUSHR(stack, long, NFL_RECURSE);
1409	      break;
1410
1411	    case ITERATION:
1412	      /* Compute the attributes for the subtree, and after that for
1413		 this node. */
1414	      STACK_PUSHR(stack, voidptr, node);
1415	      STACK_PUSHR(stack, long, NFL_POST_ITERATION);
1416	      STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
1417	      STACK_PUSHR(stack, long, NFL_RECURSE);
1418	      break;
1419	    }
1420	  break; /* end case: NFL_RECURSE */
1421
1422	case NFL_POST_UNION:
1423	  {
1424	    tre_union_t *uni = (tre_union_t *)node->obj;
1425	    node->nullable = uni->left->nullable || uni->right->nullable;
1426	    node->firstpos = tre_set_union(mem, uni->left->firstpos,
1427					   uni->right->firstpos, NULL, 0, NULL);
1428	    if (!node->firstpos)
1429	      return REG_ESPACE;
1430	    node->lastpos = tre_set_union(mem, uni->left->lastpos,
1431					  uni->right->lastpos, NULL, 0, NULL);
1432	    if (!node->lastpos)
1433	      return REG_ESPACE;
1434	    break;
1435	  }
1436
1437	case NFL_POST_ITERATION:
1438	  {
1439	    tre_iteration_t *iter = (tre_iteration_t *)node->obj;
1440
1441	    if (iter->min == 0 || iter->arg->nullable)
1442	      node->nullable = 1;
1443	    else
1444	      node->nullable = 0;
1445	    node->firstpos = iter->arg->firstpos;
1446	    node->lastpos = iter->arg->lastpos;
1447	    break;
1448	  }
1449
1450	case NFL_POST_CATENATION:
1451	  {
1452	    int num_tags, *tags, assertions, params_seen;
1453	    int *params;
1454	    reg_errcode_t status;
1455	    tre_catenation_t *cat = node->obj;
1456	    node->nullable = cat->left->nullable && cat->right->nullable;
1457
1458	    /* Compute firstpos. */
1459	    if (cat->left->nullable)
1460	      {
1461		/* The left side matches the empty string.  Make a first pass
1462		   with tre_match_empty() to get the number of tags and
1463		   parameters. */
1464		status = tre_match_empty(stack, cat->left,
1465					 NULL, NULL, NULL, &num_tags,
1466					 &params_seen);
1467		if (status != REG_OK)
1468		  return status;
1469		/* Allocate arrays for the tags and parameters. */
1470		tags = xmalloc(sizeof(*tags) * (num_tags + 1));
1471		if (!tags)
1472		  return REG_ESPACE;
1473		tags[0] = -1;
1474		assertions = 0;
1475		params = NULL;
1476		if (params_seen)
1477		  {
1478		    params = tre_mem_alloc(mem, sizeof(*params)
1479					   * TRE_PARAM_LAST);
1480		    if (!params)
1481		      {
1482			xfree(tags);
1483			return REG_ESPACE;
1484		      }
1485		  }
1486		/* Second pass with tre_mach_empty() to get the list of
1487		   tags and parameters. */
1488		status = tre_match_empty(stack, cat->left, tags,
1489					 &assertions, params, NULL, NULL);
1490		if (status != REG_OK)
1491		  {
1492		    xfree(tags);
1493		    return status;
1494		  }
1495		node->firstpos =
1496		  tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
1497				tags, assertions, params);
1498		xfree(tags);
1499		if (!node->firstpos)
1500		  return REG_ESPACE;
1501	      }
1502	    else
1503	      {
1504		node->firstpos = cat->left->firstpos;
1505	      }
1506
1507	    /* Compute lastpos. */
1508	    if (cat->right->nullable)
1509	      {
1510		/* The right side matches the empty string.  Make a first pass
1511		   with tre_match_empty() to get the number of tags and
1512		   parameters. */
1513		status = tre_match_empty(stack, cat->right,
1514					 NULL, NULL, NULL, &num_tags,
1515					 &params_seen);
1516		if (status != REG_OK)
1517		  return status;
1518		/* Allocate arrays for the tags and parameters. */
1519		tags = xmalloc(sizeof(int) * (num_tags + 1));
1520		if (!tags)
1521		  return REG_ESPACE;
1522		tags[0] = -1;
1523		assertions = 0;
1524		params = NULL;
1525		if (params_seen)
1526		  {
1527		    params = tre_mem_alloc(mem, sizeof(*params)
1528					   * TRE_PARAM_LAST);
1529		    if (!params)
1530		      {
1531			xfree(tags);
1532			return REG_ESPACE;
1533		      }
1534		  }
1535		/* Second pass with tre_mach_empty() to get the list of
1536		   tags and parameters. */
1537		status = tre_match_empty(stack, cat->right, tags,
1538					 &assertions, params, NULL, NULL);
1539		if (status != REG_OK)
1540		  {
1541		    xfree(tags);
1542		    return status;
1543		  }
1544		node->lastpos =
1545		  tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
1546				tags, assertions, params);
1547		xfree(tags);
1548		if (!node->lastpos)
1549		  return REG_ESPACE;
1550	      }
1551	    else
1552	      {
1553		node->lastpos = cat->right->lastpos;
1554	      }
1555	    break;
1556	  }
1557
1558	default:
1559	  assert(0);
1560	  break;
1561	}
1562    }
1563
1564  return REG_OK;
1565}
1566
1567
1568/* Adds a transition from each position in `p1' to each position in `p2'. */
1569static reg_errcode_t
1570tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
1571	       tre_tnfa_transition_t *transitions,
1572	       int *counts, int *offs)
1573{
1574  tre_pos_and_tags_t *orig_p2 = p2;
1575  tre_tnfa_transition_t *trans;
1576  int i, j, k, l, dup, prev_p2_pos;
1577
1578  if (transitions != NULL)
1579    while (p1->position >= 0)
1580      {
1581	p2 = orig_p2;
1582	prev_p2_pos = -1;
1583	while (p2->position >= 0)
1584	  {
1585	    /* Optimization: if this position was already handled, skip it. */
1586	    if (p2->position == prev_p2_pos)
1587	      {
1588		p2++;
1589		continue;
1590	      }
1591	    prev_p2_pos = p2->position;
1592	    /* Set `trans' to point to the next unused transition from
1593	       position `p1->position'. */
1594	    trans = transitions + offs[p1->position];
1595	    while (trans->state != NULL)
1596	      {
1597#if 0
1598		/* If we find a previous transition from `p1->position' to
1599		   `p2->position', it is overwritten.  This can happen only
1600		   if there are nested loops in the regexp, like in "((a)*)*".
1601		   In POSIX.2 repetition using the outer loop is always
1602		   preferred over using the inner loop.	 Therefore the
1603		   transition for the inner loop is useless and can be thrown
1604		   away. */
1605		/* XXX - The same position is used for all nodes in a bracket
1606		   expression, so this optimization cannot be used (it will
1607		   break bracket expressions) unless I figure out a way to
1608		   detect it here. */
1609		if (trans->state_id == p2->position)
1610		  {
1611		    DPRINT(("*"));
1612		    break;
1613		  }
1614#endif
1615		trans++;
1616	      }
1617
1618	    if (trans->state == NULL)
1619	      (trans + 1)->state = NULL;
1620	    /* Use the character ranges, assertions, etc. from `p1' for
1621	       the transition from `p1' to `p2'. */
1622	    trans->code_min = p1->code_min;
1623	    trans->code_max = p1->code_max;
1624	    trans->state = transitions + offs[p2->position];
1625	    trans->state_id = p2->position;
1626	    trans->assertions = p1->assertions | p2->assertions
1627	      | (p1->class ? ASSERT_CHAR_CLASS : 0)
1628	      | (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0);
1629	    if (p1->backref >= 0)
1630	      {
1631		assert((trans->assertions & ASSERT_CHAR_CLASS) == 0);
1632		assert(p2->backref < 0);
1633		trans->u.backref = p1->backref;
1634		trans->assertions |= ASSERT_BACKREF;
1635	      }
1636	    else
1637	      trans->u.class = p1->class;
1638	    if (p1->neg_classes != NULL)
1639	      {
1640		for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++);
1641		trans->neg_classes =
1642		  xmalloc(sizeof(*trans->neg_classes) * (i + 1));
1643		if (trans->neg_classes == NULL)
1644		  return REG_ESPACE;
1645		for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)
1646		  trans->neg_classes[i] = p1->neg_classes[i];
1647		trans->neg_classes[i] = (tre_ctype_t)0;
1648	      }
1649	    else
1650	      trans->neg_classes = NULL;
1651
1652	    /* Find out how many tags this transition has. */
1653	    i = 0;
1654	    if (p1->tags != NULL)
1655	      while(p1->tags[i] >= 0)
1656		i++;
1657	    j = 0;
1658	    if (p2->tags != NULL)
1659	      while(p2->tags[j] >= 0)
1660		j++;
1661
1662	    /* If we are overwriting a transition, free the old tag array. */
1663	    if (trans->tags != NULL)
1664	      xfree(trans->tags);
1665	    trans->tags = NULL;
1666
1667	    /* If there were any tags, allocate an array and fill it. */
1668	    if (i + j > 0)
1669	      {
1670		trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
1671		if (!trans->tags)
1672		  return REG_ESPACE;
1673		i = 0;
1674		if (p1->tags != NULL)
1675		  while(p1->tags[i] >= 0)
1676		    {
1677		      trans->tags[i] = p1->tags[i];
1678		      i++;
1679		    }
1680		l = i;
1681		j = 0;
1682		if (p2->tags != NULL)
1683		  while (p2->tags[j] >= 0)
1684		    {
1685		      /* Don't add duplicates. */
1686		      dup = 0;
1687		      for (k = 0; k < i; k++)
1688			if (trans->tags[k] == p2->tags[j])
1689			  {
1690			    dup = 1;
1691			    break;
1692			  }
1693		      if (!dup)
1694			trans->tags[l++] = p2->tags[j];
1695		      j++;
1696		    }
1697		trans->tags[l] = -1;
1698	      }
1699
1700	    /* Set the parameter array.	 If both `p2' and `p1' have same
1701	       parameters, the values in `p2' override those in `p1'. */
1702	    if (p1->params || p2->params)
1703	      {
1704		if (!trans->params)
1705		  trans->params = xmalloc(sizeof(*trans->params)
1706					  * TRE_PARAM_LAST);
1707		if (!trans->params)
1708		  return REG_ESPACE;
1709		for (i = 0; i < TRE_PARAM_LAST; i++)
1710		  {
1711		    trans->params[i] = TRE_PARAM_UNSET;
1712		    if (p1->params && p1->params[i] != TRE_PARAM_UNSET)
1713		      trans->params[i] = p1->params[i];
1714		    if (p2->params && p2->params[i] != TRE_PARAM_UNSET)
1715		      trans->params[i] = p2->params[i];
1716		  }
1717	      }
1718	    else
1719	      {
1720		if (trans->params)
1721		  xfree(trans->params);
1722		trans->params = NULL;
1723	      }
1724
1725
1726#ifdef TRE_DEBUG
1727	    {
1728	      int *tags;
1729
1730	      DPRINT(("	 %2d -> %2d on %3d", p1->position, p2->position,
1731		      p1->code_min));
1732	      if (p1->code_max != p1->code_min)
1733		DPRINT(("-%3d", p1->code_max));
1734	      tags = trans->tags;
1735	      if (tags)
1736		{
1737		  DPRINT((", tags ["));
1738		  while (*tags >= 0)
1739		    {
1740		      DPRINT(("%d", *tags));
1741		      tags++;
1742		      if (*tags >= 0)
1743			DPRINT((","));
1744		    }
1745		  DPRINT(("]"));
1746		}
1747	      if (trans->assertions)
1748		DPRINT((", assert %d", trans->assertions));
1749	      if (trans->assertions & ASSERT_BACKREF)
1750		DPRINT((", backref %d", trans->u.backref));
1751	      else if (trans->u.class)
1752		DPRINT((", class %ld", (long)trans->u.class));
1753	      if (trans->neg_classes)
1754		DPRINT((", neg_classes %p", trans->neg_classes));
1755	      if (trans->params)
1756		{
1757		  DPRINT((", "));
1758		  tre_print_params(trans->params);
1759		}
1760	      DPRINT(("\n"));
1761	    }
1762#endif /* TRE_DEBUG */
1763	    p2++;
1764	  }
1765	p1++;
1766      }
1767  else
1768    /* Compute a maximum limit for the number of transitions leaving
1769       from each state. */
1770    while (p1->position >= 0)
1771      {
1772	p2 = orig_p2;
1773	while (p2->position >= 0)
1774	  {
1775	    counts[p1->position]++;
1776	    p2++;
1777	  }
1778	p1++;
1779      }
1780  return REG_OK;
1781}
1782
1783/* Converts the syntax tree to a TNFA.	All the transitions in the TNFA are
1784   labelled with one character range (there are no transitions on empty
1785   strings).  The TNFA takes O(n^2) space in the worst case, `n' is size of
1786   the regexp. */
1787static reg_errcode_t
1788tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
1789		int *counts, int *offs)
1790{
1791  tre_union_t *uni;
1792  tre_catenation_t *cat;
1793  tre_iteration_t *iter;
1794  reg_errcode_t errcode = REG_OK;
1795
1796  /* XXX - recurse using a stack!. */
1797  switch (node->type)
1798    {
1799    case LITERAL:
1800      break;
1801    case UNION:
1802      uni = (tre_union_t *)node->obj;
1803      errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
1804      if (errcode != REG_OK)
1805	return errcode;
1806      errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
1807      break;
1808
1809    case CATENATION:
1810      cat = (tre_catenation_t *)node->obj;
1811      /* Add a transition from each position in cat->left->lastpos
1812	 to each position in cat->right->firstpos. */
1813      errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
1814			       transitions, counts, offs);
1815      if (errcode != REG_OK)
1816	return errcode;
1817      errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
1818      if (errcode != REG_OK)
1819	return errcode;
1820      errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
1821      break;
1822
1823    case ITERATION:
1824      iter = (tre_iteration_t *)node->obj;
1825      assert(iter->max == -1 || iter->max == 1);
1826
1827      if (iter->max == -1)
1828	{
1829	  assert(iter->min == 0 || iter->min == 1);
1830	  /* Add a transition from each last position in the iterated
1831	     expression to each first position. */
1832	  errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
1833				   transitions, counts, offs);
1834	  if (errcode != REG_OK)
1835	    return errcode;
1836	}
1837      errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
1838      break;
1839    }
1840  return errcode;
1841}
1842
1843
1844#define ERROR_EXIT(err)		  \
1845  do				  \
1846    {				  \
1847      errcode = err;		  \
1848      if (/*CONSTCOND*/1)	  \
1849      	goto error_exit;	  \
1850    }				  \
1851 while (/*CONSTCOND*/0)
1852
1853
1854int
1855tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
1856{
1857  tre_stack_t *stack;
1858  tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
1859  tre_pos_and_tags_t *p;
1860  int *counts = NULL, *offs = NULL;
1861  int i, add = 0;
1862  tre_tnfa_transition_t *transitions, *initial;
1863  tre_tnfa_t *tnfa = NULL;
1864  tre_submatch_data_t *submatch_data;
1865  tre_tag_direction_t *tag_directions = NULL;
1866  reg_errcode_t errcode;
1867  tre_mem_t mem;
1868
1869  /* Parse context. */
1870  tre_parse_ctx_t parse_ctx;
1871
1872  /* Allocate a stack used throughout the compilation process for various
1873     purposes. */
1874  stack = tre_stack_new(512, 10240, 128);
1875  if (!stack)
1876    return REG_ESPACE;
1877  /* Allocate a fast memory allocator. */
1878  mem = tre_mem_new();
1879  if (!mem)
1880    {
1881      tre_stack_destroy(stack);
1882      return REG_ESPACE;
1883    }
1884
1885  /* Parse the regexp. */
1886  memset(&parse_ctx, 0, sizeof(parse_ctx));
1887  parse_ctx.mem = mem;
1888  parse_ctx.stack = stack;
1889  parse_ctx.re = regex;
1890  parse_ctx.len = n;
1891  parse_ctx.cflags = cflags;
1892  parse_ctx.max_backref = -1;
1893  DPRINT(("tre_compile: parsing '%.*" STRF "'\n", (int)n, regex));
1894  errcode = tre_parse(&parse_ctx);
1895  if (errcode != REG_OK)
1896    ERROR_EXIT(errcode);
1897  preg->re_nsub = parse_ctx.submatch_id - 1;
1898  tree = parse_ctx.result;
1899
1900  /* Back references and approximate matching cannot currently be used
1901     in the same regexp. */
1902  if (parse_ctx.max_backref >= 0 && parse_ctx.have_approx)
1903    ERROR_EXIT(REG_BADPAT);
1904
1905#ifdef TRE_DEBUG
1906  tre_ast_print(tree);
1907#endif /* TRE_DEBUG */
1908
1909  /* Referring to nonexistent subexpressions is illegal. */
1910  if (parse_ctx.max_backref > (int)preg->re_nsub)
1911    ERROR_EXIT(REG_ESUBREG);
1912
1913  /* Allocate the TNFA struct. */
1914  tnfa = xcalloc(1, sizeof(tre_tnfa_t));
1915  if (tnfa == NULL)
1916    ERROR_EXIT(REG_ESPACE);
1917  tnfa->have_backrefs = parse_ctx.max_backref >= 0;
1918  tnfa->have_approx = parse_ctx.have_approx;
1919  tnfa->num_submatches = parse_ctx.submatch_id;
1920
1921  /* Set up tags for submatch addressing.  If REG_NOSUB is set and the
1922     regexp does not have back references, this can be skipped. */
1923  if (tnfa->have_backrefs || !(cflags & REG_NOSUB))
1924    {
1925      DPRINT(("tre_compile: setting up tags\n"));
1926
1927      /* Figure out how many tags we will need. */
1928      errcode = tre_add_tags(NULL, stack, tree, tnfa);
1929      if (errcode != REG_OK)
1930	ERROR_EXIT(errcode);
1931#ifdef TRE_DEBUG
1932      tre_ast_print(tree);
1933#endif /* TRE_DEBUG */
1934
1935      if (tnfa->num_tags > 0)
1936	{
1937	  tag_directions = xmalloc(sizeof(*tag_directions)
1938				   * (tnfa->num_tags + 1));
1939	  if (tag_directions == NULL)
1940	    ERROR_EXIT(REG_ESPACE);
1941	  tnfa->tag_directions = tag_directions;
1942	  memset(tag_directions, -1,
1943		 sizeof(*tag_directions) * (tnfa->num_tags + 1));
1944	}
1945      tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 1,
1946				   sizeof(tnfa->minimal_tags));
1947      if (tnfa->minimal_tags == NULL)
1948	ERROR_EXIT(REG_ESPACE);
1949
1950      submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
1951			      sizeof(*submatch_data));
1952      if (submatch_data == NULL)
1953	ERROR_EXIT(REG_ESPACE);
1954      tnfa->submatch_data = submatch_data;
1955
1956      errcode = tre_add_tags(mem, stack, tree, tnfa);
1957      if (errcode != REG_OK)
1958	ERROR_EXIT(errcode);
1959
1960#ifdef TRE_DEBUG
1961      for (i = 0; i < parse_ctx.submatch_id; i++)
1962	DPRINT(("pmatch[%d] = {t%d, t%d}\n",
1963		i, submatch_data[i].so_tag, submatch_data[i].eo_tag));
1964      for (i = 0; i < tnfa->num_tags; i++)
1965	DPRINT(("t%d is %s\n", i,
1966		tag_directions[i] == TRE_TAG_MINIMIZE ?
1967		"minimized" : "maximized"));
1968#endif /* TRE_DEBUG */
1969    }
1970
1971  /* Expand iteration nodes. */
1972  errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
1973			   tag_directions, &tnfa->params_depth);
1974  if (errcode != REG_OK)
1975    ERROR_EXIT(errcode);
1976
1977  /* Add a dummy node for the final state.
1978     XXX - For certain patterns this dummy node can be optimized away,
1979	   for example "a*" or "ab*".	Figure out a simple way to detect
1980	   this possibility. */
1981  tmp_ast_l = tree;
1982  tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
1983  if (tmp_ast_r == NULL)
1984    ERROR_EXIT(REG_ESPACE);
1985
1986  tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
1987  if (tree == NULL)
1988    ERROR_EXIT(REG_ESPACE);
1989
1990#ifdef TRE_DEBUG
1991  tre_ast_print(tree);
1992  DPRINT(("Number of states: %d\n", parse_ctx.position));
1993#endif /* TRE_DEBUG */
1994
1995  errcode = tre_compute_nfl(mem, stack, tree);
1996  if (errcode != REG_OK)
1997    ERROR_EXIT(errcode);
1998
1999  counts = xmalloc(sizeof(int) * parse_ctx.position);
2000  if (counts == NULL)
2001    ERROR_EXIT(REG_ESPACE);
2002
2003  offs = xmalloc(sizeof(int) * parse_ctx.position);
2004  if (offs == NULL)
2005    ERROR_EXIT(REG_ESPACE);
2006
2007  for (i = 0; i < parse_ctx.position; i++)
2008    counts[i] = 0;
2009  tre_ast_to_tnfa(tree, NULL, counts, NULL);
2010
2011  add = 0;
2012  for (i = 0; i < parse_ctx.position; i++)
2013    {
2014      offs[i] = add;
2015      add += counts[i] + 1;
2016      counts[i] = 0;
2017    }
2018  transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
2019  if (transitions == NULL)
2020    ERROR_EXIT(REG_ESPACE);
2021  tnfa->transitions = transitions;
2022  tnfa->num_transitions = add;
2023
2024  DPRINT(("Converting to TNFA:\n"));
2025  errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
2026  if (errcode != REG_OK)
2027    ERROR_EXIT(errcode);
2028
2029  /* If in eight bit mode, compute a table of characters that can be the
2030     first character of a match. */
2031  tnfa->first_char = -1;
2032  if (TRE_MB_CUR_MAX == 1 && !tmp_ast_l->nullable)
2033    {
2034      int count = 0;
2035      tre_cint_t k;
2036      DPRINT(("Characters that can start a match:"));
2037      tnfa->firstpos_chars = xcalloc(256, sizeof(char));
2038      if (tnfa->firstpos_chars == NULL)
2039	ERROR_EXIT(REG_ESPACE);
2040      for (p = tree->firstpos; p->position >= 0; p++)
2041	{
2042	  tre_tnfa_transition_t *j = transitions + offs[p->position];
2043	  while (j->state != NULL)
2044	    {
2045	      for (k = j->code_min; k <= j->code_max && k < 256; k++)
2046		{
2047		  DPRINT((" %d", k));
2048		  tnfa->firstpos_chars[k] = 1;
2049		  count++;
2050		}
2051	      j++;
2052	    }
2053	}
2054      DPRINT(("\n"));
2055#define TRE_OPTIMIZE_FIRST_CHAR 1
2056#if TRE_OPTIMIZE_FIRST_CHAR
2057      if (count == 1)
2058	{
2059	  for (k = 0; k < 256; k++)
2060	    if (tnfa->firstpos_chars[k])
2061	      {
2062		DPRINT(("first char must be %d\n", k));
2063		tnfa->first_char = k;
2064		xfree(tnfa->firstpos_chars);
2065		tnfa->firstpos_chars = NULL;
2066		break;
2067	      }
2068	}
2069#endif
2070
2071    }
2072  else
2073    tnfa->firstpos_chars = NULL;
2074
2075
2076  p = tree->firstpos;
2077  i = 0;
2078  while (p->position >= 0)
2079    {
2080      i++;
2081
2082#ifdef TRE_DEBUG
2083      {
2084	int *tags;
2085	DPRINT(("initial: %d", p->position));
2086	tags = p->tags;
2087	if (tags != NULL)
2088	  {
2089	    if (*tags >= 0)
2090	      DPRINT(("/"));
2091	    while (*tags >= 0)
2092	      {
2093		DPRINT(("%d", *tags));
2094		tags++;
2095		if (*tags >= 0)
2096		  DPRINT((","));
2097	      }
2098	  }
2099	DPRINT((", assert %d", p->assertions));
2100	if (p->params)
2101	  {
2102	    DPRINT((", "));
2103	    tre_print_params(p->params);
2104	  }
2105	DPRINT(("\n"));
2106      }
2107#endif /* TRE_DEBUG */
2108
2109      p++;
2110    }
2111
2112  initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
2113  if (initial == NULL)
2114    ERROR_EXIT(REG_ESPACE);
2115  tnfa->initial = initial;
2116
2117  i = 0;
2118  for (p = tree->firstpos; p->position >= 0; p++)
2119    {
2120      initial[i].state = transitions + offs[p->position];
2121      initial[i].state_id = p->position;
2122      initial[i].tags = NULL;
2123      /* Copy the arrays p->tags, and p->params, they are allocated
2124	 from a tre_mem object. */
2125      if (p->tags)
2126	{
2127	  int j;
2128	  for (j = 0; p->tags[j] >= 0; j++);
2129	  initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
2130	  if (!initial[i].tags)
2131	    ERROR_EXIT(REG_ESPACE);
2132	  memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
2133	}
2134      initial[i].params = NULL;
2135      if (p->params)
2136	{
2137	  initial[i].params = xmalloc(sizeof(*p->params) * TRE_PARAM_LAST);
2138	  if (!initial[i].params)
2139	    ERROR_EXIT(REG_ESPACE);
2140	  memcpy(initial[i].params, p->params,
2141		 sizeof(*p->params) * TRE_PARAM_LAST);
2142	}
2143      initial[i].assertions = p->assertions;
2144      i++;
2145    }
2146  initial[i].state = NULL;
2147
2148  tnfa->num_transitions = add;
2149  tnfa->final = transitions + offs[tree->lastpos[0].position];
2150  tnfa->num_states = parse_ctx.position;
2151  tnfa->cflags = cflags;
2152
2153  DPRINT(("final state %p\n", (void *)tnfa->final));
2154
2155  tre_mem_destroy(mem);
2156  tre_stack_destroy(stack);
2157  xfree(counts);
2158  xfree(offs);
2159
2160  preg->TRE_REGEX_T_FIELD = (void *)tnfa;
2161  return REG_OK;
2162
2163 error_exit:
2164  /* Free everything that was allocated and return the error code. */
2165  tre_mem_destroy(mem);
2166  if (stack != NULL)
2167    tre_stack_destroy(stack);
2168  if (counts != NULL)
2169    xfree(counts);
2170  if (offs != NULL)
2171    xfree(offs);
2172  preg->TRE_REGEX_T_FIELD = (void *)tnfa;
2173  tre_free(preg);
2174  return errcode;
2175}
2176
2177
2178
2179
2180void
2181tre_free(regex_t *preg)
2182{
2183  tre_tnfa_t *tnfa;
2184  unsigned int i;
2185  tre_tnfa_transition_t *trans;
2186
2187  tnfa = (void *)preg->TRE_REGEX_T_FIELD;
2188  if (!tnfa)
2189    return;
2190
2191  for (i = 0; i < tnfa->num_transitions; i++)
2192    if (tnfa->transitions[i].state)
2193      {
2194	if (tnfa->transitions[i].tags)
2195	  xfree(tnfa->transitions[i].tags);
2196	if (tnfa->transitions[i].neg_classes)
2197	  xfree(tnfa->transitions[i].neg_classes);
2198	if (tnfa->transitions[i].params)
2199	  xfree(tnfa->transitions[i].params);
2200      }
2201  if (tnfa->transitions)
2202    xfree(tnfa->transitions);
2203
2204  if (tnfa->initial)
2205    {
2206      for (trans = tnfa->initial; trans->state; trans++)
2207	{
2208	  if (trans->tags)
2209	    xfree(trans->tags);
2210	  if (trans->params)
2211	    xfree(trans->params);
2212	}
2213      xfree(tnfa->initial);
2214    }
2215
2216  if (tnfa->submatch_data)
2217    {
2218      for (i = 0; i < tnfa->num_submatches; i++)
2219	if (tnfa->submatch_data[i].parents)
2220	  xfree(tnfa->submatch_data[i].parents);
2221      xfree(tnfa->submatch_data);
2222    }
2223
2224  if (tnfa->tag_directions)
2225    xfree(tnfa->tag_directions);
2226  if (tnfa->firstpos_chars)
2227    xfree(tnfa->firstpos_chars);
2228  if (tnfa->minimal_tags)
2229    xfree(tnfa->minimal_tags);
2230  xfree(tnfa);
2231}
2232
2233char *
2234tre_version(void)
2235{
2236  static char str[256];
2237  char *version;
2238
2239  if (str[0] == 0)
2240    {
2241      (void) tre_config(TRE_CONFIG_VERSION, &version);
2242      (void) snprintf(str, sizeof(str), "TRE %s (BSD)", version);
2243    }
2244  return str;
2245}
2246
2247int
2248tre_config(int query, void *result)
2249{
2250  int *int_result = result;
2251  const char **string_result = result;
2252
2253  switch (query)
2254    {
2255    case TRE_CONFIG_APPROX:
2256#ifdef TRE_APPROX
2257      *int_result = 1;
2258#else /* !TRE_APPROX */
2259      *int_result = 0;
2260#endif /* !TRE_APPROX */
2261      return REG_OK;
2262
2263    case TRE_CONFIG_WCHAR:
2264#ifdef TRE_WCHAR
2265      *int_result = 1;
2266#else /* !TRE_WCHAR */
2267      *int_result = 0;
2268#endif /* !TRE_WCHAR */
2269      return REG_OK;
2270
2271    case TRE_CONFIG_MULTIBYTE:
2272#ifdef TRE_MULTIBYTE
2273      *int_result = 1;
2274#else /* !TRE_MULTIBYTE */
2275      *int_result = 0;
2276#endif /* !TRE_MULTIBYTE */
2277      return REG_OK;
2278
2279    case TRE_CONFIG_SYSTEM_ABI:
2280#ifdef TRE_CONFIG_SYSTEM_ABI
2281      *int_result = 1;
2282#else /* !TRE_CONFIG_SYSTEM_ABI */
2283      *int_result = 0;
2284#endif /* !TRE_CONFIG_SYSTEM_ABI */
2285      return REG_OK;
2286
2287    case TRE_CONFIG_VERSION:
2288      *string_result = TRE_VERSION;
2289      return REG_OK;
2290    }
2291
2292  return REG_NOMATCH;
2293}
2294
2295
2296/* EOF */
2297