1251881Speter/*
2251881Speter  tre-match-approx.c - TRE approximate regex matching engine
3251881Speter
4251881Speter  This software is released under a BSD-style license.
5251881Speter  See the file LICENSE for details and copyright.
6251881Speter
7251881Speter*/
8251881Speter#ifdef HAVE_CONFIG_H
9251881Speter#include <config.h>
10251881Speter#endif /* HAVE_CONFIG_H */
11251881Speter
12251881Speter#include "tre-alloca.h"
13251881Speter
14251881Speter#define __USE_STRING_INLINES
15251881Speter#undef __NO_INLINE__
16251881Speter
17251881Speter#include <assert.h>
18251881Speter#include <stdlib.h>
19251881Speter#include <string.h>
20251881Speter#include <limits.h>
21251881Speter#ifdef HAVE_WCHAR_H
22251881Speter#include <wchar.h>
23251881Speter#endif /* HAVE_WCHAR_H */
24251881Speter#ifdef HAVE_WCTYPE_H
25251881Speter#include <wctype.h>
26251881Speter#endif /* HAVE_WCTYPE_H */
27251881Speter#ifndef TRE_WCHAR
28251881Speter#include <ctype.h>
29251881Speter#endif /* !TRE_WCHAR */
30251881Speter#ifdef HAVE_MALLOC_H
31251881Speter#include <malloc.h>
32251881Speter#endif /* HAVE_MALLOC_H */
33251881Speter
34251881Speter#include "tre-internal.h"
35251881Speter#include "tre-match-utils.h"
36251881Speter#include "tre.h"
37251881Speter#include "xmalloc.h"
38251881Speter
39251881Speter#define TRE_M_COST	0
40251881Speter#define TRE_M_NUM_INS	1
41251881Speter#define TRE_M_NUM_DEL	2
42251881Speter#define TRE_M_NUM_SUBST 3
43251881Speter#define TRE_M_NUM_ERR	4
44251881Speter#define TRE_M_LAST	5
45251881Speter
46251881Speter#define TRE_M_MAX_DEPTH 3
47251881Speter
48251881Spetertypedef struct {
49251881Speter  /* State in the TNFA transition table. */
50251881Speter  tre_tnfa_transition_t *state;
51251881Speter  /* Position in input string. */
52251881Speter  int pos;
53251881Speter  /* Tag values. */
54251881Speter  int *tags;
55251881Speter  /* Matching parameters. */
56251881Speter  regaparams_t params;
57251881Speter  /* Nesting depth of parameters.  This is used as an index in
58251881Speter     the `costs' array. */
59251881Speter  int depth;
60251881Speter  /* Costs and counter values for different parameter nesting depths. */
61251881Speter  int costs[TRE_M_MAX_DEPTH + 1][TRE_M_LAST];
62251881Speter} tre_tnfa_approx_reach_t;
63251881Speter
64251881Speter
65251881Speter#ifdef TRE_DEBUG
66251881Speter/* Prints the `reach' array in a readable fashion with DPRINT. */
67251881Speterstatic void
68251881Spetertre_print_reach(const tre_tnfa_t *tnfa, tre_tnfa_approx_reach_t *reach,
69251881Speter		int pos, size_t num_tags)
70251881Speter{
71251881Speter  int id;
72251881Speter
73251881Speter  /* Print each state on one line. */
74251881Speter  DPRINT(("  reach:\n"));
75251881Speter  for (id = 0; id < tnfa->num_states; id++)
76251881Speter    {
77251881Speter      int i, j;
78251881Speter      if (reach[id].pos < pos)
79251881Speter	continue;  /* Not reached. */
80251881Speter      DPRINT(("	 %03d, costs ", id));
81251881Speter      for (i = 0; i <= reach[id].depth; i++)
82251881Speter	{
83251881Speter	  DPRINT(("["));
84251881Speter	  for (j = 0; j < TRE_M_LAST; j++)
85251881Speter	    {
86251881Speter	      DPRINT(("%2d", reach[id].costs[i][j]));
87251881Speter	      if (j + 1 < TRE_M_LAST)
88251881Speter		DPRINT((","));
89251881Speter	    }
90251881Speter	  DPRINT(("]"));
91251881Speter	  if (i + 1 <= reach[id].depth)
92251881Speter	    DPRINT((", "));
93251881Speter	}
94251881Speter      DPRINT(("\n	tags "));
95251881Speter      for (i = 0; i < num_tags; i++)
96251881Speter	{
97251881Speter	  DPRINT(("%02d", reach[id].tags[i]));
98251881Speter	  if (i + 1 < num_tags)
99251881Speter	    DPRINT((","));
100251881Speter	}
101251881Speter      DPRINT(("\n"));
102251881Speter    }
103251881Speter  DPRINT(("\n"));
104251881Speter}
105251881Speter#endif /* TRE_DEBUG */
106251881Speter
107251881Speter
108251881Speter/* Sets the matching parameters in `reach' to the ones defined in the `pa'
109251881Speter   array.  If `pa' specifies default values, they are taken from
110251881Speter   `default_params'. */
111251881Speterinline static void
112251881Spetertre_set_params(tre_tnfa_approx_reach_t *reach,
113251881Speter	       int *pa, regaparams_t default_params)
114251881Speter{
115251881Speter  int value;
116251881Speter
117251881Speter  /* If depth is increased reset costs and counters to zero for the
118251881Speter     new levels. */
119251881Speter  value = pa[TRE_PARAM_DEPTH];
120251881Speter  assert(value <= TRE_M_MAX_DEPTH);
121251881Speter  if (value > reach->depth)
122251881Speter    {
123251881Speter      int i, j;
124251881Speter      for (i = reach->depth + 1; i <= value; i++)
125251881Speter	for (j = 0; j < TRE_M_LAST; j++)
126251881Speter	  reach->costs[i][j] = 0;
127251881Speter    }
128251881Speter  reach->depth = value;
129251881Speter
130251881Speter  /* Set insert cost. */
131251881Speter  value = pa[TRE_PARAM_COST_INS];
132251881Speter  if (value == TRE_PARAM_DEFAULT)
133251881Speter    reach->params.cost_ins = default_params.cost_ins;
134251881Speter  else if (value != TRE_PARAM_UNSET)
135251881Speter    reach->params.cost_ins = value;
136251881Speter
137251881Speter  /* Set delete cost. */
138251881Speter  value = pa[TRE_PARAM_COST_DEL];
139251881Speter  if (value == TRE_PARAM_DEFAULT)
140251881Speter    reach->params.cost_del = default_params.cost_del;
141251881Speter  else if (value != TRE_PARAM_UNSET)
142251881Speter    reach->params.cost_del = value;
143251881Speter
144251881Speter  /* Set substitute cost. */
145251881Speter  value = pa[TRE_PARAM_COST_SUBST];
146  if (value == TRE_PARAM_DEFAULT)
147    reach->params.cost_subst = default_params.cost_subst;
148  else
149    reach->params.cost_subst = value;
150
151  /* Set maximum cost. */
152  value = pa[TRE_PARAM_COST_MAX];
153  if (value == TRE_PARAM_DEFAULT)
154    reach->params.max_cost = default_params.max_cost;
155  else if (value != TRE_PARAM_UNSET)
156    reach->params.max_cost = value;
157
158  /* Set maximum inserts. */
159  value = pa[TRE_PARAM_MAX_INS];
160  if (value == TRE_PARAM_DEFAULT)
161    reach->params.max_ins = default_params.max_ins;
162  else if (value != TRE_PARAM_UNSET)
163    reach->params.max_ins = value;
164
165  /* Set maximum deletes. */
166  value = pa[TRE_PARAM_MAX_DEL];
167  if (value == TRE_PARAM_DEFAULT)
168    reach->params.max_del = default_params.max_del;
169  else if (value != TRE_PARAM_UNSET)
170    reach->params.max_del = value;
171
172  /* Set maximum substitutes. */
173  value = pa[TRE_PARAM_MAX_SUBST];
174  if (value == TRE_PARAM_DEFAULT)
175    reach->params.max_subst = default_params.max_subst;
176  else if (value != TRE_PARAM_UNSET)
177    reach->params.max_subst = value;
178
179  /* Set maximum number of errors. */
180  value = pa[TRE_PARAM_MAX_ERR];
181  if (value == TRE_PARAM_DEFAULT)
182    reach->params.max_err = default_params.max_err;
183  else if (value != TRE_PARAM_UNSET)
184    reach->params.max_err = value;
185}
186
187reg_errcode_t
188tre_tnfa_run_approx(const tre_tnfa_t *tnfa, const void *string, int len,
189		    tre_str_type_t type, int *match_tags,
190		    regamatch_t *match, regaparams_t default_params,
191		    int eflags, int *match_end_ofs)
192{
193  /* State variables required by GET_NEXT_WCHAR. */
194  tre_char_t prev_c = 0, next_c = 0;
195  const char *str_byte = string;
196  int pos = -1;
197  unsigned int pos_add_next = 1;
198#ifdef TRE_WCHAR
199  const wchar_t *str_wide = string;
200#ifdef TRE_MBSTATE
201  mbstate_t mbstate;
202#endif /* !TRE_WCHAR */
203#endif /* TRE_WCHAR */
204  int reg_notbol = eflags & REG_NOTBOL;
205  int reg_noteol = eflags & REG_NOTEOL;
206  int reg_newline = tnfa->cflags & REG_NEWLINE;
207  size_t str_user_end = 0;
208
209  int prev_pos;
210
211  /* Number of tags. */
212  size_t num_tags;
213  /* The reach tables. */
214  tre_tnfa_approx_reach_t *reach, *reach_next;
215  /* Tag array for temporary use. */
216  int *tmp_tags;
217
218  /* End offset of best match so far, or -1 if no match found yet. */
219  int match_eo = -1;
220  /* Costs of the match. */
221  int match_costs[TRE_M_LAST];
222
223  /* Space for temporary data required for matching. */
224  unsigned char *buf;
225
226  size_t i, id;
227
228  if (!match_tags)
229    num_tags = 0;
230  else
231    num_tags = tnfa->num_tags;
232
233#ifdef TRE_MBSTATE
234  memset(&mbstate, '\0', sizeof(mbstate));
235#endif /* TRE_MBSTATE */
236
237  DPRINT(("tre_tnfa_run_approx, input type %d, len %d, eflags %d, "
238	  "match_tags %p\n",
239	  type, len, eflags,
240	  match_tags));
241  DPRINT(("max cost %d, ins %d, del %d, subst %d\n",
242	  default_params.max_cost,
243	  default_params.cost_ins,
244	  default_params.cost_del,
245	  default_params.cost_subst));
246
247  /* Allocate memory for temporary data required for matching.	This needs to
248     be done for every matching operation to be thread safe.  This allocates
249     everything in a single large block from the stack frame using alloca()
250     or with malloc() if alloca is unavailable. */
251  {
252    unsigned char *buf_cursor;
253    /* Space needed for one array of tags. */
254    size_t tag_bytes = sizeof(*tmp_tags) * num_tags;
255    /* Space needed for one reach table. */
256    size_t reach_bytes = sizeof(*reach_next) * tnfa->num_states;
257    /* Total space needed. */
258    size_t total_bytes = reach_bytes * 2 + (tnfa->num_states * 2 + 1 ) * tag_bytes;
259    /* Add some extra to make sure we can align the pointers.  The multiplier
260       used here must be equal to the number of ALIGN calls below. */
261    total_bytes += (sizeof(long) - 1) * 3;
262
263    /* Allocate the memory. */
264#ifdef TRE_USE_ALLOCA
265    buf = alloca(total_bytes);
266#else /* !TRE_USE_ALLOCA */
267    buf = xmalloc((unsigned)total_bytes);
268#endif /* !TRE_USE_ALLOCA */
269    if (!buf)
270      return REG_ESPACE;
271    memset(buf, 0, (size_t)total_bytes);
272
273    /* Allocate `tmp_tags' from `buf'. */
274    tmp_tags = (void *)buf;
275    buf_cursor = buf + tag_bytes;
276    buf_cursor += ALIGN(buf_cursor, long);
277
278    /* Allocate `reach' from `buf'. */
279    reach = (void *)buf_cursor;
280    buf_cursor += reach_bytes;
281    buf_cursor += ALIGN(buf_cursor, long);
282
283    /* Allocate `reach_next' from `buf'. */
284    reach_next = (void *)buf_cursor;
285    buf_cursor += reach_bytes;
286    buf_cursor += ALIGN(buf_cursor, long);
287
288    /* Allocate tag arrays for `reach' and `reach_next' from `buf'. */
289    for (i = 0; i < tnfa->num_states; i++)
290      {
291	reach[i].tags = (void *)buf_cursor;
292	buf_cursor += tag_bytes;
293	reach_next[i].tags = (void *)buf_cursor;
294	buf_cursor += tag_bytes;
295      }
296    assert(buf_cursor <= buf + total_bytes);
297  }
298
299  for (i = 0; i < TRE_M_LAST; i++)
300    match_costs[i] = INT_MAX;
301
302  /* Mark the reach arrays empty. */
303  for (i = 0; i < tnfa->num_states; i++)
304    reach[i].pos = reach_next[i].pos = -2;
305
306  prev_pos = pos;
307  GET_NEXT_WCHAR();
308  pos = 0;
309
310  while (/*CONSTCOND*/1)
311    {
312      DPRINT(("%03d:%2lc/%05d\n", pos, (tre_cint_t)next_c, (int)next_c));
313
314      /* Add initial states to `reach_next' if an exact match has not yet
315	 been found. */
316      if (match_costs[TRE_M_COST] > 0)
317	{
318	  tre_tnfa_transition_t *trans;
319	  DPRINT(("  init"));
320	  for (trans = tnfa->initial; trans->state; trans++)
321	    {
322	      int stateid = trans->state_id;
323
324	      /* If this state is not currently in `reach_next', add it
325		 there. */
326	      if (reach_next[stateid].pos < pos)
327		{
328		  if (trans->assertions && CHECK_ASSERTIONS(trans->assertions))
329		    {
330		      /* Assertions failed, don't add this state. */
331		      DPRINT((" !%d (assert)", stateid));
332		      continue;
333		    }
334		  DPRINT((" %d", stateid));
335		  reach_next[stateid].state = trans->state;
336		  reach_next[stateid].pos = pos;
337
338		  /* Compute tag values after this transition. */
339		  for (i = 0; i < num_tags; i++)
340		    reach_next[stateid].tags[i] = -1;
341
342		  if (trans->tags)
343		    for (i = 0; trans->tags[i] >= 0; i++)
344		      if ((size_t)trans->tags[i] < num_tags)
345			reach_next[stateid].tags[trans->tags[i]] = pos;
346
347		  /* Set the parameters, depth, and costs. */
348		  reach_next[stateid].params = default_params;
349		  reach_next[stateid].depth = 0;
350		  for (i = 0; i < TRE_M_LAST; i++)
351		    reach_next[stateid].costs[0][i] = 0;
352		  if (trans->params)
353		    tre_set_params(&reach_next[stateid], trans->params,
354				   default_params);
355
356		  /* If this is the final state, mark the exact match. */
357		  if (trans->state == tnfa->final)
358		    {
359		      match_eo = pos;
360		      for (i = 0; i < num_tags; i++)
361			match_tags[i] = reach_next[stateid].tags[i];
362		      for (i = 0; i < TRE_M_LAST; i++)
363			match_costs[i] = 0;
364		    }
365		}
366	    }
367	    DPRINT(("\n"));
368	}
369
370
371      /* Handle inserts.  This is done by pretending there's an epsilon
372	 transition from each state in `reach' back to the same state.
373	 We don't need to worry about the final state here; this will never
374	 give a better match than what we already have. */
375      for (id = 0; id < tnfa->num_states; id++)
376	{
377	  int depth;
378	  int cost, cost0;
379
380	  if (reach[id].pos != prev_pos)
381	    {
382	      DPRINT(("	 insert: %d not reached\n", id));
383	      continue;	 /* Not reached. */
384	    }
385
386	  depth = reach[id].depth;
387
388	  /* Compute and check cost at current depth. */
389	  cost = reach[id].costs[depth][TRE_M_COST];
390	  if (reach[id].params.cost_ins != TRE_PARAM_UNSET)
391	    cost += reach[id].params.cost_ins;
392	  if (cost > reach[id].params.max_cost)
393	    continue;  /* Cost too large. */
394
395	  /* Check number of inserts at current depth. */
396	  if (reach[id].costs[depth][TRE_M_NUM_INS] + 1
397	      > reach[id].params.max_ins)
398	    continue;  /* Too many inserts. */
399
400	  /* Check total number of errors at current depth. */
401	  if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1
402	      > reach[id].params.max_err)
403	    continue;  /* Too many errors. */
404
405	  /* Compute overall cost. */
406	  cost0 = cost;
407	  if (depth > 0)
408	    {
409	      cost0 = reach[id].costs[0][TRE_M_COST];
410	      if (reach[id].params.cost_ins != TRE_PARAM_UNSET)
411		cost0 += reach[id].params.cost_ins;
412	      else
413		cost0 += default_params.cost_ins;
414	    }
415
416	  DPRINT(("  insert: from %d to %d, cost %d: ", id, id,
417		  reach[id].costs[depth][TRE_M_COST]));
418	  if (reach_next[id].pos == pos
419	      && (cost0 >= reach_next[id].costs[0][TRE_M_COST]))
420	    {
421	      DPRINT(("lose\n"));
422	      continue;
423	    }
424	  DPRINT(("win\n"));
425
426	  /* Copy state, position, tags, parameters, and depth. */
427	  reach_next[id].state = reach[id].state;
428	  reach_next[id].pos = pos;
429	  for (i = 0; i < num_tags; i++)
430	    reach_next[id].tags[i] = reach[id].tags[i];
431	  reach_next[id].params = reach[id].params;
432	  reach_next[id].depth = reach[id].depth;
433
434	  /* Set the costs after this transition. */
435	  memcpy(reach_next[id].costs, reach[id].costs,
436		 sizeof(reach_next[id].costs[0][0])
437		 * TRE_M_LAST * (depth + 1));
438	  reach_next[id].costs[depth][TRE_M_COST] = cost;
439	  reach_next[id].costs[depth][TRE_M_NUM_INS]++;
440	  reach_next[id].costs[depth][TRE_M_NUM_ERR]++;
441	  if (depth > 0)
442	    {
443	      reach_next[id].costs[0][TRE_M_COST] = cost0;
444	      reach_next[id].costs[0][TRE_M_NUM_INS]++;
445	      reach_next[id].costs[0][TRE_M_NUM_ERR]++;
446	    }
447
448	}
449
450
451      /* Handle deletes.  This is done by traversing through the whole TNFA
452	 pretending that all transitions are epsilon transitions, until
453	 no more states can be reached with better costs. */
454      {
455	/* XXX - dynamic ringbuffer size */
456	tre_tnfa_approx_reach_t *ringbuffer[512];
457	tre_tnfa_approx_reach_t **deque_start, **deque_end;
458
459	deque_start = deque_end = ringbuffer;
460
461	/* Add all states in `reach_next' to the deque. */
462	for (id = 0; id < tnfa->num_states; id++)
463	  {
464	    if (reach_next[id].pos != pos)
465	      continue;
466	    *deque_end = &reach_next[id];
467	    deque_end++;
468	    assert(deque_end != deque_start);
469	  }
470
471	/* Repeat until the deque is empty. */
472	while (deque_end != deque_start)
473	  {
474	    tre_tnfa_approx_reach_t *reach_p;
475	    int depth;
476	    int cost, cost0;
477	    tre_tnfa_transition_t *trans;
478
479	    /* Pop the first item off the deque. */
480	    reach_p = *deque_start;
481	    id = reach_p - reach_next;
482	    depth = reach_p->depth;
483
484	    /* Compute cost at current depth. */
485	    cost = reach_p->costs[depth][TRE_M_COST];
486	    if (reach_p->params.cost_del != TRE_PARAM_UNSET)
487	      cost += reach_p->params.cost_del;
488
489	    /* Check cost, number of deletes, and total number of errors
490	       at current depth. */
491	    if (cost > reach_p->params.max_cost
492		|| (reach_p->costs[depth][TRE_M_NUM_DEL] + 1
493		    > reach_p->params.max_del)
494		|| (reach_p->costs[depth][TRE_M_NUM_ERR] + 1
495		    > reach_p->params.max_err))
496	      {
497		/* Too many errors or cost too large. */
498		DPRINT(("  delete: from %03d: cost too large\n", id));
499		deque_start++;
500		if (deque_start >= (ringbuffer + 512))
501		  deque_start = ringbuffer;
502		continue;
503	      }
504
505	    /* Compute overall cost. */
506	    cost0 = cost;
507	    if (depth > 0)
508	      {
509		cost0 = reach_p->costs[0][TRE_M_COST];
510		if (reach_p->params.cost_del != TRE_PARAM_UNSET)
511		  cost0 += reach_p->params.cost_del;
512		else
513		  cost0 += default_params.cost_del;
514	      }
515
516	    for (trans = reach_p->state; trans->state; trans++)
517	      {
518		int dest_id = trans->state_id;
519		DPRINT(("  delete: from %03d to %03d, cost %d (%d): ",
520			id, dest_id, cost0, reach_p->params.max_cost));
521
522		if (trans->assertions && CHECK_ASSERTIONS(trans->assertions))
523		  {
524		    DPRINT(("assertion failed\n"));
525		    continue;
526		  }
527
528		/* Compute tag values after this transition. */
529		for (i = 0; i < num_tags; i++)
530		  tmp_tags[i] = reach_p->tags[i];
531		if (trans->tags)
532		  for (i = 0; trans->tags[i] >= 0; i++)
533		    if ((size_t)trans->tags[i] < num_tags)
534		      tmp_tags[trans->tags[i]] = pos;
535
536		/* If another path has also reached this state, choose the one
537		   with the smallest cost or best tags if costs are equal. */
538		if (reach_next[dest_id].pos == pos
539		    && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST]
540			|| (cost0 == reach_next[dest_id].costs[0][TRE_M_COST]
541			    && (!match_tags
542				|| !tre_tag_order(num_tags,
543						  tnfa->tag_directions,
544						  tmp_tags,
545						  reach_next[dest_id].tags)))))
546		  {
547		    DPRINT(("lose, cost0 %d, have %d\n",
548			    cost0, reach_next[dest_id].costs[0][TRE_M_COST]));
549		    continue;
550		  }
551		DPRINT(("win\n"));
552
553		/* Set state, position, tags, parameters, depth, and costs. */
554		reach_next[dest_id].state = trans->state;
555		reach_next[dest_id].pos = pos;
556		for (i = 0; i < num_tags; i++)
557		  reach_next[dest_id].tags[i] = tmp_tags[i];
558
559		reach_next[dest_id].params = reach_p->params;
560		if (trans->params)
561		  tre_set_params(&reach_next[dest_id], trans->params,
562				 default_params);
563
564		reach_next[dest_id].depth = reach_p->depth;
565		memcpy(&reach_next[dest_id].costs,
566		       reach_p->costs,
567		       sizeof(reach_p->costs[0][0])
568		       * TRE_M_LAST * (depth + 1));
569		reach_next[dest_id].costs[depth][TRE_M_COST] = cost;
570		reach_next[dest_id].costs[depth][TRE_M_NUM_DEL]++;
571		reach_next[dest_id].costs[depth][TRE_M_NUM_ERR]++;
572		if (depth > 0)
573		  {
574		    reach_next[dest_id].costs[0][TRE_M_COST] = cost0;
575		    reach_next[dest_id].costs[0][TRE_M_NUM_DEL]++;
576		    reach_next[dest_id].costs[0][TRE_M_NUM_ERR]++;
577		  }
578
579		if (trans->state == tnfa->final
580		    && (match_eo < 0
581			|| match_costs[TRE_M_COST] > cost0
582			|| (match_costs[TRE_M_COST] == cost0
583			    && (num_tags > 0
584				&& tmp_tags[0] <= match_tags[0]))))
585		  {
586		    DPRINT(("	 setting new match at %d, cost %d\n",
587			    pos, cost0));
588		    match_eo = pos;
589		    memcpy(match_costs, reach_next[dest_id].costs[0],
590			   sizeof(match_costs[0]) * TRE_M_LAST);
591		    for (i = 0; i < num_tags; i++)
592		      match_tags[i] = tmp_tags[i];
593		  }
594
595		/* Add to the end of the deque. */
596		*deque_end = &reach_next[dest_id];
597		deque_end++;
598		if (deque_end >= (ringbuffer + 512))
599		  deque_end = ringbuffer;
600		assert(deque_end != deque_start);
601	      }
602	    deque_start++;
603	    if (deque_start >= (ringbuffer + 512))
604	      deque_start = ringbuffer;
605	  }
606
607      }
608
609#ifdef TRE_DEBUG
610      tre_print_reach(tnfa, reach_next, pos, num_tags);
611#endif /* TRE_DEBUG */
612
613      /* Check for end of string. */
614      if (len < 0)
615	{
616	  if (type == STR_USER)
617	    {
618	      if (str_user_end)
619		break;
620	    }
621	  else if (next_c == L'\0')
622	    break;
623	}
624      else
625	{
626	  if (pos >= len)
627	    break;
628	}
629
630      prev_pos = pos;
631      GET_NEXT_WCHAR();
632
633      /* Swap `reach' and `reach_next'. */
634      {
635	tre_tnfa_approx_reach_t *tmp;
636	tmp = reach;
637	reach = reach_next;
638	reach_next = tmp;
639      }
640
641      /* Handle exact matches and substitutions. */
642      for (id = 0; id < tnfa->num_states; id++)
643	{
644	  tre_tnfa_transition_t *trans;
645
646	  if (reach[id].pos < prev_pos)
647	    continue;  /* Not reached. */
648	  for (trans = reach[id].state; trans->state; trans++)
649	    {
650	      int dest_id;
651	      int depth;
652	      int cost, cost0, err;
653
654	      if (trans->assertions
655		  && (CHECK_ASSERTIONS(trans->assertions)
656		      || CHECK_CHAR_CLASSES(trans, tnfa, eflags)))
657		{
658		  DPRINT(("  exact,  from %d: assert failed\n", id));
659		  continue;
660		}
661
662	      depth = reach[id].depth;
663	      dest_id = trans->state_id;
664
665	      cost = reach[id].costs[depth][TRE_M_COST];
666	      cost0 = reach[id].costs[0][TRE_M_COST];
667	      err = 0;
668
669	      if (trans->code_min > (tre_cint_t)prev_c
670		  || trans->code_max < (tre_cint_t)prev_c)
671		{
672		  /* Handle substitutes.  The required character was not in
673		     the string, so match it in place of whatever was supposed
674		     to be there and increase costs accordingly. */
675		  err = 1;
676
677		  /* Compute and check cost at current depth. */
678		  cost = reach[id].costs[depth][TRE_M_COST];
679		  if (reach[id].params.cost_subst != TRE_PARAM_UNSET)
680		    cost += reach[id].params.cost_subst;
681		  if (cost > reach[id].params.max_cost)
682		    continue; /* Cost too large. */
683
684		  /* Check number of substitutes at current depth. */
685		  if (reach[id].costs[depth][TRE_M_NUM_SUBST] + 1
686		      > reach[id].params.max_subst)
687		    continue; /* Too many substitutes. */
688
689		  /* Check total number of errors at current depth. */
690		  if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1
691		      > reach[id].params.max_err)
692		    continue; /* Too many errors. */
693
694		  /* Compute overall cost. */
695		  cost0 = cost;
696		  if (depth > 0)
697		    {
698		      cost0 = reach[id].costs[0][TRE_M_COST];
699		      if (reach[id].params.cost_subst != TRE_PARAM_UNSET)
700			cost0 += reach[id].params.cost_subst;
701		      else
702			cost0 += default_params.cost_subst;
703		    }
704		  DPRINT(("  subst,  from %03d to %03d, cost %d: ",
705			  id, dest_id, cost0));
706		}
707	      else
708		DPRINT(("  exact,  from %03d to %03d, cost %d: ",
709			id, dest_id, cost0));
710
711	      /* Compute tag values after this transition. */
712	      for (i = 0; i < num_tags; i++)
713		tmp_tags[i] = reach[id].tags[i];
714	      if (trans->tags)
715		for (i = 0; trans->tags[i] >= 0; i++)
716		  if ((size_t)trans->tags[i] < num_tags)
717		    tmp_tags[trans->tags[i]] = pos;
718
719	      /* If another path has also reached this state, choose the
720		 one with the smallest cost or best tags if costs are equal. */
721	      if (reach_next[dest_id].pos == pos
722		  && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST]
723		      || (cost0 == reach_next[dest_id].costs[0][TRE_M_COST]
724			  && !tre_tag_order(num_tags, tnfa->tag_directions,
725					    tmp_tags,
726					    reach_next[dest_id].tags))))
727		{
728		  DPRINT(("lose\n"));
729		  continue;
730		}
731	      DPRINT(("win %d %d\n",
732		      reach_next[dest_id].pos,
733		      reach_next[dest_id].costs[0][TRE_M_COST]));
734
735	      /* Set state, position, tags, and depth. */
736	      reach_next[dest_id].state = trans->state;
737	      reach_next[dest_id].pos = pos;
738	      for (i = 0; i < num_tags; i++)
739		reach_next[dest_id].tags[i] = tmp_tags[i];
740	      reach_next[dest_id].depth = reach[id].depth;
741
742	      /* Set parameters. */
743	      reach_next[dest_id].params = reach[id].params;
744	      if (trans->params)
745		tre_set_params(&reach_next[dest_id], trans->params,
746			       default_params);
747
748	      /* Set the costs after this transition. */
749		memcpy(&reach_next[dest_id].costs,
750		       reach[id].costs,
751		       sizeof(reach[id].costs[0][0])
752		       * TRE_M_LAST * (depth + 1));
753	      reach_next[dest_id].costs[depth][TRE_M_COST] = cost;
754	      reach_next[dest_id].costs[depth][TRE_M_NUM_SUBST] += err;
755	      reach_next[dest_id].costs[depth][TRE_M_NUM_ERR] += err;
756	      if (depth > 0)
757		{
758		  reach_next[dest_id].costs[0][TRE_M_COST] = cost0;
759		  reach_next[dest_id].costs[0][TRE_M_NUM_SUBST] += err;
760		  reach_next[dest_id].costs[0][TRE_M_NUM_ERR] += err;
761		}
762
763	      if (trans->state == tnfa->final
764		  && (match_eo < 0
765		      || cost0 < match_costs[TRE_M_COST]
766		      || (cost0 == match_costs[TRE_M_COST]
767			  && num_tags > 0 && tmp_tags[0] <= match_tags[0])))
768		{
769		  DPRINT(("    setting new match at %d, cost %d\n",
770			  pos, cost0));
771		  match_eo = pos;
772		  for (i = 0; i < TRE_M_LAST; i++)
773		    match_costs[i] = reach_next[dest_id].costs[0][i];
774		  for (i = 0; i < num_tags; i++)
775		    match_tags[i] = tmp_tags[i];
776		}
777	    }
778	}
779    }
780
781  DPRINT(("match end offset = %d, match cost = %d\n", match_eo,
782	  match_costs[TRE_M_COST]));
783
784#ifndef TRE_USE_ALLOCA
785  if (buf)
786    xfree(buf);
787#endif /* !TRE_USE_ALLOCA */
788
789  match->cost = match_costs[TRE_M_COST];
790  match->num_ins = match_costs[TRE_M_NUM_INS];
791  match->num_del = match_costs[TRE_M_NUM_DEL];
792  match->num_subst = match_costs[TRE_M_NUM_SUBST];
793  *match_end_ofs = match_eo;
794
795  return match_eo >= 0 ? REG_OK : REG_NOMATCH;
796}
797