1/*
2  tre-match-utils.h - TRE matcher helper definitions
3
4  This software is released under a BSD-style license.
5  See the file LICENSE for details and copyright.
6
7*/
8
/* Views the `string' variable that is in scope at the point of use as a
   pointer to a constant tre_str_source.  The STR_USER branches of
   GET_NEXT_WCHAR() fetch characters through it. */
#define str_source ((const tre_str_source *)(string))
10
11#ifdef TRE_WCHAR
12
13#ifdef TRE_MULTIBYTE
14
/* Wide character and multibyte support.

   GET_NEXT_WCHAR() copies `next_c' into `prev_c' and then loads the
   following input character into `next_c'.  It works on variables of the
   enclosing function: `type', `pos', `len', `pos_add_next', `str_byte',
   `str_wide', `mbstate' and `str_user_end'.

   For STR_BYTE and STR_WIDE input, a position at or past a non-negative
   `len' yields a null character without reading from the string.  For
   STR_MBS input, an invalid or incomplete multibyte sequence makes the
   enclosing function return REG_NOMATCH.  For STR_USER input the
   character comes from the get_next_char callback of `str_source'. */

#define GET_NEXT_WCHAR()                                                      \
  do {                                                                        \
    prev_c = next_c;                                                          \
    if (type == STR_BYTE)                                                     \
      {                                                                       \
        pos++;                                                                \
        next_c = (len < 0 || pos < len)                                       \
                 ? (unsigned char)(*str_byte++) : '\0';                       \
      }                                                                       \
    else if (type == STR_WIDE)                                                \
      {                                                                       \
        pos++;                                                                \
        next_c = (len < 0 || pos < len) ? *str_wide++ : L'\0';                \
      }                                                                       \
    else if (type == STR_MBS)                                                 \
      {                                                                       \
        pos += pos_add_next;                                                  \
        if (str_byte == NULL)                                                 \
          next_c = L'\0';                                                     \
        else                                                                  \
          {                                                                   \
            /* Byte budget for the decoder; 32 when `len' is negative. */     \
            int avail = (len >= 0) ? len - pos : 32;                          \
            if (avail <= 0)                                                   \
              {                                                               \
                next_c = L'\0';                                               \
                pos_add_next = 1;                                             \
              }                                                               \
            else                                                              \
              {                                                               \
                size_t consumed = tre_mbrtowc(&next_c, str_byte,              \
                                              (size_t)avail, &mbstate);       \
                if (consumed == (size_t)-1 || consumed == (size_t)-2)         \
                  return REG_NOMATCH;                                         \
                if (consumed == 0 && len >= 0)                                \
                  {                                                           \
                    /* Null inside a counted string: step over one byte. */   \
                    pos_add_next = 1;                                         \
                    next_c = 0;                                               \
                    str_byte++;                                               \
                  }                                                           \
                else                                                          \
                  {                                                           \
                    pos_add_next = consumed;                                  \
                    str_byte += consumed;                                     \
                  }                                                           \
              }                                                               \
          }                                                                   \
      }                                                                       \
    else if (type == STR_USER)                                                \
      {                                                                       \
        pos += pos_add_next;                                                  \
        str_user_end = str_source->get_next_char(&next_c, &pos_add_next,      \
                                                 str_source->context);        \
      }                                                                       \
  } while(/*CONSTCOND*/0)
80
81#else /* !TRE_MULTIBYTE */
82
/* Wide character support, no multibyte support.

   GET_NEXT_WCHAR() copies `next_c' into `prev_c' and then loads the
   following input character into `next_c'.  It works on variables of the
   enclosing function: `type', `pos', `len', `pos_add_next', `str_byte',
   `str_wide' and `str_user_end'.

   For STR_BYTE and STR_WIDE input, a position at or past a non-negative
   `len' yields a null character without reading from the string.  For
   STR_USER input the character comes from the get_next_char callback of
   `str_source'. */

#define GET_NEXT_WCHAR()                                                      \
  do {                                                                        \
    prev_c = next_c;                                                          \
    if (type == STR_BYTE)                                                     \
      {                                                                       \
        pos++;                                                                \
        next_c = (len < 0 || pos < len)                                       \
                 ? (unsigned char)(*str_byte++) : '\0';                       \
      }                                                                       \
    else if (type == STR_WIDE)                                                \
      {                                                                       \
        pos++;                                                                \
        next_c = (len < 0 || pos < len) ? *str_wide++ : L'\0';                \
      }                                                                       \
    else if (type == STR_USER)                                                \
      {                                                                       \
        pos += pos_add_next;                                                  \
        str_user_end = str_source->get_next_char(&next_c, &pos_add_next,      \
                                                 str_source->context);        \
      }                                                                       \
  } while(/*CONSTCOND*/0)
111
112#endif /* !TRE_MULTIBYTE */
113
114#else /* !TRE_WCHAR */
115
/* No wide character or multibyte support.

   GET_NEXT_WCHAR() copies `next_c' into `prev_c' and then loads the
   following input character into `next_c'.  It works on variables of the
   enclosing function: `type', `pos', `len', `pos_add_next', `str_byte'
   and `str_user_end'.

   For STR_BYTE input, a position at or past a non-negative `len' yields
   a null character without reading from the string.  For STR_USER input
   the character comes from the get_next_char callback of `str_source'. */

#define GET_NEXT_WCHAR()                                                      \
  do {                                                                        \
    prev_c = next_c;                                                          \
    if (type == STR_BYTE)                                                     \
      {                                                                       \
        pos++;                                                                \
        next_c = (len < 0 || pos < len)                                       \
                 ? (unsigned char)(*str_byte++) : '\0';                       \
      }                                                                       \
    else if (type == STR_USER)                                                \
      {                                                                       \
        pos += pos_add_next;                                                  \
        str_user_end = str_source->get_next_char(&next_c, &pos_add_next,      \
                                                 str_source->context);        \
      }                                                                       \
  } while(/*CONSTCOND*/0)
136
137#endif /* !TRE_WCHAR */
138
139
140
/* Evaluates to 1 if `c' is an underscore or tre_isalnum() reports it as
   alphanumeric, and to 0 otherwise.  `c' is evaluated twice when it is
   not an underscore, so it must not have side effects. */
#define IS_WORD_CHAR(c)	 (((c) == L'_') || (tre_isalnum(c) != 0))
142
/* Evaluates to nonzero if any assertion selected by the bit mask
   `assertions' is violated at the current position, and to 0 if all of
   them hold.  Reads the enclosing function's `pos', `prev_c', `next_c',
   `reg_notbol', `reg_noteol' and `reg_newline'.

   Each clause pairs one assertion bit with the condition under which that
   assertion fails:
     ASSERT_AT_BOL     not at position 0 (or `reg_notbol' is set), and not
		       just after a newline while `reg_newline' is set
     ASSERT_AT_EOL     not at the terminating null (or `reg_noteol' is set),
		       and not just before a newline while `reg_newline' is set
     ASSERT_AT_BOW     previous character is a word character, or the next
		       one is not
     ASSERT_AT_EOW     previous character is not a word character, or the
		       next one is
     ASSERT_AT_WB      strictly inside the input with both neighbours of the
		       same word/non-word kind
     ASSERT_AT_WB_NEG  at either end of the input, or the neighbours differ
		       in kind

   The argument is parenthesized at every use so that a compound
   expression such as `a | b' is tested as a whole. */
#define CHECK_ASSERTIONS(assertions)					      \
  ((((assertions) & ASSERT_AT_BOL)					      \
    && (pos > 0 || reg_notbol)						      \
    && (prev_c != L'\n' || !reg_newline))				      \
   || (((assertions) & ASSERT_AT_EOL)					      \
       && (next_c != L'\0' || reg_noteol)				      \
       && (next_c != L'\n' || !reg_newline))				      \
   || (((assertions) & ASSERT_AT_BOW)					      \
       && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c)))		      \
   || (((assertions) & ASSERT_AT_EOW)					      \
       && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c)))		      \
   || (((assertions) & ASSERT_AT_WB)					      \
       && (pos != 0 && next_c != L'\0'					      \
	   && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c)))		      \
   || (((assertions) & ASSERT_AT_WB_NEG)				      \
       && (pos == 0 || next_c == L'\0'					      \
	   || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))
160
/* Evaluates to nonzero if the character in `prev_c' fails the character
   class requirements of the transition `trans_i', and to 0 otherwise.

   With ASSERT_CHAR_CLASS set in `trans_i->assertions', `prev_c' must
   belong to `trans_i->u.class'; when `tnfa->cflags' has REG_ICASE, it is
   enough that either its lower case or its upper case form belongs.
   With ASSERT_CHAR_CLASS_NEG set, tre_neg_char_classes_match() must not
   report a match against `trans_i->neg_classes'.

   `eflags' is accepted but not used by the expansion.  `trans_i' and
   `tnfa' are parenthesized at every use so that arguments such as `&obj'
   expand correctly. */
#define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)                             \
  ((((trans_i)->assertions & ASSERT_CHAR_CLASS)                               \
       && !((tnfa)->cflags & REG_ICASE)                                       \
       && !tre_isctype((tre_cint_t)prev_c, (trans_i)->u.class))               \
    || (((trans_i)->assertions & ASSERT_CHAR_CLASS)                           \
        && ((tnfa)->cflags & REG_ICASE)                                       \
        && !tre_isctype(tre_tolower((tre_cint_t)prev_c),                      \
                        (trans_i)->u.class)                                   \
        && !tre_isctype(tre_toupper((tre_cint_t)prev_c),                      \
                        (trans_i)->u.class))                                  \
    || (((trans_i)->assertions & ASSERT_CHAR_CLASS_NEG)                       \
        && tre_neg_char_classes_match((trans_i)->neg_classes,                 \
                                      (tre_cint_t)prev_c,                     \
                                      (tnfa)->cflags & REG_ICASE)))
172
173
174
175
176/* Returns 1 if `t1' wins `t2', 0 otherwise. */
177inline static int
178tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions,
179	      int *t1, int *t2)
180{
181  int i;
182  for (i = 0; i < num_tags; i++)
183    {
184      if (tag_directions[i] == TRE_TAG_MINIMIZE)
185	{
186	  if (t1[i] < t2[i])
187	    return 1;
188	  if (t1[i] > t2[i])
189	    return 0;
190	}
191      else
192	{
193	  if (t1[i] > t2[i])
194	    return 1;
195	  if (t1[i] < t2[i])
196	    return 0;
197	}
198    }
199  /*  assert(0);*/
200  return 0;
201}
202
203inline static int
204tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase)
205{
206  DPRINT(("neg_char_classes_test: %p, %d, %d\n", classes, wc, icase));
207  while (*classes != (tre_ctype_t)0)
208    if ((!icase && tre_isctype(wc, *classes))
209	|| (icase && (tre_isctype(tre_toupper(wc), *classes)
210		      || tre_isctype(tre_tolower(wc), *classes))))
211      return 1; /* Match. */
212    else
213      classes++;
214  return 0; /* No match. */
215}
216