1/*
2  tre-match-utils.h - TRE matcher helper definitions
3
4  This software is released under a BSD-style license.
5  See the file LICENSE for details and copyright.
6
7*/
8
/* View the enclosing function's `string' argument as a pointer to a
   const tre_str_source. */
#define str_source ((const tre_str_source *)(string))
10
11#ifdef TRE_WCHAR
12
13#ifdef TRE_MULTIBYTE
14
15/* Wide character and multibyte support. */
16
/*
 * GET_NEXT_WCHAR() -- step the matcher to the next input character.
 *
 * Expands in place and operates on names of the enclosing function:
 * `type', `pos', `len', `prev_c', `next_c', `str_byte', `str_wide',
 * `pos_add_next', `mbstate', `str_user_end', `ret' and the label
 * `error_exit'.  The old `next_c' is saved in `prev_c', then `next_c' is
 * loaded according to `type':
 *
 *   STR_BYTE  `pos' is advanced by one; one byte is read through
 *             `str_byte', or NUL is produced when `len' is non-negative
 *             and `pos' has reached it.
 *   STR_WIDE  same as STR_BYTE, reading through `str_wide'.
 *   STR_MBS   `pos' is advanced by `pos_add_next' and one character is
 *             decoded with tre_mbrtowc().  If tre_mbrtowc() returns
 *             (size_t)-1 or (size_t)-2, REG_NOMATCH is stored in `ret'
 *             and control jumps to `error_exit'.
 *   STR_USER  `pos' is advanced by `pos_add_next' and the character is
 *             fetched through the get_next_char callback of `str_source';
 *             the callback's return value is stored in `str_user_end'.
 *
 * For any other value of `type' only the `prev_c' update is performed.
 */
#define GET_NEXT_WCHAR()                                                      \
  do {                                                                        \
    prev_c = next_c;                                                          \
    if (type == STR_BYTE)                                                     \
      {                                                                       \
        pos++;                                                                \
        if (len < 0 || pos < len)                                             \
          next_c = (unsigned char)(*str_byte++);                              \
        else                                                                  \
          next_c = '\0';                                                      \
      }                                                                       \
    else if (type == STR_WIDE)                                                \
      {                                                                       \
        pos++;                                                                \
        if (len < 0 || pos < len)                                             \
          next_c = *str_wide++;                                               \
        else                                                                  \
          next_c = L'\0';                                                     \
      }                                                                       \
    else if (type == STR_MBS)                                                 \
      {                                                                       \
        pos += pos_add_next;                                                  \
        if (str_byte == NULL)                                                 \
          next_c = L'\0';                                                     \
        else                                                                  \
          {                                                                   \
            /* Byte budget: what is left of `len', or 32 if len < 0. */       \
            long mb_limit = 32;                                               \
            if (len >= 0)                                                     \
              mb_limit = len - pos;                                           \
            if (mb_limit <= 0)                                                \
              {                                                               \
                next_c = L'\0';                                               \
                pos_add_next = 1;                                             \
              }                                                               \
            else                                                              \
              {                                                               \
                size_t mb_used = tre_mbrtowc(&next_c, str_byte,               \
                                             (size_t)mb_limit, &mbstate);     \
                if (mb_used == (size_t)-1 || mb_used == (size_t)-2)           \
                  {                                                           \
                    ret = REG_NOMATCH;                                        \
                    goto error_exit;                                          \
                  }                                                           \
                if (mb_used == 0 && len >= 0)                                 \
                  {                                                           \
                    /* Zero result with a known length: skip one byte. */     \
                    pos_add_next = 1;                                         \
                    next_c = 0;                                               \
                    str_byte++;                                               \
                  }                                                           \
                else                                                          \
                  {                                                           \
                    pos_add_next = (unsigned int)mb_used;                     \
                    str_byte += mb_used;                                      \
                  }                                                           \
              }                                                               \
          }                                                                   \
      }                                                                       \
    else if (type == STR_USER)                                                \
      {                                                                       \
        pos += pos_add_next;                                                  \
        str_user_end = str_source->get_next_char(&next_c, &pos_add_next,      \
                                                 str_source->context);        \
      }                                                                       \
  } while(/*CONSTCOND*/(void)0,0)
79
80#else /* !TRE_MULTIBYTE */
81
82/* Wide character support, no multibyte support. */
83
/*
 * GET_NEXT_WCHAR() -- step the matcher to the next input character.
 *
 * Expands in place and operates on names of the enclosing function:
 * `type', `pos', `len', `prev_c', `next_c', `str_byte', `str_wide',
 * `pos_add_next' and `str_user_end'.  The old `next_c' is saved in
 * `prev_c', then `next_c' is loaded according to `type':
 *
 *   STR_BYTE  `pos' is advanced by one; one byte is read through
 *             `str_byte', or NUL is produced when `len' is non-negative
 *             and `pos' has reached it.
 *   STR_WIDE  same as STR_BYTE, reading through `str_wide'.
 *   STR_USER  `pos' is advanced by `pos_add_next' and the character is
 *             fetched through the get_next_char callback of `str_source';
 *             the callback's return value is stored in `str_user_end'.
 *
 * For any other value of `type' only the `prev_c' update is performed.
 */
#define GET_NEXT_WCHAR()                                                      \
  do {                                                                        \
    prev_c = next_c;                                                          \
    if (type == STR_BYTE)                                                     \
      {                                                                       \
        pos++;                                                                \
        if (len < 0 || pos < len)                                             \
          next_c = (unsigned char)(*str_byte++);                              \
        else                                                                  \
          next_c = '\0';                                                      \
      }                                                                       \
    else if (type == STR_WIDE)                                                \
      {                                                                       \
        pos++;                                                                \
        if (len < 0 || pos < len)                                             \
          next_c = *str_wide++;                                               \
        else                                                                  \
          next_c = L'\0';                                                     \
      }                                                                       \
    else if (type == STR_USER)                                                \
      {                                                                       \
        pos += pos_add_next;                                                  \
        str_user_end = str_source->get_next_char(&next_c, &pos_add_next,      \
                                                 str_source->context);        \
      }                                                                       \
  } while(/*CONSTCOND*/(void)0,0)
108
109#endif /* !TRE_MULTIBYTE */
110
111#else /* !TRE_WCHAR */
112
113/* No wide character or multibyte support. */
114
/*
 * GET_NEXT_WCHAR() -- step the matcher to the next input character.
 *
 * Expands in place and operates on names of the enclosing function:
 * `type', `pos', `len', `prev_c', `next_c', `str_byte', `pos_add_next'
 * and `str_user_end'.  The old `next_c' is saved in `prev_c', then
 * `next_c' is loaded according to `type':
 *
 *   STR_BYTE  `pos' is advanced by one; one byte is read through
 *             `str_byte', or NUL is produced when `len' is non-negative
 *             and `pos' has reached it.
 *   STR_USER  `pos' is advanced by `pos_add_next' and the character is
 *             fetched through the get_next_char callback of `str_source';
 *             the callback's return value is stored in `str_user_end'.
 *
 * For any other value of `type' only the `prev_c' update is performed.
 */
#define GET_NEXT_WCHAR()                                                      \
  do {                                                                        \
    prev_c = next_c;                                                          \
    if (type == STR_BYTE)                                                     \
      {                                                                       \
        pos++;                                                                \
        if (len < 0 || pos < len)                                             \
          next_c = (unsigned char)(*str_byte++);                              \
        else                                                                  \
          next_c = '\0';                                                      \
      }                                                                       \
    else if (type == STR_USER)                                                \
      {                                                                       \
        pos += pos_add_next;                                                  \
        str_user_end = str_source->get_next_char(&next_c, &pos_add_next,      \
                                                 str_source->context);        \
      }                                                                       \
  } while(/*CONSTCOND*/(void)0,0)
132
133#endif /* !TRE_WCHAR */
134
135
136
/* Evaluates to 1 if `c' is an underscore or tre_isalnum(c) is nonzero,
   and to 0 otherwise.  `c' may be evaluated twice. */
#define IS_WORD_CHAR(c)	 ((c) == L'_' ? 1 : (tre_isalnum(c) != 0))
138
/*
 * CHECK_ASSERTIONS(assertions) -- test position assertions at the current
 * match position.
 *
 * `assertions' is a bit mask of ASSERT_AT_* flags.  The macro evaluates to
 * nonzero when at least one of the requested assertions does NOT hold, and
 * to zero when all requested assertions hold.  It reads `pos', `prev_c',
 * `next_c', `reg_notbol', `reg_noteol' and `reg_newline' from the enclosing
 * function.
 *
 *   ASSERT_AT_BOL     fails when pos > 0 or reg_notbol is set, unless
 *                     reg_newline is set and prev_c is a newline.
 *   ASSERT_AT_EOL     fails when next_c is not NUL or reg_noteol is set,
 *                     unless reg_newline is set and next_c is a newline.
 *   ASSERT_AT_BOW     fails when prev_c is a word character or next_c is
 *                     not one.
 *   ASSERT_AT_EOW     fails when prev_c is not a word character or next_c
 *                     is one.
 *   ASSERT_AT_WB      fails when pos != 0, next_c is not NUL, and prev_c
 *                     and next_c are both word or both non-word characters.
 *   ASSERT_AT_WB_NEG  fails when pos == 0, next_c is NUL, or exactly one
 *                     of prev_c and next_c is a word character.
 *
 * The parameter is parenthesized at every use so that an expression
 * argument such as `a | b' is masked as a whole.
 */
#define CHECK_ASSERTIONS(assertions)					      \
  ((((assertions) & ASSERT_AT_BOL)					      \
    && (pos > 0 || reg_notbol)						      \
    && (prev_c != L'\n' || !reg_newline))				      \
   || (((assertions) & ASSERT_AT_EOL)					      \
       && (next_c != L'\0' || reg_noteol)				      \
       && (next_c != L'\n' || !reg_newline))				      \
   || (((assertions) & ASSERT_AT_BOW)					      \
       && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c)))		      \
   || (((assertions) & ASSERT_AT_EOW)					      \
       && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c)))		      \
   || (((assertions) & ASSERT_AT_WB)					      \
       && (pos != 0 && next_c != L'\0'					      \
	   && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c)))		      \
   || (((assertions) & ASSERT_AT_WB_NEG)				      \
       && (pos == 0 || next_c == L'\0'					      \
	   || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))
156
/*
 * CHECK_CHAR_CLASSES(trans_i, tnfa, eflags) -- test the character class
 * assertions of transition `trans_i' against `prev_c' (read from the
 * enclosing function).
 *
 * Evaluates to nonzero when a class assertion does NOT hold:
 *
 *   - ASSERT_CHAR_CLASS is set, REG_ICASE is not set in tnfa->cflags, and
 *     tre_isctype() rejects prev_c for trans_i->u.class;
 *   - ASSERT_CHAR_CLASS is set, REG_ICASE is set, and tre_isctype() rejects
 *     both the lower-case and the upper-case form of prev_c;
 *   - ASSERT_CHAR_CLASS_NEG is set and tre_neg_char_classes_match() returns
 *     nonzero for trans_i->neg_classes.
 *
 * `eflags' is accepted but not used by the expansion.  `trans_i' and `tnfa'
 * are parenthesized at every use so that expression arguments (for example
 * `&t' or `p + 1') bind correctly with `->'.
 */
#define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)                             \
  ((((trans_i)->assertions & ASSERT_CHAR_CLASS)                               \
       && !((tnfa)->cflags & REG_ICASE)                                       \
       && !tre_isctype((tre_cint_t)prev_c, (trans_i)->u.class))               \
    || (((trans_i)->assertions & ASSERT_CHAR_CLASS)                           \
        && ((tnfa)->cflags & REG_ICASE)                                       \
        && !tre_isctype(tre_tolower((tre_cint_t)prev_c),                      \
                        (trans_i)->u.class)                                   \
        && !tre_isctype(tre_toupper((tre_cint_t)prev_c),                      \
                        (trans_i)->u.class))                                  \
    || (((trans_i)->assertions & ASSERT_CHAR_CLASS_NEG)                       \
        && tre_neg_char_classes_match((trans_i)->neg_classes,                 \
                                      (tre_cint_t)prev_c,                     \
                                      (tnfa)->cflags & REG_ICASE)))
168
169
170
171
172/* Returns 1 if `t1' wins `t2', 0 otherwise. */
173inline static int
174tre_tag_order(size_t num_tags, tre_tag_direction_t *tag_directions,
175	      int *t1, int *t2)
176{
177  size_t i;
178  for (i = 0; i < num_tags; i++)
179    {
180      if (tag_directions[i] == TRE_TAG_MINIMIZE)
181	{
182	  if (t1[i] < t2[i])
183	    return 1;
184	  if (t1[i] > t2[i])
185	    return 0;
186	}
187      else
188	{
189	  if (t1[i] > t2[i])
190	    return 1;
191	  if (t1[i] < t2[i])
192	    return 0;
193	}
194    }
195  /*  assert(0);*/
196  return 0;
197}
198
199inline static int
200tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase)
201{
202  DPRINT(("neg_char_classes_test: %p, %d, %d\n", classes, wc, icase));
203  while (*classes != (tre_ctype_t)0)
204    if ((!icase && tre_isctype(wc, *classes))
205	|| (icase && (tre_isctype(tre_toupper(wc), *classes)
206		      || tre_isctype(tre_tolower(wc), *classes))))
207      return 1; /* Match. */
208    else
209      classes++;
210  return 0; /* No match. */
211}
212