/*
  tre-match-utils.h - TRE matcher helper definitions

  This software is released under a BSD-style license.
  See the file LICENSE for details and copyright.

*/

/* Views the expanding function's `string' variable as a user-supplied
   character source.  Used by the STR_USER branches of GET_NEXT_WCHAR. */
#define str_source ((const tre_str_source*)string)

/*
  GET_NEXT_WCHAR() advances the matcher by one input character.

  It is a statement macro that operates directly on variables of the
  function it is expanded in: `type', `len', `pos', `prev_c', `next_c',
  `str_byte', `pos_add_next', `str_user_end', `string' and, depending on
  the configuration, `str_wide' and `mbstate'.

  On every expansion the old `next_c' is saved into `prev_c', `pos' is
  advanced, and the next character is loaded into `next_c':

    STR_BYTE / STR_WIDE  `pos' is incremented by one.  When `len' is
                         non-negative and `pos' has reached it, `next_c'
                         becomes the NUL character; otherwise one element
                         is read and the string pointer is advanced.
    STR_MBS              `pos' is advanced by `pos_add_next' (the width
                         recorded for the previously decoded character)
                         and one character is decoded with tre_mbrtowc().
                         NOTE: if decoding reports (size_t)-1 or
                         (size_t)-2 the macro executes
                         `return REG_NOMATCH;' in the expanding function.
    STR_USER             `pos' is advanced by `pos_add_next' and the
                         source's get_next_char() callback fills in
                         `next_c' and `pos_add_next'; its return value is
                         stored in `str_user_end'.

  Which input types are handled depends on TRE_WCHAR and TRE_MULTIBYTE.
*/

#ifdef TRE_WCHAR

#ifdef TRE_MULTIBYTE

/* Wide character and multibyte support. */

#define GET_NEXT_WCHAR() \
  do { \
    prev_c = next_c; \
    if (type == STR_BYTE) \
      { \
        pos++; \
        if (len >= 0 && pos >= len) \
          next_c = '\0'; \
        else \
          next_c = (unsigned char)(*str_byte++); \
      } \
    else if (type == STR_WIDE) \
      { \
        pos++; \
        if (len >= 0 && pos >= len) \
          next_c = L'\0'; \
        else \
          next_c = *str_wide++; \
      } \
    else if (type == STR_MBS) \
      { \
        pos += pos_add_next; \
        if (str_byte == NULL) \
          next_c = L'\0'; \
        else \
          { \
            size_t w; \
            int max; \
            /* Byte budget handed to the decoder: what is left of a */ \
            /* counted string, or 32 when the length is unknown. */ \
            if (len >= 0) \
              max = len - pos; \
            else \
              max = 32; \
            if (max <= 0) \
              { \
                next_c = L'\0'; \
                pos_add_next = 1; \
              } \
            else \
              { \
                w = tre_mbrtowc(&next_c, str_byte, (size_t)max, &mbstate); \
                if (w == (size_t)-1 || w == (size_t)-2) \
                  return REG_NOMATCH; \
                if (w == 0 && len >= 0) \
                  { \
                    /* A zero result inside a counted string: step */ \
                    /* over one byte and report a NUL character. */ \
                    pos_add_next = 1; \
                    next_c = 0; \
                    str_byte++; \
                  } \
                else \
                  { \
                    pos_add_next = w; \
                    str_byte += w; \
                  } \
              } \
          } \
      } \
    else if (type == STR_USER) \
      { \
        pos += pos_add_next; \
        str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
                                                 str_source->context); \
      } \
  } while(/*CONSTCOND*/0)

#else /* !TRE_MULTIBYTE */

/* Wide character support, no multibyte support. */

#define GET_NEXT_WCHAR() \
  do { \
    prev_c = next_c; \
    if (type == STR_BYTE) \
      { \
        pos++; \
        if (len >= 0 && pos >= len) \
          next_c = '\0'; \
        else \
          next_c = (unsigned char)(*str_byte++); \
      } \
    else if (type == STR_WIDE) \
      { \
        pos++; \
        if (len >= 0 && pos >= len) \
          next_c = L'\0'; \
        else \
          next_c = *str_wide++; \
      } \
    else if (type == STR_USER) \
      { \
        pos += pos_add_next; \
        str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
                                                 str_source->context); \
      } \
  } while(/*CONSTCOND*/0)

#endif /* !TRE_MULTIBYTE */

#else /* !TRE_WCHAR */

/* No wide character or multibyte support. */

#define GET_NEXT_WCHAR() \
  do { \
    prev_c = next_c; \
    if (type == STR_BYTE) \
      { \
        pos++; \
        if (len >= 0 && pos >= len) \
          next_c = '\0'; \
        else \
          next_c = (unsigned char)(*str_byte++); \
      } \
    else if (type == STR_USER) \
      { \
        pos += pos_add_next; \
        str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
                                                 str_source->context); \
      } \
  } while(/*CONSTCOND*/0)

#endif /* !TRE_WCHAR */



/* Nonzero if `c' is an underscore or alphanumeric according to
   tre_isalnum().  `c' may be evaluated twice, so it must not have side
   effects. */
#define IS_WORD_CHAR(c) ((c) == L'_' || tre_isalnum(c))

/*
  CHECK_ASSERTIONS(assertions) evaluates to nonzero when at least one of
  the position assertions requested in the bit mask `assertions' does NOT
  hold at the current position, and to zero when all of them hold.

  It reads `pos', `prev_c', `next_c', `reg_notbol', `reg_noteol' and
  `reg_newline' from the expanding function.  Per flag, the assertion is
  considered violated when:

    ASSERT_AT_BOL     not at position 0 (or `reg_notbol' is set) and not
                      directly after a newline in `reg_newline' mode.
    ASSERT_AT_EOL     `next_c' is not NUL (or `reg_noteol' is set) and
                      `next_c' is not a newline in `reg_newline' mode.
    ASSERT_AT_BOW     `prev_c' is a word character or `next_c' is not.
    ASSERT_AT_EOW     `prev_c' is not a word character or `next_c' is.
    ASSERT_AT_WB      away from both string ends, `prev_c' and `next_c'
                      are both word or both non-word characters.
    ASSERT_AT_WB_NEG  at position 0, at a NUL `next_c', or `prev_c' and
                      `next_c' differ in being word characters.

  The argument is parenthesized at every use so that compound
  expressions such as `a | b' are masked as a whole.
*/
#define CHECK_ASSERTIONS(assertions) \
  ((((assertions) & ASSERT_AT_BOL) \
    && (pos > 0 || reg_notbol) \
    && (prev_c != L'\n' || !reg_newline)) \
   || (((assertions) & ASSERT_AT_EOL) \
       && (next_c != L'\0' || reg_noteol) \
       && (next_c != L'\n' || !reg_newline)) \
   || (((assertions) & ASSERT_AT_BOW) \
       && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c))) \
   || (((assertions) & ASSERT_AT_EOW) \
       && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c))) \
   || (((assertions) & ASSERT_AT_WB) \
       && (pos != 0 && next_c != L'\0' \
           && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c))) \
   || (((assertions) & ASSERT_AT_WB_NEG) \
       && (pos == 0 || next_c == L'\0' \
           || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))

161#define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags) \ 162 (((trans_i->assertions & ASSERT_CHAR_CLASS) \ 163 && !(tnfa->cflags & REG_ICASE) \ 164 && !tre_isctype((tre_cint_t)prev_c, trans_i->u.class)) \ 165 || ((trans_i->assertions & ASSERT_CHAR_CLASS) \ 166 && (tnfa->cflags & REG_ICASE) \ 167 && !tre_isctype(tre_tolower((tre_cint_t)prev_c),trans_i->u.class) \ 168 && !tre_isctype(tre_toupper((tre_cint_t)prev_c),trans_i->u.class)) \ 169 || ((trans_i->assertions & ASSERT_CHAR_CLASS_NEG) \ 170 && tre_neg_char_classes_match(trans_i->neg_classes,(tre_cint_t)prev_c,\ 171 tnfa->cflags & REG_ICASE))) 172 173 174 175 176/* Returns 1 if `t1' wins `t2', 0 otherwise. */ 177inline static int 178tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions, 179 int *t1, int *t2) 180{ 181 int i; 182 for (i = 0; i < num_tags; i++) 183 { 184 if (tag_directions[i] == TRE_TAG_MINIMIZE) 185 { 186 if (t1[i] < t2[i]) 187 return 1; 188 if (t1[i] > t2[i]) 189 return 0; 190 } 191 else 192 { 193 if (t1[i] > t2[i]) 194 return 1; 195 if (t1[i] < t2[i]) 196 return 0; 197 } 198 } 199 /* assert(0);*/ 200 return 0; 201} 202 203inline static int 204tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase) 205{ 206 DPRINT(("neg_char_classes_test: %p, %d, %d\n", classes, wc, icase)); 207 while (*classes != (tre_ctype_t)0) 208 if ((!icase && tre_isctype(wc, *classes)) 209 || (icase && (tre_isctype(tre_toupper(wc), *classes) 210 || tre_isctype(tre_tolower(wc), *classes)))) 211 return 1; /* Match. */ 212 else 213 classes++; 214 return 0; /* No match. */ 215} 216