1/* 2 tre-match-utils.h - TRE matcher helper definitions 3 4 This software is released under a BSD-style license. 5 See the file LICENSE for details and copyright. 6 7*/ 8 9#define str_source ((const tre_str_source*)string) 10 11#ifdef TRE_WCHAR 12 13#ifdef TRE_MULTIBYTE 14 15/* Wide character and multibyte support. */ 16 17#define GET_NEXT_WCHAR() \ 18 do { \ 19 prev_c = next_c; \ 20 switch (type) { \ 21 case STR_BYTE: \ 22 pos++; \ 23 if (len >= 0 && pos >= len) \ 24 next_c = '\0'; \ 25 else \ 26 next_c = (unsigned char)(*str_byte++); \ 27 break; \ 28 case STR_WIDE: \ 29 pos++; \ 30 if (len >= 0 && pos >= len) \ 31 next_c = L'\0'; \ 32 else \ 33 next_c = *str_wide++; \ 34 break; \ 35 case STR_MBS: \ 36 pos += pos_add_next; \ 37 if (str_byte == NULL) \ 38 next_c = L'\0'; \ 39 else \ 40 { \ 41 size_t w; \ 42 long max; \ 43 if (len >= 0) \ 44 max = len - pos; \ 45 else \ 46 max = 32; \ 47 if (max <= 0) \ 48 { \ 49 next_c = L'\0'; \ 50 pos_add_next = 1; \ 51 } \ 52 else \ 53 { \ 54 w = tre_mbrtowc(&next_c, str_byte, (size_t)max, &mbstate); \ 55 if (w == (size_t)-1 || w == (size_t)-2) { \ 56 ret = REG_NOMATCH; \ 57 goto error_exit; \ 58 } \ 59 if (w == 0 && len >= 0) \ 60 { \ 61 pos_add_next = 1; \ 62 next_c = 0; \ 63 str_byte++; \ 64 } \ 65 else \ 66 { \ 67 pos_add_next = (unsigned int)w; \ 68 str_byte += w; \ 69 } \ 70 } \ 71 } \ 72 break; \ 73 case STR_USER: \ 74 pos += pos_add_next; \ 75 str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \ 76 str_source->context); \ 77 } \ 78 } while(/*CONSTCOND*/(void)0,0) 79 80#else /* !TRE_MULTIBYTE */ 81 82/* Wide character support, no multibyte support. */ 83 84#define GET_NEXT_WCHAR() \ 85 do { \ 86 prev_c = next_c; \ 87 switch (type) { \ 88 case STR_BYTE: \ 89 pos++; \ 90 if (len >= 0 && pos >= len) \ 91 next_c = '\0'; \ 92 else \ 93 next_c = (unsigned char)(*str_byte++); \ 94 break; \ 95 case STR_WIDE: \ 96 pos++; \ 97 if (len >= 0 && pos >= len) \ 98 next_c = L'\0'; \ 99 else \ 100 next_c = *str_wide++; \ 101 break; \ 102 case STR_USER: \ 103 pos += pos_add_next; \ 104 str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \ 105 str_source->context); \ 106 } \ 107 } while(/*CONSTCOND*/(void)0,0) 108 109#endif /* !TRE_MULTIBYTE */ 110 111#else /* !TRE_WCHAR */ 112 113/* No wide character or multibyte support. */ 114 115#define GET_NEXT_WCHAR() \ 116 do { \ 117 prev_c = next_c; \ 118 switch (type) { \ 119 case STR_BYTE: \ 120 pos++; \ 121 if (len >= 0 && pos >= len) \ 122 next_c = '\0'; \ 123 else \ 124 next_c = (unsigned char)(*str_byte++); \ 125 break; \ 126 case STR_USER: \ 127 pos += pos_add_next; \ 128 str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \ 129 str_source->context); \ 130 } \ 131 } while(/*CONSTCOND*/(void)0,0) 132 133#endif /* !TRE_WCHAR */ 134 135 136 137#define IS_WORD_CHAR(c) ((c) == L'_' || tre_isalnum(c)) 138 139#define CHECK_ASSERTIONS(assertions) \ 140 (((assertions & ASSERT_AT_BOL) \ 141 && (pos > 0 || reg_notbol) \ 142 && (prev_c != L'\n' || !reg_newline)) \ 143 || ((assertions & ASSERT_AT_EOL) \ 144 && (next_c != L'\0' || reg_noteol) \ 145 && (next_c != L'\n' || !reg_newline)) \ 146 || ((assertions & ASSERT_AT_BOW) \ 147 && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c))) \ 148 || ((assertions & ASSERT_AT_EOW) \ 149 && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c))) \ 150 || ((assertions & ASSERT_AT_WB) \ 151 && (pos != 0 && next_c != L'\0' \ 152 && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c))) \ 153 || ((assertions & ASSERT_AT_WB_NEG) \ 154 && (pos == 0 || next_c == L'\0' \ 155 || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c)))) 156 157#define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags) \ 158 (((trans_i->assertions & ASSERT_CHAR_CLASS) \ 159 && !(tnfa->cflags & REG_ICASE) \ 160 && !tre_isctype((tre_cint_t)prev_c, trans_i->u.class)) \ 161 || ((trans_i->assertions & ASSERT_CHAR_CLASS) \ 162 && (tnfa->cflags & REG_ICASE) \ 163 && !tre_isctype(tre_tolower((tre_cint_t)prev_c),trans_i->u.class) \ 164 && !tre_isctype(tre_toupper((tre_cint_t)prev_c),trans_i->u.class)) \ 165 || ((trans_i->assertions & ASSERT_CHAR_CLASS_NEG) \ 166 && tre_neg_char_classes_match(trans_i->neg_classes,(tre_cint_t)prev_c,\ 167 tnfa->cflags & REG_ICASE))) 168 169 170 171 172/* Returns 1 if `t1' wins `t2', 0 otherwise. */ 173inline static int 174tre_tag_order(size_t num_tags, tre_tag_direction_t *tag_directions, 175 int *t1, int *t2) 176{ 177 size_t i; 178 for (i = 0; i < num_tags; i++) 179 { 180 if (tag_directions[i] == TRE_TAG_MINIMIZE) 181 { 182 if (t1[i] < t2[i]) 183 return 1; 184 if (t1[i] > t2[i]) 185 return 0; 186 } 187 else 188 { 189 if (t1[i] > t2[i]) 190 return 1; 191 if (t1[i] < t2[i]) 192 return 0; 193 } 194 } 195 /* assert(0);*/ 196 return 0; 197} 198 199inline static int 200tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase) 201{ 202 DPRINT(("neg_char_classes_test: %p, %d, %d\n", classes, wc, icase)); 203 while (*classes != (tre_ctype_t)0) 204 if ((!icase && tre_isctype(wc, *classes)) 205 || (icase && (tre_isctype(tre_toupper(wc), *classes) 206 || tre_isctype(tre_tolower(wc), *classes)))) 207 return 1; /* Match. */ 208 else 209 classes++; 210 return 0; /* No match. */ 211} 212