#include <nmmintrin.h>
#include <string.h>

/* Bits returned by cmp_flags to describe the resulting flag state.
   AFLAG and PFLAG are defined for completeness; cmp_flags never sets
   them.  */
#define CFLAG 0x00000001
#define ZFLAG 0x00000002
#define SFLAG 0x00000004
#define OFLAG 0x00000008
#define AFLAG 0x00000010
#define PFLAG 0x00000020

/* Fill RES[j][i] with (X[i] == Y[j]) for every pair of elements of the
   arrays X and Y.  The element count is the number of elements of
   X's type that fit in 16 bytes (16 for bytes, 8 for words).  */
#define PCMPSTR_EQ(X, Y, RES)                                           \
  {                                                                     \
    int pcmpstr_n_ = (int) (16 / sizeof (*(X)));                        \
    int pcmpstr_i_, pcmpstr_j_;                                         \
    for (pcmpstr_i_ = 0; pcmpstr_i_ < pcmpstr_n_; pcmpstr_i_++)         \
      for (pcmpstr_j_ = 0; pcmpstr_j_ < pcmpstr_n_; pcmpstr_j_++)       \
        (RES)[pcmpstr_j_][pcmpstr_i_]                                   \
          = ((X)[pcmpstr_i_] == (Y)[pcmpstr_j_]);                       \
  }

/* Treat X as consecutive (low, high) pairs.  For every element Y[j]
   and every pair starting at even index i, store (Y[j] >= X[i]) in
   RES[j][i] and (Y[j] <= X[i + 1]) in RES[j][i + 1].  */
#define PCMPSTR_RNG(X, Y, RES)                                          \
  {                                                                     \
    int pcmpstr_n_ = (int) (16 / sizeof (*(X)));                        \
    int pcmpstr_i_, pcmpstr_j_;                                         \
    for (pcmpstr_j_ = 0; pcmpstr_j_ < pcmpstr_n_; pcmpstr_j_++)         \
      for (pcmpstr_i_ = 0; pcmpstr_i_ < pcmpstr_n_ - 1;                 \
           pcmpstr_i_ += 2)                                             \
        {                                                               \
          (RES)[pcmpstr_j_][pcmpstr_i_]                                 \
            = ((Y)[pcmpstr_j_] >= (X)[pcmpstr_i_]);                     \
          (RES)[pcmpstr_j_][pcmpstr_i_ + 1]                             \
            = ((Y)[pcmpstr_j_] <= (X)[pcmpstr_i_ + 1]);                 \
        }                                                               \
  }

/* Return the effective element count for a length value LEN: the
   absolute value of LEN, saturated to DIM.  The range check is done
   before negating, so the most negative int never evaluates -LEN
   (which would be signed overflow) and simply saturates to DIM.  */
static int
pcmpstr_length (int len, int dim)
{
  if (len > dim || len < -dim)
    return dim;

  return len < 0 ? -len : len;
}

/* Overwrite the entries of RES that involve elements past the end of
   either string.  RES[j][i] compares element i of the first operand
   (valid while i < LA) with element j of the second operand (valid
   while j < LB).  DIM is the number of elements per operand.  The
   forced value depends on the aggregation selected by MODE & 0x0C.  */
static void
override_invalid (unsigned char res[16][16], int la, int lb,
                  const int mode, int dim)
{
  int i, j;

  for (j = 0; j < dim; j++)
    for (i = 0; i < dim; i++)
      {
        if (i < la && j >= lb)
          /* Only the second operand's element is invalid.  */
          res[j][i] = 0;
        else if (i >= la)
          /* The first operand's element is invalid.  */
          switch ((mode & 0x0C))
            {
            case _SIDD_CMP_EQUAL_ANY:
            case _SIDD_CMP_RANGES:
              res[j][i] = 0;
              break;
            case _SIDD_CMP_EQUAL_EACH:
              /* Two invalid elements compare equal.  */
              res[j][i] = (j >= lb) ? 1 : 0;
              break;
            case _SIDD_CMP_EQUAL_ORDERED:
              res[j][i] = 1;
              break;
            }
      }
}

/* Build the element-by-element comparison matrix RES for operands A
   (LA valid elements) and B (LB valid elements).  MODE & 3 selects
   the element type, MODE & 0x0C selects range or equality comparison.
   LA and LB must already be non-negative and no larger than the
   element count.  */
static void
calc_matrix (__m128i a, int la, __m128i b, int lb, const int mode,
             unsigned char res[16][16])
{
  union
    {
      __m128i x;
      signed char sc[16];
      unsigned char uc[16];
      signed short ss[8];
      unsigned short us[8];
    } d, s;
  const int is_ranges = (mode & 0x0C) == _SIDD_CMP_RANGES;

  d.x = a;
  s.x = b;

  switch ((mode & 3))
    {
    case _SIDD_UBYTE_OPS:
      if (is_ranges)
        {
          PCMPSTR_RNG (d.uc, s.uc, res);
        }
      else
        {
          PCMPSTR_EQ (d.uc, s.uc, res);
        }
      break;
    case _SIDD_UWORD_OPS:
      if (is_ranges)
        {
          PCMPSTR_RNG (d.us, s.us, res);
        }
      else
        {
          PCMPSTR_EQ (d.us, s.us, res);
        }
      break;
    case _SIDD_SBYTE_OPS:
      if (is_ranges)
        {
          PCMPSTR_RNG (d.sc, s.sc, res);
        }
      else
        {
          PCMPSTR_EQ (d.sc, s.sc, res);
        }
      break;
    case _SIDD_SWORD_OPS:
      if (is_ranges)
        {
          PCMPSTR_RNG (d.ss, s.ss, res);
        }
      else
        {
          PCMPSTR_EQ (d.ss, s.ss, res);
        }
      break;
    }

  override_invalid (res, la, lb, mode, (mode & 1) == 0 ? 16 : 8);
}

/* Compute the result bit mask for operands A and B.  LA and LB are
   the string lengths in elements; their absolute values are used,
   saturated to the element count (16 for byte modes, 8 for word
   modes).  MODE & 0x0C selects the aggregation and MODE & 0x30 the
   polarity.  Only the low 16 (byte modes) or 8 (word modes) bits of
   the return value can be set.  */
static int
calc_res (__m128i a, int la, __m128i b, int lb, const int mode)
{
  unsigned char mtx[16][16];
  int i, j, k, dim, res = 0;

  memset (mtx, 0, sizeof (mtx));

  dim = (mode & 1) == 0 ? 16 : 8;

  la = pcmpstr_length (la, dim);
  lb = pcmpstr_length (lb, dim);

  calc_matrix (a, la, b, lb, mode, mtx);

  switch ((mode & 0x0C))
    {
    case _SIDD_CMP_EQUAL_ANY:
      /* Bit i: element i of B equals some element of A.  */
      for (i = 0; i < dim; i++)
        for (j = 0; j < dim; j++)
          if (mtx[i][j])
            res |= (1 << i);
      break;

    case _SIDD_CMP_RANGES:
      /* Bit j: element j of B lies inside some (low, high) pair of A.  */
      for (i = 0; i < dim; i += 2)
        for (j = 0; j < dim; j++)
          if (mtx[j][i] && mtx[j][i + 1])
            res |= (1 << j);
      break;

    case _SIDD_CMP_EQUAL_EACH:
      /* Bit i: elements i of A and B match.  */
      for (i = 0; i < dim; i++)
        if (mtx[i][i])
          res |= (1 << i);
      break;

    case _SIDD_CMP_EQUAL_ORDERED:
      /* Bit i: A matches B starting at element i of B.  */
      for (i = 0; i < dim; i++)
        {
          unsigned char val = 1;

          for (j = 0, k = i; j < dim - i && k < dim; j++, k++)
            val &= mtx[k][j];

          if (val)
            res |= (1 << i);
        }
      break;
    }

  switch ((mode & 0x30))
    {
    case _SIDD_POSITIVE_POLARITY:
    case _SIDD_MASKED_POSITIVE_POLARITY:
      break;

    case _SIDD_NEGATIVE_POLARITY:
      res ^= -1;
      break;

    case _SIDD_MASKED_NEGATIVE_POLARITY:
      /* Invert only the bits that belong to valid elements of B.
         LB is at most 16 here, so the shift is well defined.  */
      res ^= (1 << lb) - 1;
      break;
    }

  return res & ((dim == 8) ? 0xFF : 0xFFFF);
}

/* Compute the flag bits for a comparison whose result mask is RES2.
   With IS_IMPLICIT nonzero the strings are null-terminated and A and
   B are inspected for a null element; otherwise the explicit lengths
   LA and LB are used (absolute value, saturated to the element
   count).  Returns a combination of CFLAG, ZFLAG, SFLAG and OFLAG.  */
static int
cmp_flags (__m128i a, int la, __m128i b, int lb,
           int mode, int res2, int is_implicit)
{
  int i;
  int flags = 0;
  int is_bytes_mode = (mode & 1) == 0;
  union
    {
      __m128i x;
      unsigned char uc[16];
      unsigned short us[8];
    } d, s;

  d.x = a;
  s.x = b;

  /* CF: reset if (RES2 == 0), set otherwise.  */
  if (res2 != 0)
    flags |= CFLAG;

  if (is_implicit)
    {
      /* ZF: set if any byte/word of src xmm operand is null, reset
         otherwise.
         SF: set if any byte/word of dst xmm operand is null, reset
         otherwise.  */
      if (is_bytes_mode)
        {
          for (i = 0; i < 16; i++)
            {
              if (s.uc[i] == 0)
                flags |= ZFLAG;
              if (d.uc[i] == 0)
                flags |= SFLAG;
            }
        }
      else
        {
          for (i = 0; i < 8; i++)
            {
              if (s.us[i] == 0)
                flags |= ZFLAG;
              if (d.us[i] == 0)
                flags |= SFLAG;
            }
        }
    }
  else
    {
      /* ZF: set if abs value of EDX/RDX < 16 (8), reset otherwise.
         SF: set if abs value of EAX/RAX < 16 (8), reset otherwise.  */
      int max_ind = is_bytes_mode ? 16 : 8;

      if (pcmpstr_length (lb, max_ind) < max_ind)
        flags |= ZFLAG;
      if (pcmpstr_length (la, max_ind) < max_ind)
        flags |= SFLAG;
    }

  /* OF: equal to RES2[0].  */
  if ((res2 & 0x1))
    flags |= OFLAG;

  /* AF: Reset.
     PF: Reset.  */
  return flags;
}

/* Index form of the comparison.  Stores the result mask in *RES2 and
   returns the index of its most significant set bit when MODE & 0x40
   is set, of its least significant set bit otherwise.  Returns the
   element count (16 or 8) when no bit is set.  */
static int
cmp_indexed (__m128i a, int la, __m128i b, int lb,
             const int mode, int *res2)
{
  int i, ndx;
  int dim = (mode & 1) == 0 ? 16 : 8;
  int r2;

  r2 = calc_res (a, la, b, lb, mode);

  ndx = dim;
  if ((mode & 0x40))
    {
      for (i = dim - 1; i >= 0; i--)
        if (r2 & (1 << i))
          {
            ndx = i;
            break;
          }
    }
  else
    {
      for (i = 0; i < dim; i++)
        if ((r2 & (1 << i)))
          {
            ndx = i;
            break;
          }
    }

  *res2 = r2;
  return ndx;
}

/* Mask form of the comparison.  Stores the result mask in *RES2.
   When MODE & 0x40 is set, each element of the returned vector is
   all-ones if its result bit is set and zero otherwise.  When it is
   clear, the result bits are copied into the lowest 16 (byte modes)
   or 8 (word modes) bits of an otherwise zero vector.  */
static __m128i
cmp_masked (__m128i a, int la, __m128i b, int lb,
            const int mode, int *res2)
{
  union
    {
      __m128i x;
      char c[16];
      short s[8];
    } ret;
  int i;
  int dim = (mode & 1) == 0 ? 16 : 8;
  union
    {
      int i;
      char c[4];
      short s[2];
    } r2;

  r2.i = calc_res (a, la, b, lb, mode);

  memset (&ret, 0, sizeof (ret));

  if (mode & 0x40)
    {
      for (i = 0; i < dim; i++)
        {
          if (dim == 8)
            ret.s[i] = (r2.i & (1 << i)) ? -1 : 0;
          else
            ret.c[i] = (r2.i & (1 << i)) ? -1 : 0;
        }
    }
  else
    {
      /* Copy the first short/char of the result's object
         representation into the first element of the output.  */
      if (dim == 16)
        ret.s[0] = r2.s[0];
      else
        ret.c[0] = r2.c[0];
    }

  *res2 = r2.i;

  return ret.x;
}

/* Return the number of elements of A that precede its first null
   element, or the element count (16 or 8) if there is none.  MODE & 1
   selects word (set) or byte (clear) elements.  */
static int
calc_str_len (__m128i a, const int mode)
{
  union
    {
      __m128i x;
      char c[16];
      short s[8];
    } s;
  int i;
  int dim = (mode & 1) == 0 ? 16 : 8;

  s.x = a;

  if ((mode & 1))
    {
      for (i = 0; i < dim; i++)
        if (s.s[i] == 0)
          break;
    }
  else
    {
      for (i = 0; i < dim; i++)
        if (s.c[i] == 0)
          break;
    }

  return i;
}

/* Explicit-length comparison returning an index.  If FLAGS is not
   NULL, the flag bits from cmp_flags are stored in *FLAGS.  */
static inline int
cmp_ei (__m128i *a, int la, __m128i *b, int lb,
        const int mode, int *flags)
{
  int res2;
  int index = cmp_indexed (*a, la, *b, lb, mode, &res2);

  if (flags != NULL)
    *flags = cmp_flags (*a, la, *b, lb, mode, res2, 0);

  return index;
}

/* Implicit-length comparison returning an index; the lengths come
   from calc_str_len.  If FLAGS is not NULL, the flag bits from
   cmp_flags are stored in *FLAGS.  */
static inline int
cmp_ii (__m128i *a, __m128i *b, const int mode, int *flags)
{
  int la, lb;
  int res2;
  int index;

  la = calc_str_len (*a, mode);
  lb = calc_str_len (*b, mode);

  index = cmp_indexed (*a, la, *b, lb, mode, &res2);

  if (flags != NULL)
    *flags = cmp_flags (*a, la, *b, lb, mode, res2, 1);

  return index;
}

/* Explicit-length comparison returning a mask vector.  If FLAGS is
   not NULL, the flag bits from cmp_flags are stored in *FLAGS.  */
static inline __m128i
cmp_em (__m128i *a, int la, __m128i *b, int lb,
        const int mode, int *flags)
{
  int res2;
  __m128i mask = cmp_masked (*a, la, *b, lb, mode, &res2);

  if (flags != NULL)
    *flags = cmp_flags (*a, la, *b, lb, mode, res2, 0);

  return mask;
}

/* Implicit-length comparison returning a mask vector; the lengths
   come from calc_str_len.  If FLAGS is not NULL, the flag bits from
   cmp_flags are stored in *FLAGS.  */
static inline __m128i
cmp_im (__m128i *a, __m128i *b, const int mode, int *flags)
{
  int la, lb;
  int res2;
  __m128i mask;

  la = calc_str_len (*a, mode);
  lb = calc_str_len (*b, mode);

  mask = cmp_masked (*a, la, *b, lb, mode, &res2);
  if (flags != NULL)
    *flags = cmp_flags (*a, la, *b, lb, mode, res2, 1);

  return mask;
}