/* $NetBSD: sljitNativeX86_common.c,v 1.10 2021/11/30 12:32:09 christos Exp $ */

/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */ 28 29SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) 30{ 31 return "x86" SLJIT_CPUINFO; 32} 33 34/* 35 32b register indexes: 36 0 - EAX 37 1 - ECX 38 2 - EDX 39 3 - EBX 40 4 - none 41 5 - EBP 42 6 - ESI 43 7 - EDI 44*/ 45 46/* 47 64b register indexes: 48 0 - RAX 49 1 - RCX 50 2 - RDX 51 3 - RBX 52 4 - none 53 5 - RBP 54 6 - RSI 55 7 - RDI 56 8 - R8 - From now on REX prefix is required 57 9 - R9 58 10 - R10 59 11 - R11 60 12 - R12 61 13 - R13 62 14 - R14 63 15 - R15 64*/ 65 66#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 67 68/* Last register + 1. */ 69#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) 70 71static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = { 72 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 7, 6, 3, 4, 5 73}; 74 75#define CHECK_EXTRA_REGS(p, w, do) \ 76 if (p >= SLJIT_R3 && p <= SLJIT_S3) { \ 77 if (p <= compiler->scratches) \ 78 w = compiler->saveds_offset - ((p) - SLJIT_R2) * (sljit_sw)sizeof(sljit_sw); \ 79 else \ 80 w = compiler->locals_offset + ((p) - SLJIT_S2) * (sljit_sw)sizeof(sljit_sw); \ 81 p = SLJIT_MEM1(SLJIT_SP); \ 82 do; \ 83 } 84 85#else /* SLJIT_CONFIG_X86_32 */ 86 87/* Last register + 1. */ 88#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) 89#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) 90#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) 91 92/* Note: r12 & 0x7 == 0b100, which decoded as SIB byte present 93 Note: avoid to use r12 and r13 for memory addessing 94 therefore r12 is better for SAVED_EREG than SAVED_REG. */ 95#ifndef _WIN64 96/* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */ 97static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { 98 0, 0, 6, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 7, 9 99}; 100/* low-map. reg_map & 0x7. */ 101static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = { 102 0, 0, 6, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 7, 1 103}; 104#else 105/* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. 
*/ 106static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { 107 0, 0, 2, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 10, 8, 9 108}; 109/* low-map. reg_map & 0x7. */ 110static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = { 111 0, 0, 2, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 2, 0, 1 112}; 113#endif 114 115#define REX_W 0x48 116#define REX_R 0x44 117#define REX_X 0x42 118#define REX_B 0x41 119#define REX 0x40 120 121#ifndef _WIN64 122#define HALFWORD_MAX 0x7fffffffl 123#define HALFWORD_MIN -0x80000000l 124#else 125#define HALFWORD_MAX 0x7fffffffll 126#define HALFWORD_MIN -0x80000000ll 127#endif 128 129#define IS_HALFWORD(x) ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN) 130#define NOT_HALFWORD(x) ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN) 131 132#define CHECK_EXTRA_REGS(p, w, do) 133 134#endif /* SLJIT_CONFIG_X86_32 */ 135 136#define TMP_FREG (0) 137 138/* Size flags for emit_x86_instruction: */ 139#define EX86_BIN_INS 0x0010 140#define EX86_SHIFT_INS 0x0020 141#define EX86_REX 0x0040 142#define EX86_NO_REXW 0x0080 143#define EX86_BYTE_ARG 0x0100 144#define EX86_HALF_ARG 0x0200 145#define EX86_PREF_66 0x0400 146#define EX86_PREF_F2 0x0800 147#define EX86_PREF_F3 0x1000 148#define EX86_SSE2_OP1 0x2000 149#define EX86_SSE2_OP2 0x4000 150#define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2) 151 152/* --------------------------------------------------------------------- */ 153/* Instrucion forms */ 154/* --------------------------------------------------------------------- */ 155 156#define ADD (/* BINARY */ 0 << 3) 157#define ADD_EAX_i32 0x05 158#define ADD_r_rm 0x03 159#define ADD_rm_r 0x01 160#define ADDSD_x_xm 0x58 161#define ADC (/* BINARY */ 2 << 3) 162#define ADC_EAX_i32 0x15 163#define ADC_r_rm 0x13 164#define ADC_rm_r 0x11 165#define AND (/* BINARY */ 4 << 3) 166#define AND_EAX_i32 0x25 167#define AND_r_rm 0x23 168#define AND_rm_r 0x21 169#define ANDPD_x_xm 0x54 170#define BSR_r_rm (/* GROUP_0F */ 0xbd) 171#define CALL_i32 0xe8 172#define CALL_rm (/* 
GROUP_FF */ 2 << 3) 173#define CDQ 0x99 174#define CMOVNE_r_rm (/* GROUP_0F */ 0x45) 175#define CMP (/* BINARY */ 7 << 3) 176#define CMP_EAX_i32 0x3d 177#define CMP_r_rm 0x3b 178#define CMP_rm_r 0x39 179#define CVTPD2PS_x_xm 0x5a 180#define CVTSI2SD_x_rm 0x2a 181#define CVTTSD2SI_r_xm 0x2c 182#define DIV (/* GROUP_F7 */ 6 << 3) 183#define DIVSD_x_xm 0x5e 184#define INT3 0xcc 185#define IDIV (/* GROUP_F7 */ 7 << 3) 186#define IMUL (/* GROUP_F7 */ 5 << 3) 187#define IMUL_r_rm (/* GROUP_0F */ 0xaf) 188#define IMUL_r_rm_i8 0x6b 189#define IMUL_r_rm_i32 0x69 190#define JE_i8 0x74 191#define JNE_i8 0x75 192#define JMP_i8 0xeb 193#define JMP_i32 0xe9 194#define JMP_rm (/* GROUP_FF */ 4 << 3) 195#define LEA_r_m 0x8d 196#define MOV_r_rm 0x8b 197#define MOV_r_i32 0xb8 198#define MOV_rm_r 0x89 199#define MOV_rm_i32 0xc7 200#define MOV_rm8_i8 0xc6 201#define MOV_rm8_r8 0x88 202#define MOVSD_x_xm 0x10 203#define MOVSD_xm_x 0x11 204#define MOVSXD_r_rm 0x63 205#define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe) 206#define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf) 207#define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6) 208#define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7) 209#define MUL (/* GROUP_F7 */ 4 << 3) 210#define MULSD_x_xm 0x59 211#define NEG_rm (/* GROUP_F7 */ 3 << 3) 212#define NOP 0x90 213#define NOT_rm (/* GROUP_F7 */ 2 << 3) 214#define OR (/* BINARY */ 1 << 3) 215#define OR_r_rm 0x0b 216#define OR_EAX_i32 0x0d 217#define OR_rm_r 0x09 218#define OR_rm8_r8 0x08 219#define POP_r 0x58 220#define POP_rm 0x8f 221#define POPF 0x9d 222#define PUSH_i32 0x68 223#define PUSH_r 0x50 224#define PUSH_rm (/* GROUP_FF */ 6 << 3) 225#define PUSHF 0x9c 226#define RET_near 0xc3 227#define RET_i16 0xc2 228#define SBB (/* BINARY */ 3 << 3) 229#define SBB_EAX_i32 0x1d 230#define SBB_r_rm 0x1b 231#define SBB_rm_r 0x19 232#define SAR (/* SHIFT */ 7 << 3) 233#define SHL (/* SHIFT */ 4 << 3) 234#define SHR (/* SHIFT */ 5 << 3) 235#define SUB (/* BINARY */ 5 << 3) 236#define SUB_EAX_i32 0x2d 237#define SUB_r_rm 0x2b 238#define 
SUB_rm_r 0x29 239#define SUBSD_x_xm 0x5c 240#define TEST_EAX_i32 0xa9 241#define TEST_rm_r 0x85 242#define UCOMISD_x_xm 0x2e 243#define UNPCKLPD_x_xm 0x14 244#define XCHG_EAX_r 0x90 245#define XCHG_r_rm 0x87 246#define XOR (/* BINARY */ 6 << 3) 247#define XOR_EAX_i32 0x35 248#define XOR_r_rm 0x33 249#define XOR_rm_r 0x31 250#define XORPD_x_xm 0x57 251 252#define GROUP_0F 0x0f 253#define GROUP_F7 0xf7 254#define GROUP_FF 0xff 255#define GROUP_BINARY_81 0x81 256#define GROUP_BINARY_83 0x83 257#define GROUP_SHIFT_1 0xd1 258#define GROUP_SHIFT_N 0xc1 259#define GROUP_SHIFT_CL 0xd3 260 261#define MOD_REG 0xc0 262#define MOD_DISP8 0x40 263 264#define INC_SIZE(s) (*inst++ = (s), compiler->size += (s)) 265 266#define PUSH_REG(r) (*inst++ = (PUSH_r + (r))) 267#define POP_REG(r) (*inst++ = (POP_r + (r))) 268#define RET() (*inst++ = (RET_near)) 269#define RET_I16(n) (*inst++ = (RET_i16), *inst++ = n, *inst++ = 0) 270/* r32, r/m32 */ 271#define MOV_RM(mod, reg, rm) (*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm)) 272 273/* Multithreading does not affect these static variables, since they store 274 built-in CPU features. Therefore they can be overwritten by different threads 275 if they detect the CPU features in the same time. 
*/ 276#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) 277static sljit_s32 cpu_has_sse2 = -1; 278#endif 279static sljit_s32 cpu_has_cmov = -1; 280 281#ifdef _WIN32_WCE 282#include <cmnintrin.h> 283#elif defined(_MSC_VER) && _MSC_VER >= 1400 284#include <intrin.h> 285#endif 286 287/******************************************************/ 288/* Unaligned-store functions */ 289/******************************************************/ 290 291static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value) 292{ 293 SLJIT_MEMCPY(addr, &value, sizeof(value)); 294} 295 296static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value) 297{ 298 SLJIT_MEMCPY(addr, &value, sizeof(value)); 299} 300 301static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value) 302{ 303 SLJIT_MEMCPY(addr, &value, sizeof(value)); 304} 305 306/******************************************************/ 307/* Utility functions */ 308/******************************************************/ 309 310static void get_cpu_features(void) 311{ 312 sljit_u32 features; 313 314#if defined(_MSC_VER) && _MSC_VER >= 1400 315 316 int CPUInfo[4]; 317 __cpuid(CPUInfo, 1); 318 features = (sljit_u32)CPUInfo[3]; 319 320#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C) || defined(__lint__) 321 322 /* AT&T syntax. */ 323 __asm__ ( 324 "movl $0x1, %%eax\n" 325#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 326 /* On x86-32, there is no red zone, so this 327 should work (no need for a local variable). */ 328 "push %%ebx\n" 329#endif 330 "cpuid\n" 331#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 332 "pop %%ebx\n" 333#endif 334 "movl %%edx, %0\n" 335 : "=g" (features) 336 : 337#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 338 : "%eax", "%ecx", "%edx" 339#else 340 : "%rax", "%rbx", "%rcx", "%rdx" 341#endif 342 ); 343 344#else /* _MSC_VER && _MSC_VER >= 1400 */ 345 346 /* Intel syntax. 
*/ 347 __asm { 348 mov eax, 1 349 cpuid 350 mov features, edx 351 } 352 353#endif /* _MSC_VER && _MSC_VER >= 1400 */ 354 355#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) 356 cpu_has_sse2 = (features >> 26) & 0x1; 357#endif 358 cpu_has_cmov = (features >> 15) & 0x1; 359} 360 361static sljit_u8 get_jump_code(sljit_s32 type) 362{ 363 switch (type) { 364 case SLJIT_EQUAL: 365 case SLJIT_EQUAL_F64: 366 return 0x84 /* je */; 367 368 case SLJIT_NOT_EQUAL: 369 case SLJIT_NOT_EQUAL_F64: 370 return 0x85 /* jne */; 371 372 case SLJIT_LESS: 373 case SLJIT_LESS_F64: 374 return 0x82 /* jc */; 375 376 case SLJIT_GREATER_EQUAL: 377 case SLJIT_GREATER_EQUAL_F64: 378 return 0x83 /* jae */; 379 380 case SLJIT_GREATER: 381 case SLJIT_GREATER_F64: 382 return 0x87 /* jnbe */; 383 384 case SLJIT_LESS_EQUAL: 385 case SLJIT_LESS_EQUAL_F64: 386 return 0x86 /* jbe */; 387 388 case SLJIT_SIG_LESS: 389 return 0x8c /* jl */; 390 391 case SLJIT_SIG_GREATER_EQUAL: 392 return 0x8d /* jnl */; 393 394 case SLJIT_SIG_GREATER: 395 return 0x8f /* jnle */; 396 397 case SLJIT_SIG_LESS_EQUAL: 398 return 0x8e /* jle */; 399 400 case SLJIT_OVERFLOW: 401 case SLJIT_MUL_OVERFLOW: 402 return 0x80 /* jo */; 403 404 case SLJIT_NOT_OVERFLOW: 405 case SLJIT_MUL_NOT_OVERFLOW: 406 return 0x81 /* jno */; 407 408 case SLJIT_UNORDERED_F64: 409 return 0x8a /* jp */; 410 411 case SLJIT_ORDERED_F64: 412 return 0x8b /* jpo */; 413 } 414 return 0; 415} 416 417#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 418static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type, sljit_sw executable_offset); 419#else 420static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type); 421#endif 422 423static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_s32 type, sljit_sw executable_offset) 424{ 425 sljit_s32 short_jump; 426 sljit_uw label_addr; 427 428 if (jump->flags & JUMP_LABEL) 429 
label_addr = (sljit_uw)(code + jump->u.label->size); 430 else 431 label_addr = jump->u.target - executable_offset; 432 433 short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127; 434 435#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 436 if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN) 437 return generate_far_jump_code(jump, code_ptr, type); 438#endif 439 440 if (type == SLJIT_JUMP) { 441 if (short_jump) 442 *code_ptr++ = JMP_i8; 443 else 444 *code_ptr++ = JMP_i32; 445 jump->addr++; 446 } 447 else if (type >= SLJIT_FAST_CALL) { 448 short_jump = 0; 449 *code_ptr++ = CALL_i32; 450 jump->addr++; 451 } 452 else if (short_jump) { 453 *code_ptr++ = get_jump_code(type) - 0x10; 454 jump->addr++; 455 } 456 else { 457 *code_ptr++ = GROUP_0F; 458 *code_ptr++ = get_jump_code(type); 459 jump->addr += 2; 460 } 461 462 if (short_jump) { 463 jump->flags |= PATCH_MB; 464 code_ptr += sizeof(sljit_s8); 465 } else { 466 jump->flags |= PATCH_MW; 467#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 468 code_ptr += sizeof(sljit_sw); 469#else 470 code_ptr += sizeof(sljit_s32); 471#endif 472 } 473 474 return code_ptr; 475} 476 477SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) 478{ 479 struct sljit_memory_fragment *buf; 480 sljit_u8 *code; 481 sljit_u8 *code_ptr; 482 sljit_u8 *buf_ptr; 483 sljit_u8 *buf_end; 484 sljit_u8 len; 485 sljit_sw executable_offset; 486 sljit_sw jump_addr; 487 488 struct sljit_label *label; 489 struct sljit_jump *jump; 490 struct sljit_const *const_; 491 492 CHECK_ERROR_PTR(); 493 CHECK_PTR(check_sljit_generate_code(compiler)); 494 reverse_buf(compiler); 495 496 /* Second code generation pass. 
*/ 497 code = (sljit_u8*)SLJIT_MALLOC_EXEC(compiler->size); 498 PTR_FAIL_WITH_EXEC_IF(code); 499 buf = compiler->buf; 500 501 code_ptr = code; 502 label = compiler->labels; 503 jump = compiler->jumps; 504 const_ = compiler->consts; 505 executable_offset = SLJIT_EXEC_OFFSET(code); 506 507 do { 508 buf_ptr = buf->memory; 509 buf_end = buf_ptr + buf->used_size; 510 do { 511 len = *buf_ptr++; 512 if (len > 0) { 513 /* The code is already generated. */ 514 SLJIT_MEMCPY(code_ptr, buf_ptr, len); 515 code_ptr += len; 516 buf_ptr += len; 517 } 518 else { 519 if (*buf_ptr >= 2) { 520 jump->addr = (sljit_uw)code_ptr; 521 if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) 522 code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 2, executable_offset); 523 else { 524#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 525 code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2, executable_offset); 526#else 527 code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2); 528#endif 529 } 530 jump = jump->next; 531 } 532 else if (*buf_ptr == 0) { 533 label->addr = ((sljit_uw)code_ptr) + executable_offset; 534 label->size = code_ptr - code; 535 label = label->next; 536 } 537 else { /* *buf_ptr is 1 */ 538 const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw); 539 const_ = const_->next; 540 } 541 buf_ptr++; 542 } 543 } while (buf_ptr < buf_end); 544 SLJIT_ASSERT(buf_ptr == buf_end); 545 buf = buf->next; 546 } while (buf); 547 548 SLJIT_ASSERT(!label); 549 SLJIT_ASSERT(!jump); 550 SLJIT_ASSERT(!const_); 551 552 jump = compiler->jumps; 553 while (jump) { 554 jump_addr = jump->addr + executable_offset; 555 556 if (jump->flags & PATCH_MB) { 557 SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) <= 127); 558 *(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))); 559 } else if (jump->flags & PATCH_MW) { 560 if (jump->flags & 
JUMP_LABEL) { 561#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 562 sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_sw)))); 563#else 564 SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX); 565 sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32)))); 566#endif 567 } 568 else { 569#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 570 sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_sw)))); 571#else 572 SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX); 573 sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump_addr + sizeof(sljit_s32)))); 574#endif 575 } 576 } 577#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 578 else if (jump->flags & PATCH_MD) 579 sljit_unaligned_store_sw((void*)jump->addr, jump->u.label->addr); 580#endif 581 582 jump = jump->next; 583 } 584 585 /* Some space may be wasted because of short jumps. 
*/ 586 SLJIT_ASSERT(code_ptr <= code + compiler->size); 587 compiler->error = SLJIT_ERR_COMPILED; 588 compiler->executable_offset = executable_offset; 589 compiler->executable_size = code_ptr - code; 590 return (void*)(code + executable_offset); 591} 592 593/* --------------------------------------------------------------------- */ 594/* Operators */ 595/* --------------------------------------------------------------------- */ 596 597static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler, 598 sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm, 599 sljit_s32 dst, sljit_sw dstw, 600 sljit_s32 src1, sljit_sw src1w, 601 sljit_s32 src2, sljit_sw src2w); 602 603static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler, 604 sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm, 605 sljit_s32 dst, sljit_sw dstw, 606 sljit_s32 src1, sljit_sw src1w, 607 sljit_s32 src2, sljit_sw src2w); 608 609static sljit_s32 emit_mov(struct sljit_compiler *compiler, 610 sljit_s32 dst, sljit_sw dstw, 611 sljit_s32 src, sljit_sw srcw); 612 613#define EMIT_MOV(compiler, dst, dstw, src, srcw) \ 614 FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw)); 615 616#ifdef _WIN32 617#include <malloc.h> 618 619static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size) 620{ 621 /* Workaround for calling the internal _chkstk() function on Windows. 622 This function touches all 4k pages belongs to the requested stack space, 623 which size is passed in local_size. This is necessary on Windows where 624 the stack can only grow in 4k steps. However, this function just burn 625 CPU cycles if the stack is large enough. However, you don't know it in 626 advance, so it must always be called. I think this is a bad design in 627 general even if it has some reasons. 
*/ 628 *(volatile sljit_s32*)alloca(local_size) = 0; 629} 630 631#endif 632 633#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 634#include "sljitNativeX86_32.c" 635#else 636#include "sljitNativeX86_64.c" 637#endif 638 639static sljit_s32 emit_mov(struct sljit_compiler *compiler, 640 sljit_s32 dst, sljit_sw dstw, 641 sljit_s32 src, sljit_sw srcw) 642{ 643 sljit_u8* inst; 644 645 if (dst == SLJIT_UNUSED) { 646 /* No destination, doesn't need to setup flags. */ 647 if (src & SLJIT_MEM) { 648 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw); 649 FAIL_IF(!inst); 650 *inst = MOV_r_rm; 651 } 652 return SLJIT_SUCCESS; 653 } 654 if (FAST_IS_REG(src)) { 655 inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw); 656 FAIL_IF(!inst); 657 *inst = MOV_rm_r; 658 return SLJIT_SUCCESS; 659 } 660 if (src & SLJIT_IMM) { 661 if (FAST_IS_REG(dst)) { 662#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 663 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw); 664#else 665 if (!compiler->mode32) { 666 if (NOT_HALFWORD(srcw)) 667 return emit_load_imm64(compiler, dst, srcw); 668 } 669 else 670 return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw); 671#endif 672 } 673#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 674 if (!compiler->mode32 && NOT_HALFWORD(srcw)) { 675 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw)); 676 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw); 677 FAIL_IF(!inst); 678 *inst = MOV_rm_r; 679 return SLJIT_SUCCESS; 680 } 681#endif 682 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw); 683 FAIL_IF(!inst); 684 *inst = MOV_rm_i32; 685 return SLJIT_SUCCESS; 686 } 687 if (FAST_IS_REG(dst)) { 688 inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw); 689 FAIL_IF(!inst); 690 *inst = MOV_r_rm; 691 return SLJIT_SUCCESS; 692 } 693 694 /* Memory to memory move. Requires two instruction. 
*/ 695 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw); 696 FAIL_IF(!inst); 697 *inst = MOV_r_rm; 698 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw); 699 FAIL_IF(!inst); 700 *inst = MOV_rm_r; 701 return SLJIT_SUCCESS; 702} 703 704SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) 705{ 706 sljit_u8 *inst; 707#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 708 sljit_s32 size; 709#endif 710 711 CHECK_ERROR(); 712 CHECK(check_sljit_emit_op0(compiler, op)); 713 714 switch (GET_OPCODE(op)) { 715 case SLJIT_BREAKPOINT: 716 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); 717 FAIL_IF(!inst); 718 INC_SIZE(1); 719 *inst = INT3; 720 break; 721 case SLJIT_NOP: 722 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); 723 FAIL_IF(!inst); 724 INC_SIZE(1); 725 *inst = NOP; 726 break; 727 case SLJIT_LMUL_UW: 728 case SLJIT_LMUL_SW: 729 case SLJIT_DIVMOD_UW: 730 case SLJIT_DIVMOD_SW: 731 case SLJIT_DIV_UW: 732 case SLJIT_DIV_SW: 733#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 734#ifdef _WIN64 735 SLJIT_ASSERT( 736 reg_map[SLJIT_R0] == 0 737 && reg_map[SLJIT_R1] == 2 738 && reg_map[TMP_REG1] > 7); 739#else 740 SLJIT_ASSERT( 741 reg_map[SLJIT_R0] == 0 742 && reg_map[SLJIT_R1] < 7 743 && reg_map[TMP_REG1] == 2); 744#endif 745 compiler->mode32 = op & SLJIT_I32_OP; 746#endif 747 SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments); 748 749 op = GET_OPCODE(op); 750 if ((op | 0x2) == SLJIT_DIV_UW) { 751#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64) 752 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0); 753 inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0); 754#else 755 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0); 756#endif 757 FAIL_IF(!inst); 758 *inst = XOR_r_rm; 759 } 760 761 if ((op | 0x2) == SLJIT_DIV_SW) { 762#if (defined SLJIT_CONFIG_X86_32 && 
SLJIT_CONFIG_X86_32) || defined(_WIN64) 763 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0); 764#endif 765 766#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 767 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); 768 FAIL_IF(!inst); 769 INC_SIZE(1); 770 *inst = CDQ; 771#else 772 if (compiler->mode32) { 773 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); 774 FAIL_IF(!inst); 775 INC_SIZE(1); 776 *inst = CDQ; 777 } else { 778 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); 779 FAIL_IF(!inst); 780 INC_SIZE(2); 781 *inst++ = REX_W; 782 *inst = CDQ; 783 } 784#endif 785 } 786 787#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 788 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); 789 FAIL_IF(!inst); 790 INC_SIZE(2); 791 *inst++ = GROUP_F7; 792 *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]); 793#else 794#ifdef _WIN64 795 size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2; 796#else 797 size = (!compiler->mode32) ? 3 : 2; 798#endif 799 inst = (sljit_u8*)ensure_buf(compiler, 1 + size); 800 FAIL_IF(!inst); 801 INC_SIZE(size); 802#ifdef _WIN64 803 if (!compiler->mode32) 804 *inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0); 805 else if (op >= SLJIT_DIVMOD_UW) 806 *inst++ = REX_B; 807 *inst++ = GROUP_F7; 808 *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? 
reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]); 809#else 810 if (!compiler->mode32) 811 *inst++ = REX_W; 812 *inst++ = GROUP_F7; 813 *inst = MOD_REG | reg_map[SLJIT_R1]; 814#endif 815#endif 816 switch (op) { 817 case SLJIT_LMUL_UW: 818 *inst |= MUL; 819 break; 820 case SLJIT_LMUL_SW: 821 *inst |= IMUL; 822 break; 823 case SLJIT_DIVMOD_UW: 824 case SLJIT_DIV_UW: 825 *inst |= DIV; 826 break; 827 case SLJIT_DIVMOD_SW: 828 case SLJIT_DIV_SW: 829 *inst |= IDIV; 830 break; 831 } 832#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64) 833 if (op <= SLJIT_DIVMOD_SW) 834 EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0); 835#else 836 if (op >= SLJIT_DIV_UW) 837 EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0); 838#endif 839 break; 840 } 841 842 return SLJIT_SUCCESS; 843} 844 845#define ENCODE_PREFIX(prefix) \ 846 do { \ 847 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); \ 848 FAIL_IF(!inst); \ 849 INC_SIZE(1); \ 850 *inst = (prefix); \ 851 } while (0) 852 853static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign, 854 sljit_s32 dst, sljit_sw dstw, 855 sljit_s32 src, sljit_sw srcw) 856{ 857 sljit_u8* inst; 858 sljit_s32 dst_r; 859#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 860 sljit_s32 work_r; 861#endif 862 863#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 864 compiler->mode32 = 0; 865#endif 866 867 if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM)) 868 return SLJIT_SUCCESS; /* Empty instruction. 
*/ 869 870 if (src & SLJIT_IMM) { 871 if (FAST_IS_REG(dst)) { 872#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 873 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw); 874#else 875 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0); 876 FAIL_IF(!inst); 877 *inst = MOV_rm_i32; 878 return SLJIT_SUCCESS; 879#endif 880 } 881 inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw); 882 FAIL_IF(!inst); 883 *inst = MOV_rm8_i8; 884 return SLJIT_SUCCESS; 885 } 886 887 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; 888 889 if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) { 890#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 891 if (reg_map[src] >= 4) { 892 SLJIT_ASSERT(dst_r == TMP_REG1); 893 EMIT_MOV(compiler, TMP_REG1, 0, src, 0); 894 } else 895 dst_r = src; 896#else 897 dst_r = src; 898#endif 899 } 900#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 901 else if (FAST_IS_REG(src) && reg_map[src] >= 4) { 902 /* src, dst are registers. */ 903 SLJIT_ASSERT(SLOW_IS_REG(dst)); 904 if (reg_map[dst] < 4) { 905 if (dst != src) 906 EMIT_MOV(compiler, dst, 0, src, 0); 907 inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0); 908 FAIL_IF(!inst); 909 *inst++ = GROUP_0F; 910 *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8; 911 } 912 else { 913 if (dst != src) 914 EMIT_MOV(compiler, dst, 0, src, 0); 915 if (sign) { 916 /* shl reg, 24 */ 917 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0); 918 FAIL_IF(!inst); 919 *inst |= SHL; 920 /* sar reg, 24 */ 921 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0); 922 FAIL_IF(!inst); 923 *inst |= SAR; 924 } 925 else { 926 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0); 927 FAIL_IF(!inst); 928 *(inst + 1) |= AND; 929 } 930 } 931 return SLJIT_SUCCESS; 932 } 933#endif 934 else { 935 /* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. 
*/
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
	}

	if (dst & SLJIT_MEM) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* On x86-32 only AL/BL/CL/DL (reg_map < 4) are byte-addressable,
		   so the value may have to be moved into such a register first. */
		if (dst_r == TMP_REG1) {
			/* Find a non-used register, whose reg_map[src] < 4. */
			if ((dst & REG_MASK) == SLJIT_R0) {
				if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
					work_r = SLJIT_R2;
				else
					work_r = SLJIT_R1;
			}
			else {
				if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
					work_r = SLJIT_R0;
				else if ((dst & REG_MASK) == SLJIT_R1)
					work_r = SLJIT_R2;
				else
					work_r = SLJIT_R1;
			}

			/* Swap the value into work_r (single-byte XCHG when EAX is involved). */
			if (work_r == SLJIT_R0) {
				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
			}
			else {
				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!inst);
				*inst = XCHG_r_rm;
			}

			inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm8_r8;

			/* Swap back so work_r keeps its original content. */
			if (work_r == SLJIT_R0) {
				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
			}
			else {
				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!inst);
				*inst = XCHG_r_rm;
			}
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm8_r8;
		}
#else
		/* On x86-64 a REX prefix makes the low byte of any register addressable. */
		inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_r8;
#endif
	}

	return SLJIT_SUCCESS;
}

/* Emits a 16 bit move; sign selects MOVSX (sign extend) vs MOVZX (zero extend)
   when loading into a register. Stores use the 0x66 operand-size prefix. */
static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
		dst_r = src;
	else {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
	}

	if (dst & SLJIT_MEM) {
		inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
	}

	return SLJIT_SUCCESS;
}

/* Emits a single-operand group-F7 instruction (opcode is the /digit field,
   e.g. NOT_rm or NEG_rm), moving through TMP_REG1 when dst is not a register. */
static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	if (dst == src && dstw == srcw) {
		/* Same input and output */
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst++ = GROUP_F7;
	*inst |= opcode;
	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}

/* NOT does not modify the CPU flags, so when SLJIT_SET_Z is requested the
   result is additionally OR-ed with itself to set the zero flag. */
static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst = OR_r_rm;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
		inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
		FAIL_IF(!inst);
		*inst = OR_r_rm;
		return SLJIT_SUCCESS;
	}
	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst++ = GROUP_F7;
	*inst |= NOT_rm;
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst = OR_r_rm;
	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}
/* Counts leading zeros via BSR: result = (width - 1) - BSR(src), computed as
   BSR XOR (width - 1). BSR leaves the destination undefined for zero input,
   so a CMOVNE (or a JE-guarded MOV fallback) loads a sentinel that XORs to
   the full width. */
static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

	SLJIT_UNUSED_ARG(op_flags);
	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
		/* Just set the zero flag: NOT then shift the top bit down;
		   the shift sets Z exactly when the source was zero. */
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REG1, 0);
#else
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, TMP_REG1, 0);
#endif
		FAIL_IF(!inst);
		*inst |= SHR;
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	inst = emit_x86_instruction(compiler, 2, TMP_REG1, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = BSR_r_rm;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (FAST_IS_REG(dst))
		dst_r = dst;
	else {
		/* Find an unused temporary register. */
		if ((dst & REG_MASK) != SLJIT_R0 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
			dst_r = SLJIT_R0;
		else if ((dst & REG_MASK) != SLJIT_R1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R1))
			dst_r = SLJIT_R1;
		else
			dst_r = SLJIT_R2;
		/* Spill dst_r's current value; it is swapped back by XCHG below. */
		EMIT_MOV(compiler, dst, dstw, dst_r, 0);
	}
	/* Sentinel: 32 + 31 XOR 31 == 32, the clz of zero. */
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
#else
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
	compiler->mode32 = 0;
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 64 + 63 : 32 + 31);
	compiler->mode32 = op_flags & SLJIT_I32_OP;
#endif

	if (cpu_has_cmov == -1)
		get_cpu_features();

	if (cpu_has_cmov) {
		/* Keep the sentinel only when BSR set Z (source was zero). */
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = CMOVNE_r_rm;
	} else {
		/* No CMOV: jump over a plain MOV when the source was zero. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
		FAIL_IF(!inst);
		INC_SIZE(4);

		*inst++ = JE_i8;
		*inst++ = 2;
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REG1];
#else
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!inst);
		INC_SIZE(5);

		*inst++ = JE_i8;
		*inst++ = 3;
		*inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REG1] >= 8 ? REX_B : 0);
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REG1];
#endif
	}

	/* dst_r ^= (width - 1) turns the bit index into the leading-zero count. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
#else
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, dst_r, 0);
#endif
	FAIL_IF(!inst);
	*(inst + 1) |= XOR;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (dst & SLJIT_MEM) {
		/* XCHG stores the result and restores the borrowed register. */
		inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = XCHG_r_rm;
	}
#else
	if (dst & SLJIT_MEM)
		EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
#endif
	return SLJIT_SUCCESS;
}
SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset); 1285 if (op >= SLJIT_MOVU) { 1286 update = 1; 1287 op -= 8; 1288 } 1289 1290 if (src & SLJIT_IMM) { 1291 switch (op) { 1292 case SLJIT_MOV_U8: 1293 srcw = (sljit_u8)srcw; 1294 break; 1295 case SLJIT_MOV_S8: 1296 srcw = (sljit_s8)srcw; 1297 break; 1298 case SLJIT_MOV_U16: 1299 srcw = (sljit_u16)srcw; 1300 break; 1301 case SLJIT_MOV_S16: 1302 srcw = (sljit_s16)srcw; 1303 break; 1304#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1305 case SLJIT_MOV_U32: 1306 srcw = (sljit_u32)srcw; 1307 break; 1308 case SLJIT_MOV_S32: 1309 srcw = (sljit_s32)srcw; 1310 break; 1311#endif 1312 } 1313#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 1314 if (SLJIT_UNLIKELY(dst_is_ereg)) 1315 return emit_mov(compiler, dst, dstw, src, srcw); 1316#endif 1317 } 1318 1319#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 1320 if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) { 1321 SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP)); 1322 dst = TMP_REG1; 1323 } 1324#endif 1325 1326 switch (op) { 1327 case SLJIT_MOV: 1328 case SLJIT_MOV_P: 1329#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 1330 case SLJIT_MOV_U32: 1331 case SLJIT_MOV_S32: 1332#endif 1333 FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw)); 1334 break; 1335 case SLJIT_MOV_U8: 1336 FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw)); 1337 break; 1338 case SLJIT_MOV_S8: 1339 FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw)); 1340 break; 1341 case SLJIT_MOV_U16: 1342 FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw)); 1343 break; 1344 case SLJIT_MOV_S16: 1345 FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw)); 1346 break; 1347#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1348 case SLJIT_MOV_U32: 1349 FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw)); 1350 break; 1351 case SLJIT_MOV_S32: 1352 
FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw)); 1353 break; 1354#endif 1355 } 1356 1357#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 1358 if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1) 1359 return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0); 1360#endif 1361 1362 if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK)) { 1363 if ((src & OFFS_REG_MASK) != 0) { 1364 FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, 1365 (src & REG_MASK), 0, (src & REG_MASK), 0, OFFS_REG(dst), 0)); 1366 } 1367 else if (srcw != 0) { 1368 FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, 1369 (src & REG_MASK), 0, (src & REG_MASK), 0, SLJIT_IMM, srcw)); 1370 } 1371 } 1372 1373 if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK)) { 1374 if ((dst & OFFS_REG_MASK) != 0) { 1375 FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, 1376 (dst & REG_MASK), 0, (dst & REG_MASK), 0, OFFS_REG(dst), 0)); 1377 } 1378 else if (dstw != 0) { 1379 FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, 1380 (dst & REG_MASK), 0, (dst & REG_MASK), 0, SLJIT_IMM, dstw)); 1381 } 1382 } 1383 return SLJIT_SUCCESS; 1384 } 1385 1386 switch (op) { 1387 case SLJIT_NOT: 1388 if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_Z)) 1389 return emit_not_with_flags(compiler, dst, dstw, src, srcw); 1390 return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw); 1391 1392 case SLJIT_NEG: 1393 return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw); 1394 1395 case SLJIT_CLZ: 1396 return emit_clz(compiler, op_flags, dst, dstw, src, srcw); 1397 } 1398 1399 return SLJIT_SUCCESS; 1400 1401#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1402# undef src_is_ereg 1403#endif 1404} 1405 1406#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1407 1408#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \ 1409 if (IS_HALFWORD(immw) || compiler->mode32) { \ 1410 inst = 
/* BINARY_IMM emits "op rm, imm"; on x86-64 an immediate that does not fit in
   32 bits must be materialized in TMP_REG2 first, because the instruction
   only encodes sign-extended 32 bit immediates. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	if (IS_HALFWORD(immw) || compiler->mode32) { \
		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
		FAIL_IF(!inst); \
		*(inst + 1) |= (op_imm); \
	} \
	else { \
		FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
		inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
		FAIL_IF(!inst); \
		*inst = (op_mr); \
	}

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))

#else

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
	FAIL_IF(!inst); \
	*(inst + 1) |= (op_imm);

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))

#endif

/* Emits a commutative two-operand ALU operation (ADD/ADC/OR/XOR/AND); the
   opcode bytes for the reg<-rm, rm<-reg, rm<-imm and short EAX,imm32 forms
   are passed in, and the cheapest applicable encoding is chosen. */
static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
	sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;

	if (dst == SLJIT_UNUSED) {
		/* Result discarded; execute for the flag effects only. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
			/* The one byte shorter EAX,imm32 form; only worthwhile when the
			   immediate does not fit in a signed byte. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src2)) {
			/* Special exception for sljit_emit_op_flags. */
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* Only for cumulative operations. */
	if (dst == src2 && dstw == src2w) {
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src1w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src1)) {
			inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires less memory writing. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}
/* Emits a non-commutative two-operand ALU operation (SUB/SBB); like
   emit_cum_binary but without the dst == src2 shortcut, since the operand
   order matters. */
static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;

	if (dst == SLJIT_UNUSED) {
		/* Result discarded; execute for the flag effects only. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. dst must not alias src2 (operand order matters). */
	if (FAST_IS_REG(dst) && dst != src2) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires less memory writing. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}

/* Emits a signed multiply using the two- and three-operand IMUL forms;
   immediates use IMUL r, rm, imm8/imm32 when they fit the encoding. */
static sljit_s32 emit_mul(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	/* Register destination. */
	if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}
	else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
		/* Multiplication is commutative, so the operands may be swapped. */
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}
	else if (src1 & SLJIT_IMM) {
		if (src2 & SLJIT_IMM) {
			EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
			src2 = dst_r;
			src2w = 0;
		}

		if (src1w <= 127 && src1w >= -128) {
			/* Three-operand form with a sign-extended 8 bit immediate. */
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i8;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = (sljit_s8)src1w;
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			sljit_unaligned_store_sw(inst, src1w);
		}
#else
		else if (IS_HALFWORD(src1w)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
		}
		else {
			/* 64 bit immediate cannot be encoded inline; load it first. */
			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
			if (dst_r != src2)
				EMIT_MOV(compiler, dst_r, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = IMUL_r_rm;
		}
#endif
	}
	else if (src2 & SLJIT_IMM) {
		/* Note: src1 is NOT immediate. */

		if (src2w <= 127 && src2w >= -128) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i8;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = (sljit_s8)src2w;
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			sljit_unaligned_store_sw(inst, src2w);
		}
#else
		else if (IS_HALFWORD(src2w)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
		}
		else {
			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w);
			if (dst_r != src1)
				EMIT_MOV(compiler, dst_r, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = IMUL_r_rm;
		}
#endif
	}
	else {
		/* Neither argument is immediate. */
		if (ADDRESSING_DEPENDS_ON(src2, dst_r))
			dst_r = TMP_REG1;
		EMIT_MOV(compiler, dst_r, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}

	if (dst_r == TMP_REG1)
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);

	return SLJIT_SUCCESS;
}
/* Tries to emit an addition as a single flag-free LEA (reg + reg or
   reg + imm); returns SLJIT_ERR_UNSUPPORTED when the operand combination
   cannot be expressed that way, so the caller falls back to ADD. */
static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;
	sljit_s32 dst_r, done = 0;

	/* These cases are better handled by the normal code path. */
	if (dst == src1 && dstw == src1w)
		return SLJIT_ERR_UNSUPPORTED;
	if (dst == src2 && dstw == src2w)
		return SLJIT_ERR_UNSUPPORTED;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (FAST_IS_REG(src1)) {
		if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* Displacement must fit in the signed 32 bit disp field. */
		if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w);
#else
		if (src2 & SLJIT_IMM) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
#endif
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
	}
	else if (FAST_IS_REG(src2)) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w);
#else
		if (src1 & SLJIT_IMM) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
#endif
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
	}

	if (done) {
		if (dst_r == TMP_REG1)
			return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
		return SLJIT_SUCCESS;
	}
	return SLJIT_ERR_UNSUPPORTED;
}

/* Emits a CMP (flags-only subtraction); picks the short CMP EAX,imm32
   encoding or the reg/rm forms depending on the operands. */
static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
#endif
		BINARY_EAX_IMM(CMP_EAX_i32, src2w);
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src1)) {
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = CMP_r_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
		FAIL_IF(!inst);
		*inst = CMP_rm_r;
		return SLJIT_SUCCESS;
	}

	if (src2 & SLJIT_IMM) {
		if (src1 & SLJIT_IMM) {
			/* imm vs imm: materialize src1 first. */
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			src1 = TMP_REG1;
			src1w = 0;
		}
		BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
	}
	else {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst = CMP_r_rm;
	}
	return SLJIT_SUCCESS;
}
/* Emits a TEST (flags-only AND); used for SLJIT_AND with an unused
   destination. The immediate form is the group-F7 /0 encoding, hence
   *inst = GROUP_F7 below. */
static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;

	/* Short TEST EAX,imm32 form, either operand order. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
#endif
		BINARY_EAX_IMM(TEST_EAX_i32, src2w);
		return SLJIT_SUCCESS;
	}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
	if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
#endif
		BINARY_EAX_IMM(TEST_EAX_i32, src1w);
		return SLJIT_SUCCESS;
	}

	if (!(src1 & SLJIT_IMM)) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (IS_HALFWORD(src2w) || compiler->mode32) {
				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
				FAIL_IF(!inst);
				*inst = GROUP_F7;
			}
			else {
				/* Immediate does not fit in 32 bits; load and use TEST rm,r. */
				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
				inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, src1w);
				FAIL_IF(!inst);
				*inst = TEST_rm_r;
			}
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
			FAIL_IF(!inst);
			*inst = GROUP_F7;
#endif
			return SLJIT_SUCCESS;
		}
		else if (FAST_IS_REG(src1)) {
			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = TEST_rm_r;
			return SLJIT_SUCCESS;
		}
	}

	if (!(src2 & SLJIT_IMM)) {
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (IS_HALFWORD(src1w) || compiler->mode32) {
				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
				FAIL_IF(!inst);
				*inst = GROUP_F7;
			}
			else {
				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
				inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, src2w);
				FAIL_IF(!inst);
				*inst = TEST_rm_r;
			}
#else
			inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
			FAIL_IF(!inst);
			*inst = GROUP_F7;
#endif
			return SLJIT_SUCCESS;
		}
		else if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = TEST_rm_r;
			return SLJIT_SUCCESS;
		}
	}

	/* General fallback: move src1 into TMP_REG1 and test against src2. */
	EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
	if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (IS_HALFWORD(src2w) || compiler->mode32) {
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst = GROUP_F7;
		}
		else {
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
			inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst = TEST_rm_r;
		}
#else
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst = GROUP_F7;
#endif
	}
	else {
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst = TEST_rm_r;
	}
	return SLJIT_SUCCESS;
}
/* Emits a shift/rotate (mode is the group-D3 /digit, e.g. SHL/SHR/SAR).
   Variable shift counts must be in CL (SLJIT_PREF_SHIFT_REG == ECX), so the
   harder cases below juggle ECX through TMP_REG1/TMP_REG2 or a stack slot. */
static sljit_s32 emit_shift(struct sljit_compiler *compiler,
	sljit_u8 mode,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;

	if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
		/* Count is an immediate or already in CL. */
		if (dst == src1 && dstw == src1w) {
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_UNUSED) {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
			/* dst and count are both ECX: shift in TMP_REG1, then move back. */
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
			return SLJIT_SUCCESS;
		}
		if (FAST_IS_REG(dst)) {
			EMIT_MOV(compiler, dst, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}

		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
		return SLJIT_SUCCESS;
	}

	if (dst == SLJIT_PREF_SHIFT_REG) {
		/* dst is ECX itself: shift a copy, load the count, move back. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
	}
	else if (FAST_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
		/* Save ECX in TMP_REG1 while it holds the shift count. */
		if (src1 != dst)
			EMIT_MOV(compiler, dst, 0, src1, src1w);
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
	}
	else {
		/* This case is complex since ecx itself may be used for
		   addressing, and this case must be supported as well. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
#else
		/* No second temporary on x86-32; spill ECX to the stack. */
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
#endif
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst |= mode;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
#else
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
#endif
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}
/* Wraps emit_shift when the caller needs the flags. The CPU does not set
   flags if the shift count is 0, so a masked-to-zero immediate count is
   replaced by an OR-with-0 (or plain MOV), and variable counts get an
   explicit CMP dst, 0 afterwards to materialize the zero flag. */
static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,
	sljit_u8 mode, sljit_s32 set_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	/* The CPU does not set flags if the shift count is 0. */
	if (src2 & SLJIT_IMM) {
		/* Hardware masks the count to 6 bits (64 bit) or 5 bits (32 bit). */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
#else
		if ((src2w & 0x1f) != 0)
			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
#endif
		if (!set_flags)
			return emit_mov(compiler, dst, dstw, src1, src1w);
		/* OR dst, src, 0 */
		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
			dst, dstw, src1, src1w, SLJIT_IMM, 0);
	}

	if (!set_flags)
		return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);

	if (!FAST_IS_REG(dst))
		FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));

	FAIL_IF(emit_shift(compiler,mode, dst, dstw, src1, src1w, src2, src2w));

	if (FAST_IS_REG(dst))
		return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
	return SLJIT_SUCCESS;
}

/* Two-operand entry point: dispatches each SLJIT arithmetic/logic/shift
   opcode to the matching x86 emitter, choosing flag-free or flags-only
   shortcuts (LEA, CMP, TEST) where possible. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	CHECK_EXTRA_REGS(src1, src1w, (void)0);
	CHECK_EXTRA_REGS(src2, src2w, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op & SLJIT_I32_OP;
#endif

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
		if (!HAS_FLAGS(op)) {
			/* Flag-free add may be a single LEA. */
			if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ADDC:
		return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUB:
		if (!HAS_FLAGS(op)) {
			if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}

		if (dst == SLJIT_UNUSED)
			return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
		return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUBC:
		return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_MUL:
		return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_AND:
		if (dst == SLJIT_UNUSED)
			return emit_test_binary(compiler, src1, src1w, src2, src2w);
		return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_OR:
		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_XOR:
		return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SHL:
		return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_LSHR:
		return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ASHR:
		return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	}

	return SLJIT_SUCCESS;
}
SLJIT_CONFIG_X86_32) 2178 if (reg >= SLJIT_R3 && reg <= SLJIT_R8) 2179 return -1; 2180#endif 2181 return reg_map[reg]; 2182} 2183 2184SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) 2185{ 2186 CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); 2187 return reg; 2188} 2189 2190SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, 2191 void *instruction, sljit_s32 size) 2192{ 2193 sljit_u8 *inst; 2194 2195 CHECK_ERROR(); 2196 CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); 2197 2198 inst = (sljit_u8*)ensure_buf(compiler, 1 + size); 2199 FAIL_IF(!inst); 2200 INC_SIZE(size); 2201 SLJIT_MEMCPY(inst, instruction, size); 2202 return SLJIT_SUCCESS; 2203} 2204 2205/* --------------------------------------------------------------------- */ 2206/* Floating point operators */ 2207/* --------------------------------------------------------------------- */ 2208 2209/* Alignment + 2 * 16 bytes. */ 2210static sljit_s32 sse2_data[3 + (4 + 4) * 2]; 2211static sljit_s32 *sse2_buffer; 2212 2213static void init_compiler(void) 2214{ 2215 sse2_buffer = (sljit_s32*)(((sljit_uw)sse2_data + 15) & ~0xf); 2216 /* Single precision constants. */ 2217 sse2_buffer[0] = 0x80000000; 2218 sse2_buffer[4] = 0x7fffffff; 2219 /* Double precision constants. 
*/ 2220 sse2_buffer[8] = 0; 2221 sse2_buffer[9] = 0x80000000; 2222 sse2_buffer[12] = 0xffffffff; 2223 sse2_buffer[13] = 0x7fffffff; 2224} 2225 2226SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void) 2227{ 2228#ifdef SLJIT_IS_FPU_AVAILABLE 2229 return SLJIT_IS_FPU_AVAILABLE; 2230#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) 2231 if (cpu_has_sse2 == -1) 2232 get_cpu_features(); 2233 return cpu_has_sse2; 2234#else /* SLJIT_DETECT_SSE2 */ 2235 return 1; 2236#endif /* SLJIT_DETECT_SSE2 */ 2237} 2238 2239static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode, 2240 sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w) 2241{ 2242 sljit_u8 *inst; 2243 2244 inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w); 2245 FAIL_IF(!inst); 2246 *inst++ = GROUP_0F; 2247 *inst = opcode; 2248 return SLJIT_SUCCESS; 2249} 2250 2251static sljit_s32 emit_sse2_logic(struct sljit_compiler *compiler, sljit_u8 opcode, 2252 sljit_s32 pref66, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w) 2253{ 2254 sljit_u8 *inst; 2255 2256 inst = emit_x86_instruction(compiler, 2 | (pref66 ? 
EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w); 2257 FAIL_IF(!inst); 2258 *inst++ = GROUP_0F; 2259 *inst = opcode; 2260 return SLJIT_SUCCESS; 2261} 2262 2263static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler, 2264 sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw) 2265{ 2266 return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw); 2267} 2268 2269static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler, 2270 sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src) 2271{ 2272 return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw); 2273} 2274 2275static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, 2276 sljit_s32 dst, sljit_sw dstw, 2277 sljit_s32 src, sljit_sw srcw) 2278{ 2279 sljit_s32 dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; 2280 sljit_u8 *inst; 2281 2282#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2283 if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64) 2284 compiler->mode32 = 0; 2285#endif 2286 2287 inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw); 2288 FAIL_IF(!inst); 2289 *inst++ = GROUP_0F; 2290 *inst = CVTTSD2SI_r_xm; 2291 2292 if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED) 2293 return emit_mov(compiler, dst, dstw, TMP_REG1, 0); 2294 return SLJIT_SUCCESS; 2295} 2296 2297static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, 2298 sljit_s32 dst, sljit_sw dstw, 2299 sljit_s32 src, sljit_sw srcw) 2300{ 2301 sljit_s32 dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG; 2302 sljit_u8 *inst; 2303 2304#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2305 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW) 2306 compiler->mode32 = 0; 2307#endif 2308 2309 if (src & SLJIT_IMM) { 2310#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2311 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) 2312 srcw = (sljit_s32)srcw; 2313#endif 2314 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); 2315 src = TMP_REG1; 2316 srcw = 0; 2317 } 2318 2319 inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw); 2320 FAIL_IF(!inst); 2321 *inst++ = GROUP_0F; 2322 *inst = CVTSI2SD_x_rm; 2323 2324#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2325 compiler->mode32 = 1; 2326#endif 2327 if (dst_r == TMP_FREG) 2328 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG); 2329 return SLJIT_SUCCESS; 2330} 2331 2332static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, 2333 sljit_s32 src1, sljit_sw src1w, 2334 sljit_s32 src2, sljit_sw src2w) 2335{ 2336 if (!FAST_IS_REG(src1)) { 2337 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w)); 2338 src1 = TMP_FREG; 2339 } 2340 return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_F32_OP), src1, src2, src2w); 2341} 2342 2343SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, 2344 sljit_s32 dst, sljit_sw dstw, 2345 sljit_s32 src, sljit_sw srcw) 2346{ 2347 sljit_s32 dst_r; 2348 2349#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2350 compiler->mode32 = 1; 2351#endif 2352 2353 CHECK_ERROR(); 2354 SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); 2355 2356 if (GET_OPCODE(op) == SLJIT_MOV_F64) { 2357 if (FAST_IS_REG(dst)) 2358 return emit_sse2_load(compiler, op & SLJIT_F32_OP, dst, src, srcw); 2359 if (FAST_IS_REG(src)) 2360 return emit_sse2_store(compiler, op & 
SLJIT_F32_OP, dst, dstw, src); 2361 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src, srcw)); 2362 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG); 2363 } 2364 2365 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) { 2366 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG; 2367 if (FAST_IS_REG(src)) { 2368 /* We overwrite the high bits of source. From SLJIT point of view, 2369 this is not an issue. 2370 Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */ 2371 FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_F32_OP, src, src, 0)); 2372 } 2373 else { 2374 FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_F32_OP), TMP_FREG, src, srcw)); 2375 src = TMP_FREG; 2376 } 2377 2378 FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_F32_OP, dst_r, src, 0)); 2379 if (dst_r == TMP_FREG) 2380 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG); 2381 return SLJIT_SUCCESS; 2382 } 2383 2384 if (SLOW_IS_REG(dst)) { 2385 dst_r = dst; 2386 if (dst != src) 2387 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw)); 2388 } 2389 else { 2390 dst_r = TMP_FREG; 2391 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw)); 2392 } 2393 2394 switch (GET_OPCODE(op)) { 2395 case SLJIT_NEG_F64: 2396 FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer : sse2_buffer + 8))); 2397 break; 2398 2399 case SLJIT_ABS_F64: 2400 FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? 
sse2_buffer + 4 : sse2_buffer + 12))); 2401 break; 2402 } 2403 2404 if (dst_r == TMP_FREG) 2405 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG); 2406 return SLJIT_SUCCESS; 2407} 2408 2409SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, 2410 sljit_s32 dst, sljit_sw dstw, 2411 sljit_s32 src1, sljit_sw src1w, 2412 sljit_s32 src2, sljit_sw src2w) 2413{ 2414 sljit_s32 dst_r; 2415 2416 CHECK_ERROR(); 2417 CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); 2418 ADJUST_LOCAL_OFFSET(dst, dstw); 2419 ADJUST_LOCAL_OFFSET(src1, src1w); 2420 ADJUST_LOCAL_OFFSET(src2, src2w); 2421 2422#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2423 compiler->mode32 = 1; 2424#endif 2425 2426 if (FAST_IS_REG(dst)) { 2427 dst_r = dst; 2428 if (dst == src1) 2429 ; /* Do nothing here. */ 2430 else if (dst == src2 && (op == SLJIT_ADD_F64 || op == SLJIT_MUL_F64)) { 2431 /* Swap arguments. */ 2432 src2 = src1; 2433 src2w = src1w; 2434 } 2435 else if (dst != src2) 2436 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src1, src1w)); 2437 else { 2438 dst_r = TMP_FREG; 2439 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w)); 2440 } 2441 } 2442 else { 2443 dst_r = TMP_FREG; 2444 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w)); 2445 } 2446 2447 switch (GET_OPCODE(op)) { 2448 case SLJIT_ADD_F64: 2449 FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w)); 2450 break; 2451 2452 case SLJIT_SUB_F64: 2453 FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w)); 2454 break; 2455 2456 case SLJIT_MUL_F64: 2457 FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w)); 2458 break; 2459 2460 case SLJIT_DIV_F64: 2461 FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w)); 2462 break; 2463 } 2464 2465 if (dst_r == TMP_FREG) 2466 return 
emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG); 2467 return SLJIT_SUCCESS; 2468} 2469 2470/* --------------------------------------------------------------------- */ 2471/* Conditional instructions */ 2472/* --------------------------------------------------------------------- */ 2473 2474SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) 2475{ 2476 sljit_u8 *inst; 2477 struct sljit_label *label; 2478 2479 CHECK_ERROR_PTR(); 2480 CHECK_PTR(check_sljit_emit_label(compiler)); 2481 2482 if (compiler->last_label && compiler->last_label->size == compiler->size) 2483 return compiler->last_label; 2484 2485 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); 2486 PTR_FAIL_IF(!label); 2487 set_label(label, compiler); 2488 2489 inst = (sljit_u8*)ensure_buf(compiler, 2); 2490 PTR_FAIL_IF(!inst); 2491 2492 *inst++ = 0; 2493 *inst++ = 0; 2494 2495 return label; 2496} 2497 2498SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) 2499{ 2500 sljit_u8 *inst; 2501 struct sljit_jump *jump; 2502 2503 CHECK_ERROR_PTR(); 2504 CHECK_PTR(check_sljit_emit_jump(compiler, type)); 2505 2506 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); 2507 PTR_FAIL_IF_NULL(jump); 2508 set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); 2509 type &= 0xff; 2510 2511 if (type >= SLJIT_CALL1) 2512 PTR_FAIL_IF(call_with_args(compiler, type)); 2513 2514 /* Worst case size. */ 2515#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 2516 compiler->size += (type >= SLJIT_JUMP) ? 5 : 6; 2517#else 2518 compiler->size += (type >= SLJIT_JUMP) ? 
(10 + 3) : (2 + 10 + 3); 2519#endif 2520 2521 inst = (sljit_u8*)ensure_buf(compiler, 2); 2522 PTR_FAIL_IF_NULL(inst); 2523 2524 *inst++ = 0; 2525 *inst++ = type + 2; 2526 return jump; 2527} 2528 2529SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) 2530{ 2531 sljit_u8 *inst; 2532 struct sljit_jump *jump; 2533 2534 CHECK_ERROR(); 2535 CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); 2536 ADJUST_LOCAL_OFFSET(src, srcw); 2537 2538 CHECK_EXTRA_REGS(src, srcw, (void)0); 2539 2540 if (type >= SLJIT_CALL1) { 2541#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 2542#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) 2543 if (src == SLJIT_R2) { 2544 EMIT_MOV(compiler, TMP_REG1, 0, src, 0); 2545 src = TMP_REG1; 2546 } 2547 if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3) 2548 srcw += sizeof(sljit_sw); 2549#endif 2550#endif 2551#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64) 2552 if (src == SLJIT_R2) { 2553 EMIT_MOV(compiler, TMP_REG1, 0, src, 0); 2554 src = TMP_REG1; 2555 } 2556#endif 2557 FAIL_IF(call_with_args(compiler, type)); 2558 } 2559 2560 if (src == SLJIT_IMM) { 2561 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); 2562 FAIL_IF_NULL(jump); 2563 set_jump(jump, compiler, JUMP_ADDR); 2564 jump->u.target = srcw; 2565 2566 /* Worst case size. */ 2567#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 2568 compiler->size += 5; 2569#else 2570 compiler->size += 10 + 3; 2571#endif 2572 2573 inst = (sljit_u8*)ensure_buf(compiler, 2); 2574 FAIL_IF_NULL(inst); 2575 2576 *inst++ = 0; 2577 *inst++ = type + 2; 2578 } 2579 else { 2580#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2581 /* REX_W is not necessary (src is not immediate). 
*/ 2582 compiler->mode32 = 1; 2583#endif 2584 inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); 2585 FAIL_IF(!inst); 2586 *inst++ = GROUP_FF; 2587 *inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm; 2588 } 2589 return SLJIT_SUCCESS; 2590} 2591 2592SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, 2593 sljit_s32 dst, sljit_sw dstw, 2594 sljit_s32 src, sljit_sw srcw, 2595 sljit_s32 type) 2596{ 2597 sljit_u8 *inst; 2598 sljit_u8 cond_set = 0; 2599#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2600 sljit_s32 reg; 2601#endif 2602 /* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */ 2603 sljit_s32 dst_save = dst; 2604 sljit_sw dstw_save = dstw; 2605 2606 CHECK_ERROR(); 2607 CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); 2608 SLJIT_UNUSED_ARG(srcw); 2609 2610 if (dst == SLJIT_UNUSED) 2611 return SLJIT_SUCCESS; 2612 2613 ADJUST_LOCAL_OFFSET(dst, dstw); 2614 CHECK_EXTRA_REGS(dst, dstw, (void)0); 2615 2616 type &= 0xff; 2617 /* setcc = jcc + 0x10. */ 2618 cond_set = get_jump_code(type) + 0x10; 2619 2620#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2621 if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src) { 2622 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3); 2623 FAIL_IF(!inst); 2624 INC_SIZE(4 + 3); 2625 /* Set low register to conditional flag. */ 2626 *inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B; 2627 *inst++ = GROUP_0F; 2628 *inst++ = cond_set; 2629 *inst++ = MOD_REG | reg_lmap[TMP_REG1]; 2630 *inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B); 2631 *inst++ = OR_rm8_r8; 2632 *inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst]; 2633 return SLJIT_SUCCESS; 2634 } 2635 2636 reg = (op == SLJIT_MOV && FAST_IS_REG(dst)) ? 
dst : TMP_REG1; 2637 2638 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4); 2639 FAIL_IF(!inst); 2640 INC_SIZE(4 + 4); 2641 /* Set low register to conditional flag. */ 2642 *inst++ = (reg_map[reg] <= 7) ? REX : REX_B; 2643 *inst++ = GROUP_0F; 2644 *inst++ = cond_set; 2645 *inst++ = MOD_REG | reg_lmap[reg]; 2646 *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R)); 2647 /* The movzx instruction does not affect flags. */ 2648 *inst++ = GROUP_0F; 2649 *inst++ = MOVZX_r_rm8; 2650 *inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg]; 2651 2652 if (reg != TMP_REG1) 2653 return SLJIT_SUCCESS; 2654 2655 if (GET_OPCODE(op) < SLJIT_ADD) { 2656 compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV; 2657 return emit_mov(compiler, dst, dstw, TMP_REG1, 0); 2658 } 2659#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ 2660 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) 2661 compiler->skip_checks = 1; 2662#endif 2663 return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0); 2664 2665#else 2666 /* The SLJIT_CONFIG_X86_32 code path starts here. */ 2667 if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) { 2668 if (reg_map[dst] <= 4) { 2669 /* Low byte is accessible. */ 2670 inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3); 2671 FAIL_IF(!inst); 2672 INC_SIZE(3 + 3); 2673 /* Set low byte to conditional flag. */ 2674 *inst++ = GROUP_0F; 2675 *inst++ = cond_set; 2676 *inst++ = MOD_REG | reg_map[dst]; 2677 2678 *inst++ = GROUP_0F; 2679 *inst++ = MOVZX_r_rm8; 2680 *inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst]; 2681 return SLJIT_SUCCESS; 2682 } 2683 2684 /* Low byte is not accessible. */ 2685 if (cpu_has_cmov == -1) 2686 get_cpu_features(); 2687 2688 if (cpu_has_cmov) { 2689 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1); 2690 /* a xor reg, reg operation would overwrite the flags. 
*/ 2691 EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0); 2692 2693 inst = (sljit_u8*)ensure_buf(compiler, 1 + 3); 2694 FAIL_IF(!inst); 2695 INC_SIZE(3); 2696 2697 *inst++ = GROUP_0F; 2698 /* cmovcc = setcc - 0x50. */ 2699 *inst++ = cond_set - 0x50; 2700 *inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1]; 2701 return SLJIT_SUCCESS; 2702 } 2703 2704 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1); 2705 FAIL_IF(!inst); 2706 INC_SIZE(1 + 3 + 3 + 1); 2707 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; 2708 /* Set al to conditional flag. */ 2709 *inst++ = GROUP_0F; 2710 *inst++ = cond_set; 2711 *inst++ = MOD_REG | 0 /* eax */; 2712 2713 *inst++ = GROUP_0F; 2714 *inst++ = MOVZX_r_rm8; 2715 *inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */; 2716 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; 2717 return SLJIT_SUCCESS; 2718 } 2719 2720 if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) { 2721 SLJIT_ASSERT(reg_map[SLJIT_R0] == 0); 2722 2723 if (dst != SLJIT_R0) { 2724 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1); 2725 FAIL_IF(!inst); 2726 INC_SIZE(1 + 3 + 2 + 1); 2727 /* Set low register to conditional flag. */ 2728 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; 2729 *inst++ = GROUP_0F; 2730 *inst++ = cond_set; 2731 *inst++ = MOD_REG | 0 /* eax */; 2732 *inst++ = OR_rm8_r8; 2733 *inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst]; 2734 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; 2735 } 2736 else { 2737 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2); 2738 FAIL_IF(!inst); 2739 INC_SIZE(2 + 3 + 2 + 2); 2740 /* Set low register to conditional flag. 
*/ 2741 *inst++ = XCHG_r_rm; 2742 *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1]; 2743 *inst++ = GROUP_0F; 2744 *inst++ = cond_set; 2745 *inst++ = MOD_REG | 1 /* ecx */; 2746 *inst++ = OR_rm8_r8; 2747 *inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */; 2748 *inst++ = XCHG_r_rm; 2749 *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1]; 2750 } 2751 return SLJIT_SUCCESS; 2752 } 2753 2754 /* Set TMP_REG1 to the bit. */ 2755 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1); 2756 FAIL_IF(!inst); 2757 INC_SIZE(1 + 3 + 3 + 1); 2758 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; 2759 /* Set al to conditional flag. */ 2760 *inst++ = GROUP_0F; 2761 *inst++ = cond_set; 2762 *inst++ = MOD_REG | 0 /* eax */; 2763 2764 *inst++ = GROUP_0F; 2765 *inst++ = MOVZX_r_rm8; 2766 *inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */; 2767 2768 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; 2769 2770 if (GET_OPCODE(op) < SLJIT_ADD) 2771 return emit_mov(compiler, dst, dstw, TMP_REG1, 0); 2772 2773#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ 2774 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) 2775 compiler->skip_checks = 1; 2776#endif 2777 return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0); 2778#endif /* SLJIT_CONFIG_X86_64 */ 2779} 2780 2781SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) 2782{ 2783 CHECK_ERROR(); 2784 CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset)); 2785 ADJUST_LOCAL_OFFSET(dst, dstw); 2786 2787 CHECK_EXTRA_REGS(dst, dstw, (void)0); 2788 2789#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2790 compiler->mode32 = 0; 2791#endif 2792 2793 ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset); 2794 2795#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2796 if (NOT_HALFWORD(offset)) { 2797 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset)); 2798#if (defined SLJIT_DEBUG && SLJIT_DEBUG) 2799 
SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED); 2800 return compiler->error; 2801#else 2802 return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0); 2803#endif 2804 } 2805#endif 2806 2807 if (offset != 0) 2808 return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset); 2809 return emit_mov(compiler, dst, dstw, SLJIT_SP, 0); 2810} 2811 2812SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) 2813{ 2814 sljit_u8 *inst; 2815 struct sljit_const *const_; 2816#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2817 sljit_s32 reg; 2818#endif 2819 2820 CHECK_ERROR_PTR(); 2821 CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); 2822 ADJUST_LOCAL_OFFSET(dst, dstw); 2823 2824 CHECK_EXTRA_REGS(dst, dstw, (void)0); 2825 2826 const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); 2827 PTR_FAIL_IF(!const_); 2828 set_const(const_, compiler); 2829 2830#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2831 compiler->mode32 = 0; 2832 reg = SLOW_IS_REG(dst) ? 
dst : TMP_REG1; 2833 2834 if (emit_load_imm64(compiler, reg, init_value)) 2835 return NULL; 2836#else 2837 if (dst == SLJIT_UNUSED) 2838 dst = TMP_REG1; 2839 2840 if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value)) 2841 return NULL; 2842#endif 2843 2844 inst = (sljit_u8*)ensure_buf(compiler, 2); 2845 PTR_FAIL_IF(!inst); 2846 2847 *inst++ = 0; 2848 *inst++ = 1; 2849 2850#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2851 if (dst & SLJIT_MEM) 2852 if (emit_mov(compiler, dst, dstw, TMP_REG1, 0)) 2853 return NULL; 2854#endif 2855 2856 return const_; 2857} 2858 2859SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) 2860{ 2861 SLJIT_UNUSED_ARG(executable_offset); 2862#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 2863 sljit_unaligned_store_sw((void*)addr, new_target - (addr + 4) - (sljit_uw)executable_offset); 2864#else 2865 sljit_unaligned_store_sw((void*)addr, (sljit_sw) new_target); 2866#endif 2867} 2868 2869SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) 2870{ 2871 SLJIT_UNUSED_ARG(executable_offset); 2872 sljit_unaligned_store_sw((void*)addr, new_constant); 2873} 2874 2875SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_sse2_available(void) 2876{ 2877#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) 2878 if (cpu_has_sse2 == -1) 2879 get_cpu_features(); 2880 return cpu_has_sse2; 2881#else 2882 return 1; 2883#endif 2884} 2885 2886SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_cmov_available(void) 2887{ 2888 if (cpu_has_cmov == -1) 2889 get_cpu_features(); 2890 return cpu_has_cmov; 2891} 2892 2893SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_emit_cmov(struct sljit_compiler *compiler, 2894 sljit_s32 type, 2895 sljit_s32 dst_reg, 2896 sljit_s32 src, sljit_sw srcw) 2897{ 2898 sljit_u8* inst; 2899 2900 CHECK_ERROR(); 2901#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) 2902 
CHECK_ARGUMENT(sljit_x86_is_cmov_available()); 2903 CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_I32_OP))); 2904 CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64); 2905 CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_I32_OP)); 2906 FUNCTION_CHECK_SRC(src, srcw); 2907 2908 if ((type & 0xff) <= SLJIT_NOT_ZERO) 2909 CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z); 2910 else 2911 CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff)); 2912#endif 2913#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) 2914 if (SLJIT_UNLIKELY(!!compiler->verbose)) { 2915 fprintf(compiler->verbose, " x86_cmov%s %s%s, ", 2916 !(dst_reg & SLJIT_I32_OP) ? "" : ".i", 2917 jump_names[type & 0xff], JUMP_POSTFIX(type)); 2918 sljit_verbose_reg(compiler, dst_reg & ~SLJIT_I32_OP); 2919 fprintf(compiler->verbose, ", "); 2920 sljit_verbose_param(compiler, src, srcw); 2921 fprintf(compiler->verbose, "\n"); 2922 } 2923#endif 2924 2925 ADJUST_LOCAL_OFFSET(src, srcw); 2926 CHECK_EXTRA_REGS(src, srcw, (void)0); 2927 2928#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2929 compiler->mode32 = dst_reg & SLJIT_I32_OP; 2930#endif 2931 dst_reg &= ~SLJIT_I32_OP; 2932 2933 if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { 2934 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw); 2935 src = TMP_REG1; 2936 srcw = 0; 2937 } 2938 2939 inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw); 2940 FAIL_IF(!inst); 2941 *inst++ = GROUP_0F; 2942 *inst = get_jump_code(type & 0xff) - 0x40; 2943 return SLJIT_SUCCESS; 2944} 2945