1/* $NetBSD: sljitNativeTILEGX_64.c,v 1.4 2019/01/20 23:14:16 alnsn Exp $ */ 2 3/* 4 * Stack-less Just-In-Time compiler 5 * 6 * Copyright 2013-2013 Tilera Corporation(jiwang@tilera.com). All rights reserved. 7 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without modification, are 10 * permitted provided that the following conditions are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright notice, this list of 13 * conditions and the following disclaimer. 14 * 15 * 2. Redistributions in binary form must reproduce the above copyright notice, this list 16 * of conditions and the following disclaimer in the documentation and/or other materials 17 * provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY 20 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT 22 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 24 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 25 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 27 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30/* TileGX architecture. */ 31/* Contributed by Tilera Corporation. 
*/ 32#include "sljitNativeTILEGX-encoder.c" 33 34#define SIMM_8BIT_MAX (0x7f) 35#define SIMM_8BIT_MIN (-0x80) 36#define SIMM_16BIT_MAX (0x7fff) 37#define SIMM_16BIT_MIN (-0x8000) 38#define SIMM_17BIT_MAX (0xffff) 39#define SIMM_17BIT_MIN (-0x10000) 40#define SIMM_32BIT_MAX (0x7fffffff) 41#define SIMM_32BIT_MIN (-0x7fffffff - 1) 42#define SIMM_48BIT_MAX (0x7fffffff0000L) 43#define SIMM_48BIT_MIN (-0x800000000000L) 44#define IMM16(imm) ((imm) & 0xffff) 45 46#define UIMM_16BIT_MAX (0xffff) 47 48#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) 49#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) 50#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) 51#define ADDR_TMP (SLJIT_NUMBER_OF_REGISTERS + 5) 52#define PIC_ADDR_REG TMP_REG2 53 54static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { 55 63, 0, 1, 2, 3, 4, 30, 31, 32, 33, 34, 54, 5, 16, 6, 7 56}; 57 58#define SLJIT_LOCALS_REG_mapped 54 59#define TMP_REG1_mapped 5 60#define TMP_REG2_mapped 16 61#define TMP_REG3_mapped 6 62#define ADDR_TMP_mapped 7 63 64/* Flags are kept in volatile registers. */ 65#define EQUAL_FLAG 8 66/* And carry flag as well. 
*/ 67#define ULESS_FLAG 9 68#define UGREATER_FLAG 10 69#define LESS_FLAG 11 70#define GREATER_FLAG 12 71#define OVERFLOW_FLAG 13 72 73#define ZERO 63 74#define RA 55 75#define TMP_EREG1 14 76#define TMP_EREG2 15 77 78#define LOAD_DATA 0x01 79#define WORD_DATA 0x00 80#define BYTE_DATA 0x02 81#define HALF_DATA 0x04 82#define INT_DATA 0x06 83#define SIGNED_DATA 0x08 84#define DOUBLE_DATA 0x10 85 86/* Separates integer and floating point registers */ 87#define GPR_REG 0xf 88 89#define MEM_MASK 0x1f 90 91#define WRITE_BACK 0x00020 92#define ARG_TEST 0x00040 93#define ALT_KEEP_CACHE 0x00080 94#define CUMULATIVE_OP 0x00100 95#define LOGICAL_OP 0x00200 96#define IMM_OP 0x00400 97#define SRC2_IMM 0x00800 98 99#define UNUSED_DEST 0x01000 100#define REG_DEST 0x02000 101#define REG1_SOURCE 0x04000 102#define REG2_SOURCE 0x08000 103#define SLOW_SRC1 0x10000 104#define SLOW_SRC2 0x20000 105#define SLOW_DEST 0x40000 106 107/* Only these flags are set. UNUSED_DEST is not set when no flags should be set. 
108 */ 109#define CHECK_FLAGS(list) (!(flags & UNUSED_DEST) || (op & GET_FLAGS(~(list)))) 110 111SLJIT_API_FUNC_ATTRIBUTE const char *sljit_get_platform_name(void) 112{ 113 return "TileGX" SLJIT_CPUINFO; 114} 115 116/* Length of an instruction word */ 117typedef sljit_uw sljit_ins; 118 119struct jit_instr { 120 const struct tilegx_opcode* opcode; 121 tilegx_pipeline pipe; 122 unsigned long input_registers; 123 unsigned long output_registers; 124 int operand_value[4]; 125 int line; 126}; 127 128/* Opcode Helper Macros */ 129#define TILEGX_X_MODE 0 130 131#define X_MODE create_Mode(TILEGX_X_MODE) 132 133#define FNOP_X0 \ 134 create_Opcode_X0(RRR_0_OPCODE_X0) | \ 135 create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \ 136 create_UnaryOpcodeExtension_X0(FNOP_UNARY_OPCODE_X0) 137 138#define FNOP_X1 \ 139 create_Opcode_X1(RRR_0_OPCODE_X1) | \ 140 create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \ 141 create_UnaryOpcodeExtension_X1(FNOP_UNARY_OPCODE_X1) 142 143#define NOP \ 144 create_Mode(TILEGX_X_MODE) | FNOP_X0 | FNOP_X1 145 146#define ANOP_X0 \ 147 create_Opcode_X0(RRR_0_OPCODE_X0) | \ 148 create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \ 149 create_UnaryOpcodeExtension_X0(NOP_UNARY_OPCODE_X0) 150 151#define BPT create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 152 create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \ 153 create_UnaryOpcodeExtension_X1(ILL_UNARY_OPCODE_X1) | \ 154 create_Dest_X1(0x1C) | create_SrcA_X1(0x25) | ANOP_X0 155 156#define ADD_X1 \ 157 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 158 create_RRROpcodeExtension_X1(ADD_RRR_0_OPCODE_X1) | FNOP_X0 159 160#define ADDI_X1 \ 161 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \ 162 create_Imm8OpcodeExtension_X1(ADDI_IMM8_OPCODE_X1) | FNOP_X0 163 164#define SUB_X1 \ 165 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 166 create_RRROpcodeExtension_X1(SUB_RRR_0_OPCODE_X1) | FNOP_X0 167 168#define NOR_X1 \ 169 
create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 170 create_RRROpcodeExtension_X1(NOR_RRR_0_OPCODE_X1) | FNOP_X0 171 172#define OR_X1 \ 173 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 174 create_RRROpcodeExtension_X1(OR_RRR_0_OPCODE_X1) | FNOP_X0 175 176#define AND_X1 \ 177 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 178 create_RRROpcodeExtension_X1(AND_RRR_0_OPCODE_X1) | FNOP_X0 179 180#define XOR_X1 \ 181 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 182 create_RRROpcodeExtension_X1(XOR_RRR_0_OPCODE_X1) | FNOP_X0 183 184#define CMOVNEZ_X0 \ 185 create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \ 186 create_RRROpcodeExtension_X0(CMOVNEZ_RRR_0_OPCODE_X0) | FNOP_X1 187 188#define CMOVEQZ_X0 \ 189 create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \ 190 create_RRROpcodeExtension_X0(CMOVEQZ_RRR_0_OPCODE_X0) | FNOP_X1 191 192#define ADDLI_X1 \ 193 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(ADDLI_OPCODE_X1) | FNOP_X0 194 195#define V4INT_L_X1 \ 196 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 197 create_RRROpcodeExtension_X1(V4INT_L_RRR_0_OPCODE_X1) | FNOP_X0 198 199#define BFEXTU_X0 \ 200 create_Mode(TILEGX_X_MODE) | create_Opcode_X0(BF_OPCODE_X0) | \ 201 create_BFOpcodeExtension_X0(BFEXTU_BF_OPCODE_X0) | FNOP_X1 202 203#define BFEXTS_X0 \ 204 create_Mode(TILEGX_X_MODE) | create_Opcode_X0(BF_OPCODE_X0) | \ 205 create_BFOpcodeExtension_X0(BFEXTS_BF_OPCODE_X0) | FNOP_X1 206 207#define SHL16INSLI_X1 \ 208 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHL16INSLI_OPCODE_X1) | FNOP_X0 209 210#define ST_X1 \ 211 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 212 create_RRROpcodeExtension_X1(ST_RRR_0_OPCODE_X1) | create_Dest_X1(0x0) | FNOP_X0 213 214#define LD_X1 \ 215 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 216 create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \ 217 
create_UnaryOpcodeExtension_X1(LD_UNARY_OPCODE_X1) | FNOP_X0 218 219#define JR_X1 \ 220 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 221 create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \ 222 create_UnaryOpcodeExtension_X1(JR_UNARY_OPCODE_X1) | FNOP_X0 223 224#define JALR_X1 \ 225 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 226 create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \ 227 create_UnaryOpcodeExtension_X1(JALR_UNARY_OPCODE_X1) | FNOP_X0 228 229#define CLZ_X0 \ 230 create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \ 231 create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \ 232 create_UnaryOpcodeExtension_X0(CNTLZ_UNARY_OPCODE_X0) | FNOP_X1 233 234#define CMPLTUI_X1 \ 235 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \ 236 create_Imm8OpcodeExtension_X1(CMPLTUI_IMM8_OPCODE_X1) | FNOP_X0 237 238#define CMPLTU_X1 \ 239 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 240 create_RRROpcodeExtension_X1(CMPLTU_RRR_0_OPCODE_X1) | FNOP_X0 241 242#define CMPLTS_X1 \ 243 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 244 create_RRROpcodeExtension_X1(CMPLTS_RRR_0_OPCODE_X1) | FNOP_X0 245 246#define XORI_X1 \ 247 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \ 248 create_Imm8OpcodeExtension_X1(XORI_IMM8_OPCODE_X1) | FNOP_X0 249 250#define ORI_X1 \ 251 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \ 252 create_Imm8OpcodeExtension_X1(ORI_IMM8_OPCODE_X1) | FNOP_X0 253 254#define ANDI_X1 \ 255 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \ 256 create_Imm8OpcodeExtension_X1(ANDI_IMM8_OPCODE_X1) | FNOP_X0 257 258#define SHLI_X1 \ 259 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \ 260 create_ShiftOpcodeExtension_X1(SHLI_SHIFT_OPCODE_X1) | FNOP_X0 261 262#define SHL_X1 \ 263 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 264 
create_RRROpcodeExtension_X1(SHL_RRR_0_OPCODE_X1) | FNOP_X0 265 266#define SHRSI_X1 \ 267 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \ 268 create_ShiftOpcodeExtension_X1(SHRSI_SHIFT_OPCODE_X1) | FNOP_X0 269 270#define SHRS_X1 \ 271 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 272 create_RRROpcodeExtension_X1(SHRS_RRR_0_OPCODE_X1) | FNOP_X0 273 274#define SHRUI_X1 \ 275 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \ 276 create_ShiftOpcodeExtension_X1(SHRUI_SHIFT_OPCODE_X1) | FNOP_X0 277 278#define SHRU_X1 \ 279 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 280 create_RRROpcodeExtension_X1(SHRU_RRR_0_OPCODE_X1) | FNOP_X0 281 282#define BEQZ_X1 \ 283 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(BRANCH_OPCODE_X1) | \ 284 create_BrType_X1(BEQZ_BRANCH_OPCODE_X1) | FNOP_X0 285 286#define BNEZ_X1 \ 287 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(BRANCH_OPCODE_X1) | \ 288 create_BrType_X1(BNEZ_BRANCH_OPCODE_X1) | FNOP_X0 289 290#define J_X1 \ 291 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(JUMP_OPCODE_X1) | \ 292 create_JumpOpcodeExtension_X1(J_JUMP_OPCODE_X1) | FNOP_X0 293 294#define JAL_X1 \ 295 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(JUMP_OPCODE_X1) | \ 296 create_JumpOpcodeExtension_X1(JAL_JUMP_OPCODE_X1) | FNOP_X0 297 298#define DEST_X0(x) create_Dest_X0(x) 299#define SRCA_X0(x) create_SrcA_X0(x) 300#define SRCB_X0(x) create_SrcB_X0(x) 301#define DEST_X1(x) create_Dest_X1(x) 302#define SRCA_X1(x) create_SrcA_X1(x) 303#define SRCB_X1(x) create_SrcB_X1(x) 304#define IMM16_X1(x) create_Imm16_X1(x) 305#define IMM8_X1(x) create_Imm8_X1(x) 306#define BFSTART_X0(x) create_BFStart_X0(x) 307#define BFEND_X0(x) create_BFEnd_X0(x) 308#define SHIFTIMM_X1(x) create_ShAmt_X1(x) 309#define JOFF_X1(x) create_JumpOff_X1(x) 310#define BOFF_X1(x) create_BrOff_X1(x) 311 312static const tilegx_mnemonic data_transfer_insts[16] = { 313 /* u w s */ TILEGX_OPC_ST /* st */, 314 /* u w l */ 
TILEGX_OPC_LD /* ld */, 315 /* u b s */ TILEGX_OPC_ST1 /* st1 */, 316 /* u b l */ TILEGX_OPC_LD1U /* ld1u */, 317 /* u h s */ TILEGX_OPC_ST2 /* st2 */, 318 /* u h l */ TILEGX_OPC_LD2U /* ld2u */, 319 /* u i s */ TILEGX_OPC_ST4 /* st4 */, 320 /* u i l */ TILEGX_OPC_LD4U /* ld4u */, 321 /* s w s */ TILEGX_OPC_ST /* st */, 322 /* s w l */ TILEGX_OPC_LD /* ld */, 323 /* s b s */ TILEGX_OPC_ST1 /* st1 */, 324 /* s b l */ TILEGX_OPC_LD1S /* ld1s */, 325 /* s h s */ TILEGX_OPC_ST2 /* st2 */, 326 /* s h l */ TILEGX_OPC_LD2S /* ld2s */, 327 /* s i s */ TILEGX_OPC_ST4 /* st4 */, 328 /* s i l */ TILEGX_OPC_LD4S /* ld4s */, 329}; 330 331#ifdef TILEGX_JIT_DEBUG 332static sljit_s32 push_inst_debug(struct sljit_compiler *compiler, sljit_ins ins, int line) 333{ 334 sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins)); 335 FAIL_IF(!ptr); 336 *ptr = ins; 337 compiler->size++; 338 printf("|%04d|S0|:\t\t", line); 339 print_insn_tilegx(ptr); 340 return SLJIT_SUCCESS; 341} 342 343static sljit_s32 push_inst_nodebug(struct sljit_compiler *compiler, sljit_ins ins) 344{ 345 sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins)); 346 FAIL_IF(!ptr); 347 *ptr = ins; 348 compiler->size++; 349 return SLJIT_SUCCESS; 350} 351 352#define push_inst(a, b) push_inst_debug(a, b, __LINE__) 353#else 354static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins) 355{ 356 sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins)); 357 FAIL_IF(!ptr); 358 *ptr = ins; 359 compiler->size++; 360 return SLJIT_SUCCESS; 361} 362#endif 363 364#define BUNDLE_FORMAT_MASK(p0, p1, p2) \ 365 ((p0) | ((p1) << 8) | ((p2) << 16)) 366 367#define BUNDLE_FORMAT(p0, p1, p2) \ 368 { \ 369 { \ 370 (tilegx_pipeline)(p0), \ 371 (tilegx_pipeline)(p1), \ 372 (tilegx_pipeline)(p2) \ 373 }, \ 374 BUNDLE_FORMAT_MASK(1 << (p0), 1 << (p1), (1 << (p2))) \ 375 } 376 377#define NO_PIPELINE TILEGX_NUM_PIPELINE_ENCODINGS 378 379#define tilegx_is_x_pipeline(p) ((int)(p) <= 
(int)TILEGX_PIPELINE_X1) 380 381#define PI(encoding) \ 382 push_inst(compiler, encoding) 383 384#define PB3(opcode, dst, srca, srcb) \ 385 push_3_buffer(compiler, opcode, dst, srca, srcb, __LINE__) 386 387#define PB2(opcode, dst, src) \ 388 push_2_buffer(compiler, opcode, dst, src, __LINE__) 389 390#define JR(reg) \ 391 push_jr_buffer(compiler, TILEGX_OPC_JR, reg, __LINE__) 392 393#define ADD(dst, srca, srcb) \ 394 push_3_buffer(compiler, TILEGX_OPC_ADD, dst, srca, srcb, __LINE__) 395 396#define SUB(dst, srca, srcb) \ 397 push_3_buffer(compiler, TILEGX_OPC_SUB, dst, srca, srcb, __LINE__) 398 399#define MUL(dst, srca, srcb) \ 400 push_3_buffer(compiler, TILEGX_OPC_MULX, dst, srca, srcb, __LINE__) 401 402#define NOR(dst, srca, srcb) \ 403 push_3_buffer(compiler, TILEGX_OPC_NOR, dst, srca, srcb, __LINE__) 404 405#define OR(dst, srca, srcb) \ 406 push_3_buffer(compiler, TILEGX_OPC_OR, dst, srca, srcb, __LINE__) 407 408#define XOR(dst, srca, srcb) \ 409 push_3_buffer(compiler, TILEGX_OPC_XOR, dst, srca, srcb, __LINE__) 410 411#define AND(dst, srca, srcb) \ 412 push_3_buffer(compiler, TILEGX_OPC_AND, dst, srca, srcb, __LINE__) 413 414#define CLZ(dst, src) \ 415 push_2_buffer(compiler, TILEGX_OPC_CLZ, dst, src, __LINE__) 416 417#define SHLI(dst, srca, srcb) \ 418 push_3_buffer(compiler, TILEGX_OPC_SHLI, dst, srca, srcb, __LINE__) 419 420#define SHRUI(dst, srca, imm) \ 421 push_3_buffer(compiler, TILEGX_OPC_SHRUI, dst, srca, imm, __LINE__) 422 423#define XORI(dst, srca, imm) \ 424 push_3_buffer(compiler, TILEGX_OPC_XORI, dst, srca, imm, __LINE__) 425 426#define ORI(dst, srca, imm) \ 427 push_3_buffer(compiler, TILEGX_OPC_ORI, dst, srca, imm, __LINE__) 428 429#define CMPLTU(dst, srca, srcb) \ 430 push_3_buffer(compiler, TILEGX_OPC_CMPLTU, dst, srca, srcb, __LINE__) 431 432#define CMPLTS(dst, srca, srcb) \ 433 push_3_buffer(compiler, TILEGX_OPC_CMPLTS, dst, srca, srcb, __LINE__) 434 435#define CMPLTUI(dst, srca, imm) \ 436 push_3_buffer(compiler, TILEGX_OPC_CMPLTUI, dst, 
srca, imm, __LINE__) 437 438#define CMOVNEZ(dst, srca, srcb) \ 439 push_3_buffer(compiler, TILEGX_OPC_CMOVNEZ, dst, srca, srcb, __LINE__) 440 441#define CMOVEQZ(dst, srca, srcb) \ 442 push_3_buffer(compiler, TILEGX_OPC_CMOVEQZ, dst, srca, srcb, __LINE__) 443 444#define ADDLI(dst, srca, srcb) \ 445 push_3_buffer(compiler, TILEGX_OPC_ADDLI, dst, srca, srcb, __LINE__) 446 447#define SHL16INSLI(dst, srca, srcb) \ 448 push_3_buffer(compiler, TILEGX_OPC_SHL16INSLI, dst, srca, srcb, __LINE__) 449 450#define LD_ADD(dst, addr, adjust) \ 451 push_3_buffer(compiler, TILEGX_OPC_LD_ADD, dst, addr, adjust, __LINE__) 452 453#define ST_ADD(src, addr, adjust) \ 454 push_3_buffer(compiler, TILEGX_OPC_ST_ADD, src, addr, adjust, __LINE__) 455 456#define LD(dst, addr) \ 457 push_2_buffer(compiler, TILEGX_OPC_LD, dst, addr, __LINE__) 458 459#define BFEXTU(dst, src, start, end) \ 460 push_4_buffer(compiler, TILEGX_OPC_BFEXTU, dst, src, start, end, __LINE__) 461 462#define BFEXTS(dst, src, start, end) \ 463 push_4_buffer(compiler, TILEGX_OPC_BFEXTS, dst, src, start, end, __LINE__) 464 465#define ADD_SOLO(dest, srca, srcb) \ 466 push_inst(compiler, ADD_X1 | DEST_X1(dest) | SRCA_X1(srca) | SRCB_X1(srcb)) 467 468#define ADDI_SOLO(dest, srca, imm) \ 469 push_inst(compiler, ADDI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM8_X1(imm)) 470 471#define ADDLI_SOLO(dest, srca, imm) \ 472 push_inst(compiler, ADDLI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM16_X1(imm)) 473 474#define SHL16INSLI_SOLO(dest, srca, imm) \ 475 push_inst(compiler, SHL16INSLI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM16_X1(imm)) 476 477#define JALR_SOLO(reg) \ 478 push_inst(compiler, JALR_X1 | SRCA_X1(reg)) 479 480#define JR_SOLO(reg) \ 481 push_inst(compiler, JR_X1 | SRCA_X1(reg)) 482 483struct Format { 484 /* Mapping of bundle issue slot to assigned pipe. */ 485 tilegx_pipeline pipe[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE]; 486 487 /* Mask of pipes used by this bundle. 
*/ 488 unsigned int pipe_mask; 489}; 490 491const struct Format formats[] = 492{ 493 /* In Y format we must always have something in Y2, since it has 494 * no fnop, so this conveys that Y2 must always be used. */ 495 BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2, NO_PIPELINE), 496 BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2, NO_PIPELINE), 497 BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0, NO_PIPELINE), 498 BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1, NO_PIPELINE), 499 500 /* Y format has three instructions. */ 501 BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2), 502 BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1), 503 BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2), 504 BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0), 505 BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y1), 506 BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y0), 507 508 /* X format has only two instructions. */ 509 BUNDLE_FORMAT(TILEGX_PIPELINE_X0, TILEGX_PIPELINE_X1, NO_PIPELINE), 510 BUNDLE_FORMAT(TILEGX_PIPELINE_X1, TILEGX_PIPELINE_X0, NO_PIPELINE) 511}; 512 513 514struct jit_instr inst_buf[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE]; 515unsigned long inst_buf_index; 516 517tilegx_pipeline get_any_valid_pipe(const struct tilegx_opcode* opcode) 518{ 519 /* FIXME: tile: we could pregenerate this. 
*/ 520 int pipe; 521 for (pipe = 0; ((opcode->pipes & (1 << pipe)) == 0 && pipe < TILEGX_NUM_PIPELINE_ENCODINGS); pipe++) 522 ; 523 return (tilegx_pipeline)(pipe); 524} 525 526void insert_nop(tilegx_mnemonic opc, int line) 527{ 528 const struct tilegx_opcode* opcode = NULL; 529 530 memmove(&inst_buf[1], &inst_buf[0], inst_buf_index * sizeof inst_buf[0]); 531 532 opcode = &tilegx_opcodes[opc]; 533 inst_buf[0].opcode = opcode; 534 inst_buf[0].pipe = get_any_valid_pipe(opcode); 535 inst_buf[0].input_registers = 0; 536 inst_buf[0].output_registers = 0; 537 inst_buf[0].line = line; 538 ++inst_buf_index; 539} 540 541const struct Format* compute_format() 542{ 543 unsigned int compatible_pipes = BUNDLE_FORMAT_MASK( 544 inst_buf[0].opcode->pipes, 545 inst_buf[1].opcode->pipes, 546 (inst_buf_index == 3 ? inst_buf[2].opcode->pipes : (1 << NO_PIPELINE))); 547 548 const struct Format* match = NULL; 549 const struct Format *b = NULL; 550 unsigned int i; 551 for (i = 0; i < sizeof formats / sizeof formats[0]; i++) { 552 b = &formats[i]; 553 if ((b->pipe_mask & compatible_pipes) == b->pipe_mask) { 554 match = b; 555 break; 556 } 557 } 558 559 return match; 560} 561 562sljit_s32 assign_pipes() 563{ 564 unsigned long output_registers = 0; 565 unsigned int i = 0; 566 567 if (inst_buf_index == 1) { 568 tilegx_mnemonic opc = inst_buf[0].opcode->can_bundle 569 ? TILEGX_OPC_FNOP : TILEGX_OPC_NOP; 570 insert_nop(opc, __LINE__); 571 } 572 573 const struct Format* match = compute_format(); 574 575 if (match == NULL) 576 return -1; 577 578 for (i = 0; i < inst_buf_index; i++) { 579 580 if ((i > 0) && ((inst_buf[i].input_registers & output_registers) != 0)) 581 return -1; 582 583 if ((i > 0) && ((inst_buf[i].output_registers & output_registers) != 0)) 584 return -1; 585 586 /* Don't include Rzero in the match set, to avoid triggering 587 needlessly on 'prefetch' instrs. 
*/ 588 589 output_registers |= inst_buf[i].output_registers & 0xFFFFFFFFFFFFFFL; 590 591 inst_buf[i].pipe = match->pipe[i]; 592 } 593 594 /* If only 2 instrs, and in Y-mode, insert a nop. */ 595 if (inst_buf_index == 2 && !tilegx_is_x_pipeline(match->pipe[0])) { 596 insert_nop(TILEGX_OPC_FNOP, __LINE__); 597 598 /* Select the yet unassigned pipe. */ 599 tilegx_pipeline pipe = (tilegx_pipeline)(((TILEGX_PIPELINE_Y0 600 + TILEGX_PIPELINE_Y1 + TILEGX_PIPELINE_Y2) 601 - (inst_buf[1].pipe + inst_buf[2].pipe))); 602 603 inst_buf[0].pipe = pipe; 604 } 605 606 return 0; 607} 608 609tilegx_bundle_bits get_bundle_bit(struct jit_instr *inst) 610{ 611 int i, val; 612 const struct tilegx_opcode* opcode = inst->opcode; 613 tilegx_bundle_bits bits = opcode->fixed_bit_values[inst->pipe]; 614 615 const struct tilegx_operand* operand = NULL; 616 for (i = 0; i < opcode->num_operands; i++) { 617 operand = &tilegx_operands[opcode->operands[inst->pipe][i]]; 618 val = inst->operand_value[i]; 619 620 bits |= operand->insert(val); 621 } 622 623 return bits; 624} 625 626static sljit_s32 update_buffer(struct sljit_compiler *compiler) 627{ 628 int i; 629 int orig_index = inst_buf_index; 630 struct jit_instr inst0 = inst_buf[0]; 631 struct jit_instr inst1 = inst_buf[1]; 632 struct jit_instr inst2 = inst_buf[2]; 633 tilegx_bundle_bits bits = 0; 634 635 /* If the bundle is valid as is, perform the encoding and return 1. */ 636 if (assign_pipes() == 0) { 637 for (i = 0; i < inst_buf_index; i++) { 638 bits |= get_bundle_bit(inst_buf + i); 639#ifdef TILEGX_JIT_DEBUG 640 printf("|%04d", inst_buf[i].line); 641#endif 642 } 643#ifdef TILEGX_JIT_DEBUG 644 if (inst_buf_index == 3) 645 printf("|M0|:\t"); 646 else 647 printf("|M0|:\t\t"); 648 print_insn_tilegx(&bits); 649#endif 650 651 inst_buf_index = 0; 652 653#ifdef TILEGX_JIT_DEBUG 654 return push_inst_nodebug(compiler, bits); 655#else 656 return push_inst(compiler, bits); 657#endif 658 } 659 660 /* If the bundle is invalid, split it in two. 
First encode the first two 661 (or possibly 1) instructions, and then the last, separately. Note that 662 assign_pipes may have re-ordered the instrs (by inserting no-ops in 663 lower slots) so we need to reset them. */ 664 665 inst_buf_index = orig_index - 1; 666 inst_buf[0] = inst0; 667 inst_buf[1] = inst1; 668 inst_buf[2] = inst2; 669 if (assign_pipes() == 0) { 670 for (i = 0; i < inst_buf_index; i++) { 671 bits |= get_bundle_bit(inst_buf + i); 672#ifdef TILEGX_JIT_DEBUG 673 printf("|%04d", inst_buf[i].line); 674#endif 675 } 676 677#ifdef TILEGX_JIT_DEBUG 678 if (inst_buf_index == 3) 679 printf("|M1|:\t"); 680 else 681 printf("|M1|:\t\t"); 682 print_insn_tilegx(&bits); 683#endif 684 685 if ((orig_index - 1) == 2) { 686 inst_buf[0] = inst2; 687 inst_buf_index = 1; 688 } else if ((orig_index - 1) == 1) { 689 inst_buf[0] = inst1; 690 inst_buf_index = 1; 691 } else 692 SLJIT_UNREACHABLE(); 693 694#ifdef TILEGX_JIT_DEBUG 695 return push_inst_nodebug(compiler, bits); 696#else 697 return push_inst(compiler, bits); 698#endif 699 } else { 700 /* We had 3 instrs of which the first 2 can't live in the same bundle. 701 Split those two. Note that we don't try to then combine the second 702 and third instr into a single bundle. 
First instruction: */ 703 inst_buf_index = 1; 704 inst_buf[0] = inst0; 705 inst_buf[1] = inst1; 706 inst_buf[2] = inst2; 707 if (assign_pipes() == 0) { 708 for (i = 0; i < inst_buf_index; i++) { 709 bits |= get_bundle_bit(inst_buf + i); 710#ifdef TILEGX_JIT_DEBUG 711 printf("|%04d", inst_buf[i].line); 712#endif 713 } 714 715#ifdef TILEGX_JIT_DEBUG 716 if (inst_buf_index == 3) 717 printf("|M2|:\t"); 718 else 719 printf("|M2|:\t\t"); 720 print_insn_tilegx(&bits); 721#endif 722 723 inst_buf[0] = inst1; 724 inst_buf[1] = inst2; 725 inst_buf_index = orig_index - 1; 726#ifdef TILEGX_JIT_DEBUG 727 return push_inst_nodebug(compiler, bits); 728#else 729 return push_inst(compiler, bits); 730#endif 731 } else 732 SLJIT_UNREACHABLE(); 733 } 734 735 SLJIT_UNREACHABLE(); 736} 737 738static sljit_s32 flush_buffer(struct sljit_compiler *compiler) 739{ 740 while (inst_buf_index != 0) { 741 FAIL_IF(update_buffer(compiler)); 742 } 743 return SLJIT_SUCCESS; 744} 745 746static sljit_s32 push_4_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int op2, int op3, int line) 747{ 748 if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE) 749 FAIL_IF(update_buffer(compiler)); 750 751 const struct tilegx_opcode* opcode = &tilegx_opcodes[opc]; 752 inst_buf[inst_buf_index].opcode = opcode; 753 inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode); 754 inst_buf[inst_buf_index].operand_value[0] = op0; 755 inst_buf[inst_buf_index].operand_value[1] = op1; 756 inst_buf[inst_buf_index].operand_value[2] = op2; 757 inst_buf[inst_buf_index].operand_value[3] = op3; 758 inst_buf[inst_buf_index].input_registers = 1L << op1; 759 inst_buf[inst_buf_index].output_registers = 1L << op0; 760 inst_buf[inst_buf_index].line = line; 761 inst_buf_index++; 762 763 return SLJIT_SUCCESS; 764} 765 766static sljit_s32 push_3_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int op2, int line) 767{ 768 if (inst_buf_index == 
TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE) 769 FAIL_IF(update_buffer(compiler)); 770 771 const struct tilegx_opcode* opcode = &tilegx_opcodes[opc]; 772 inst_buf[inst_buf_index].opcode = opcode; 773 inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode); 774 inst_buf[inst_buf_index].operand_value[0] = op0; 775 inst_buf[inst_buf_index].operand_value[1] = op1; 776 inst_buf[inst_buf_index].operand_value[2] = op2; 777 inst_buf[inst_buf_index].line = line; 778 779 switch (opc) { 780 case TILEGX_OPC_ST_ADD: 781 inst_buf[inst_buf_index].input_registers = (1L << op0) | (1L << op1); 782 inst_buf[inst_buf_index].output_registers = 1L << op0; 783 break; 784 case TILEGX_OPC_LD_ADD: 785 inst_buf[inst_buf_index].input_registers = 1L << op1; 786 inst_buf[inst_buf_index].output_registers = (1L << op0) | (1L << op1); 787 break; 788 case TILEGX_OPC_ADD: 789 case TILEGX_OPC_AND: 790 case TILEGX_OPC_SUB: 791 case TILEGX_OPC_MULX: 792 case TILEGX_OPC_OR: 793 case TILEGX_OPC_XOR: 794 case TILEGX_OPC_NOR: 795 case TILEGX_OPC_SHL: 796 case TILEGX_OPC_SHRU: 797 case TILEGX_OPC_SHRS: 798 case TILEGX_OPC_CMPLTU: 799 case TILEGX_OPC_CMPLTS: 800 case TILEGX_OPC_CMOVEQZ: 801 case TILEGX_OPC_CMOVNEZ: 802 inst_buf[inst_buf_index].input_registers = (1L << op1) | (1L << op2); 803 inst_buf[inst_buf_index].output_registers = 1L << op0; 804 break; 805 case TILEGX_OPC_ADDLI: 806 case TILEGX_OPC_XORI: 807 case TILEGX_OPC_ORI: 808 case TILEGX_OPC_SHLI: 809 case TILEGX_OPC_SHRUI: 810 case TILEGX_OPC_SHRSI: 811 case TILEGX_OPC_SHL16INSLI: 812 case TILEGX_OPC_CMPLTUI: 813 case TILEGX_OPC_CMPLTSI: 814 inst_buf[inst_buf_index].input_registers = 1L << op1; 815 inst_buf[inst_buf_index].output_registers = 1L << op0; 816 break; 817 default: 818 printf("unrecoginzed opc: %s\n", opcode->name); 819 SLJIT_UNREACHABLE(); 820 } 821 822 inst_buf_index++; 823 824 return SLJIT_SUCCESS; 825} 826 827static sljit_s32 push_2_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int line) 828{ 829 if 
(inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE) 830 FAIL_IF(update_buffer(compiler)); 831 832 const struct tilegx_opcode* opcode = &tilegx_opcodes[opc]; 833 inst_buf[inst_buf_index].opcode = opcode; 834 inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode); 835 inst_buf[inst_buf_index].operand_value[0] = op0; 836 inst_buf[inst_buf_index].operand_value[1] = op1; 837 inst_buf[inst_buf_index].line = line; 838 839 switch (opc) { 840 case TILEGX_OPC_BEQZ: 841 case TILEGX_OPC_BNEZ: 842 inst_buf[inst_buf_index].input_registers = 1L << op0; 843 break; 844 case TILEGX_OPC_ST: 845 case TILEGX_OPC_ST1: 846 case TILEGX_OPC_ST2: 847 case TILEGX_OPC_ST4: 848 inst_buf[inst_buf_index].input_registers = (1L << op0) | (1L << op1); 849 inst_buf[inst_buf_index].output_registers = 0; 850 break; 851 case TILEGX_OPC_CLZ: 852 case TILEGX_OPC_LD: 853 case TILEGX_OPC_LD1U: 854 case TILEGX_OPC_LD1S: 855 case TILEGX_OPC_LD2U: 856 case TILEGX_OPC_LD2S: 857 case TILEGX_OPC_LD4U: 858 case TILEGX_OPC_LD4S: 859 inst_buf[inst_buf_index].input_registers = 1L << op1; 860 inst_buf[inst_buf_index].output_registers = 1L << op0; 861 break; 862 default: 863 printf("unrecoginzed opc: %s\n", opcode->name); 864 SLJIT_UNREACHABLE(); 865 } 866 867 inst_buf_index++; 868 869 return SLJIT_SUCCESS; 870} 871 872static sljit_s32 push_0_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int line) 873{ 874 if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE) 875 FAIL_IF(update_buffer(compiler)); 876 877 const struct tilegx_opcode* opcode = &tilegx_opcodes[opc]; 878 inst_buf[inst_buf_index].opcode = opcode; 879 inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode); 880 inst_buf[inst_buf_index].input_registers = 0; 881 inst_buf[inst_buf_index].output_registers = 0; 882 inst_buf[inst_buf_index].line = line; 883 inst_buf_index++; 884 885 return SLJIT_SUCCESS; 886} 887 888static sljit_s32 push_jr_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int line) 889{ 890 if 
(inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE) 891 FAIL_IF(update_buffer(compiler)); 892 893 const struct tilegx_opcode* opcode = &tilegx_opcodes[opc]; 894 inst_buf[inst_buf_index].opcode = opcode; 895 inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode); 896 inst_buf[inst_buf_index].operand_value[0] = op0; 897 inst_buf[inst_buf_index].input_registers = 1L << op0; 898 inst_buf[inst_buf_index].output_registers = 0; 899 inst_buf[inst_buf_index].line = line; 900 inst_buf_index++; 901 902 return flush_buffer(compiler); 903} 904 905static SLJIT_INLINE sljit_ins * detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code) 906{ 907 sljit_sw diff; 908 sljit_uw target_addr; 909 sljit_ins *inst; 910 911 if (jump->flags & SLJIT_REWRITABLE_JUMP) 912 return code_ptr; 913 914 if (jump->flags & JUMP_ADDR) 915 target_addr = jump->u.target; 916 else { 917 SLJIT_ASSERT(jump->flags & JUMP_LABEL); 918 target_addr = (sljit_uw)(code + jump->u.label->size); 919 } 920 921 inst = (sljit_ins *)jump->addr; 922 if (jump->flags & IS_COND) 923 inst--; 924 925 diff = ((sljit_sw) target_addr - (sljit_sw) inst) >> 3; 926 if (diff <= SIMM_17BIT_MAX && diff >= SIMM_17BIT_MIN) { 927 jump->flags |= PATCH_B; 928 929 if (!(jump->flags & IS_COND)) { 930 if (jump->flags & IS_JAL) { 931 jump->flags &= ~(PATCH_B); 932 jump->flags |= PATCH_J; 933 inst[0] = JAL_X1; 934 935#ifdef TILEGX_JIT_DEBUG 936 printf("[runtime relocate]%04d:\t", __LINE__); 937 print_insn_tilegx(inst); 938#endif 939 } else { 940 inst[0] = BEQZ_X1 | SRCA_X1(ZERO); 941 942#ifdef TILEGX_JIT_DEBUG 943 printf("[runtime relocate]%04d:\t", __LINE__); 944 print_insn_tilegx(inst); 945#endif 946 } 947 948 return inst; 949 } 950 951 inst[0] = inst[0] ^ (0x7L << 55); 952 953#ifdef TILEGX_JIT_DEBUG 954 printf("[runtime relocate]%04d:\t", __LINE__); 955 print_insn_tilegx(inst); 956#endif 957 jump->addr -= sizeof(sljit_ins); 958 return inst; 959 } 960 961 if (jump->flags & IS_COND) { 962 if ((target_addr & 
~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL)) { 963 jump->flags |= PATCH_J; 964 inst[0] = (inst[0] & ~(BOFF_X1(-1))) | BOFF_X1(2); 965 inst[1] = J_X1; 966 return inst + 1; 967 } 968 969 return code_ptr; 970 } 971 972 if ((target_addr & ~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL)) { 973 jump->flags |= PATCH_J; 974 975 if (jump->flags & IS_JAL) { 976 inst[0] = JAL_X1; 977 978#ifdef TILEGX_JIT_DEBUG 979 printf("[runtime relocate]%04d:\t", __LINE__); 980 print_insn_tilegx(inst); 981#endif 982 983 } else { 984 inst[0] = J_X1; 985 986#ifdef TILEGX_JIT_DEBUG 987 printf("[runtime relocate]%04d:\t", __LINE__); 988 print_insn_tilegx(inst); 989#endif 990 } 991 992 return inst; 993 } 994 995 return code_ptr; 996} 997 998SLJIT_API_FUNC_ATTRIBUTE void * sljit_generate_code(struct sljit_compiler *compiler) 999{ 1000 struct sljit_memory_fragment *buf; 1001 sljit_ins *code; 1002 sljit_ins *code_ptr; 1003 sljit_ins *buf_ptr; 1004 sljit_ins *buf_end; 1005 sljit_uw word_count; 1006 sljit_uw addr; 1007 1008 struct sljit_label *label; 1009 struct sljit_jump *jump; 1010 struct sljit_const *const_; 1011 1012 CHECK_ERROR_PTR(); 1013 CHECK_PTR(check_sljit_generate_code(compiler)); 1014 reverse_buf(compiler); 1015 1016 code = (sljit_ins *)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins)); 1017 PTR_FAIL_WITH_EXEC_IF(code); 1018 buf = compiler->buf; 1019 1020 code_ptr = code; 1021 word_count = 0; 1022 label = compiler->labels; 1023 jump = compiler->jumps; 1024 const_ = compiler->consts; 1025 do { 1026 buf_ptr = (sljit_ins *)buf->memory; 1027 buf_end = buf_ptr + (buf->used_size >> 3); 1028 do { 1029 *code_ptr = *buf_ptr++; 1030 SLJIT_ASSERT(!label || label->size >= word_count); 1031 SLJIT_ASSERT(!jump || jump->addr >= word_count); 1032 SLJIT_ASSERT(!const_ || const_->addr >= word_count); 1033 /* These structures are ordered by their address. */ 1034 if (label && label->size == word_count) { 1035 /* Just recording the address. 
*/ 1036 label->addr = (sljit_uw) code_ptr; 1037 label->size = code_ptr - code; 1038 label = label->next; 1039 } 1040 1041 if (jump && jump->addr == word_count) { 1042 if (jump->flags & IS_JAL) 1043 jump->addr = (sljit_uw)(code_ptr - 4); 1044 else 1045 jump->addr = (sljit_uw)(code_ptr - 3); 1046 1047 code_ptr = detect_jump_type(jump, code_ptr, code); 1048 jump = jump->next; 1049 } 1050 1051 if (const_ && const_->addr == word_count) { 1052 /* Just recording the address. */ 1053 const_->addr = (sljit_uw) code_ptr; 1054 const_ = const_->next; 1055 } 1056 1057 code_ptr++; 1058 word_count++; 1059 } while (buf_ptr < buf_end); 1060 1061 buf = buf->next; 1062 } while (buf); 1063 1064 if (label && label->size == word_count) { 1065 label->addr = (sljit_uw) code_ptr; 1066 label->size = code_ptr - code; 1067 label = label->next; 1068 } 1069 1070 SLJIT_ASSERT(!label); 1071 SLJIT_ASSERT(!jump); 1072 SLJIT_ASSERT(!const_); 1073 SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); 1074 1075 jump = compiler->jumps; 1076 while (jump) { 1077 do { 1078 addr = (jump->flags & JUMP_LABEL) ? 
jump->u.label->addr : jump->u.target; 1079 buf_ptr = (sljit_ins *)jump->addr; 1080 1081 if (jump->flags & PATCH_B) { 1082 addr = (sljit_sw)(addr - (jump->addr)) >> 3; 1083 SLJIT_ASSERT((sljit_sw) addr <= SIMM_17BIT_MAX && (sljit_sw) addr >= SIMM_17BIT_MIN); 1084 buf_ptr[0] = (buf_ptr[0] & ~(BOFF_X1(-1))) | BOFF_X1(addr); 1085 1086#ifdef TILEGX_JIT_DEBUG 1087 printf("[runtime relocate]%04d:\t", __LINE__); 1088 print_insn_tilegx(buf_ptr); 1089#endif 1090 break; 1091 } 1092 1093 if (jump->flags & PATCH_J) { 1094 SLJIT_ASSERT((addr & ~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL)); 1095 addr = (sljit_sw)(addr - (jump->addr)) >> 3; 1096 buf_ptr[0] = (buf_ptr[0] & ~(JOFF_X1(-1))) | JOFF_X1(addr); 1097 1098#ifdef TILEGX_JIT_DEBUG 1099 printf("[runtime relocate]%04d:\t", __LINE__); 1100 print_insn_tilegx(buf_ptr); 1101#endif 1102 break; 1103 } 1104 1105 SLJIT_ASSERT(!(jump->flags & IS_JAL)); 1106 1107 /* Set the fields of immediate loads. */ 1108 buf_ptr[0] = (buf_ptr[0] & ~(0xFFFFL << 43)) | (((addr >> 32) & 0xFFFFL) << 43); 1109 buf_ptr[1] = (buf_ptr[1] & ~(0xFFFFL << 43)) | (((addr >> 16) & 0xFFFFL) << 43); 1110 buf_ptr[2] = (buf_ptr[2] & ~(0xFFFFL << 43)) | ((addr & 0xFFFFL) << 43); 1111 } while (0); 1112 1113 jump = jump->next; 1114 } 1115 1116 compiler->error = SLJIT_ERR_COMPILED; 1117 compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins); 1118 SLJIT_CACHE_FLUSH(code, code_ptr); 1119 return code; 1120} 1121 1122static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_ar, sljit_sw imm) 1123{ 1124 1125 if (imm <= SIMM_16BIT_MAX && imm >= SIMM_16BIT_MIN) 1126 return ADDLI(dst_ar, ZERO, imm); 1127 1128 if (imm <= SIMM_32BIT_MAX && imm >= SIMM_32BIT_MIN) { 1129 FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 16)); 1130 return SHL16INSLI(dst_ar, dst_ar, imm); 1131 } 1132 1133 if (imm <= SIMM_48BIT_MAX && imm >= SIMM_48BIT_MIN) { 1134 FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 32)); 1135 FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16)); 1136 
return SHL16INSLI(dst_ar, dst_ar, imm); 1137 } 1138 1139 FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 48)); 1140 FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 32)); 1141 FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16)); 1142 return SHL16INSLI(dst_ar, dst_ar, imm); 1143} 1144 1145static sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst_ar, sljit_sw imm, int flush) 1146{ 1147 /* Should *not* be optimized as load_immediate, as pcre relocation 1148 mechanism will match this fixed 4-instruction pattern. */ 1149 if (flush) { 1150 FAIL_IF(ADDLI_SOLO(dst_ar, ZERO, imm >> 32)); 1151 FAIL_IF(SHL16INSLI_SOLO(dst_ar, dst_ar, imm >> 16)); 1152 return SHL16INSLI_SOLO(dst_ar, dst_ar, imm); 1153 } 1154 1155 FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 32)); 1156 FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16)); 1157 return SHL16INSLI(dst_ar, dst_ar, imm); 1158} 1159 1160static sljit_s32 emit_const_64(struct sljit_compiler *compiler, sljit_s32 dst_ar, sljit_sw imm, int flush) 1161{ 1162 /* Should *not* be optimized as load_immediate, as pcre relocation 1163 mechanism will match this fixed 4-instruction pattern. 
*/ 1164 if (flush) { 1165 FAIL_IF(ADDLI_SOLO(reg_map[dst_ar], ZERO, imm >> 48)); 1166 FAIL_IF(SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm >> 32)); 1167 FAIL_IF(SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm >> 16)); 1168 return SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm); 1169 } 1170 1171 FAIL_IF(ADDLI(reg_map[dst_ar], ZERO, imm >> 48)); 1172 FAIL_IF(SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm >> 32)); 1173 FAIL_IF(SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm >> 16)); 1174 return SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm); 1175} 1176 1177SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, 1178 sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, 1179 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) 1180{ 1181 sljit_ins base; 1182 sljit_s32 i, tmp; 1183 1184 CHECK_ERROR(); 1185 CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); 1186 set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); 1187 1188 local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); 1189 local_size = (local_size + 7) & ~7; 1190 compiler->local_size = local_size; 1191 1192 if (local_size <= SIMM_16BIT_MAX) { 1193 /* Frequent case. */ 1194 FAIL_IF(ADDLI(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, -local_size)); 1195 base = SLJIT_LOCALS_REG_mapped; 1196 } else { 1197 FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, local_size)); 1198 FAIL_IF(ADD(TMP_REG2_mapped, SLJIT_LOCALS_REG_mapped, ZERO)); 1199 FAIL_IF(SUB(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped)); 1200 base = TMP_REG2_mapped; 1201 local_size = 0; 1202 } 1203 1204 /* Save the return address. */ 1205 FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 8)); 1206 FAIL_IF(ST_ADD(ADDR_TMP_mapped, RA, -8)); 1207 1208 /* Save the S registers. */ 1209 tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? 
(SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; 1210 for (i = SLJIT_S0; i >= tmp; i--) { 1211 FAIL_IF(ST_ADD(ADDR_TMP_mapped, reg_map[i], -8)); 1212 } 1213 1214 /* Save the R registers that need to be reserved. */ 1215 for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { 1216 FAIL_IF(ST_ADD(ADDR_TMP_mapped, reg_map[i], -8)); 1217 } 1218 1219 /* Move the arguments to S registers. */ 1220 for (i = 0; i < args; i++) { 1221 FAIL_IF(ADD(reg_map[SLJIT_S0 - i], i, ZERO)); 1222 } 1223 1224 return SLJIT_SUCCESS; 1225} 1226 1227SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, 1228 sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, 1229 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) 1230{ 1231 CHECK_ERROR(); 1232 CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); 1233 set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); 1234 1235 local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); 1236 compiler->local_size = (local_size + 7) & ~7; 1237 1238 return SLJIT_SUCCESS; 1239} 1240 1241SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) 1242{ 1243 sljit_s32 local_size; 1244 sljit_ins base; 1245 sljit_s32 i, tmp; 1246 sljit_s32 saveds; 1247 1248 CHECK_ERROR(); 1249 CHECK(check_sljit_emit_return(compiler, op, src, srcw)); 1250 1251 FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); 1252 1253 local_size = compiler->local_size; 1254 if (local_size <= SIMM_16BIT_MAX) 1255 base = SLJIT_LOCALS_REG_mapped; 1256 else { 1257 FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, local_size)); 1258 FAIL_IF(ADD(TMP_REG1_mapped, SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped)); 1259 base = TMP_REG1_mapped; 1260 local_size = 0; 1261 } 1262 1263 /* Restore the return address. 
*/ 1264 FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 8)); 1265 FAIL_IF(LD_ADD(RA, ADDR_TMP_mapped, -8)); 1266 1267 /* Restore the S registers. */ 1268 saveds = compiler->saveds; 1269 tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; 1270 for (i = SLJIT_S0; i >= tmp; i--) { 1271 FAIL_IF(LD_ADD(reg_map[i], ADDR_TMP_mapped, -8)); 1272 } 1273 1274 /* Restore the R registers that need to be reserved. */ 1275 for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { 1276 FAIL_IF(LD_ADD(reg_map[i], ADDR_TMP_mapped, -8)); 1277 } 1278 1279 if (compiler->local_size <= SIMM_16BIT_MAX) 1280 FAIL_IF(ADDLI(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, compiler->local_size)); 1281 else 1282 FAIL_IF(ADD(SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped, ZERO)); 1283 1284 return JR(RA); 1285} 1286 1287/* reg_ar is an absoulute register! */ 1288 1289/* Can perform an operation using at most 1 instruction. */ 1290static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw) 1291{ 1292 SLJIT_ASSERT(arg & SLJIT_MEM); 1293 1294 if ((!(flags & WRITE_BACK) || !(arg & REG_MASK)) 1295 && !(arg & OFFS_REG_MASK) && argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) { 1296 /* Works for both absoulte and relative addresses. */ 1297 if (SLJIT_UNLIKELY(flags & ARG_TEST)) 1298 return 1; 1299 1300 FAIL_IF(ADDLI(ADDR_TMP_mapped, reg_map[arg & REG_MASK], argw)); 1301 1302 if (flags & LOAD_DATA) 1303 FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, ADDR_TMP_mapped)); 1304 else 1305 FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], ADDR_TMP_mapped, reg_ar)); 1306 1307 return -1; 1308 } 1309 1310 return 0; 1311} 1312 1313/* See getput_arg below. 1314 Note: can_cache is called only for binary operators. Those 1315 operators always uses word arguments without write back. 
*/ 1316static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) 1317{ 1318 SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM)); 1319 1320 /* Simple operation except for updates. */ 1321 if (arg & OFFS_REG_MASK) { 1322 argw &= 0x3; 1323 next_argw &= 0x3; 1324 if (argw && argw == next_argw 1325 && (arg == next_arg || (arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK))) 1326 return 1; 1327 return 0; 1328 } 1329 1330 if (arg == next_arg) { 1331 if (((next_argw - argw) <= SIMM_16BIT_MAX 1332 && (next_argw - argw) >= SIMM_16BIT_MIN)) 1333 return 1; 1334 1335 return 0; 1336 } 1337 1338 return 0; 1339} 1340 1341/* Emit the necessary instructions. See can_cache above. */ 1342static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) 1343{ 1344 sljit_s32 tmp_ar, base; 1345 1346 SLJIT_ASSERT(arg & SLJIT_MEM); 1347 if (!(next_arg & SLJIT_MEM)) { 1348 next_arg = 0; 1349 next_argw = 0; 1350 } 1351 1352 if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) 1353 tmp_ar = reg_ar; 1354 else 1355 tmp_ar = TMP_REG1_mapped; 1356 1357 base = arg & REG_MASK; 1358 1359 if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { 1360 argw &= 0x3; 1361 1362 if ((flags & WRITE_BACK) && reg_ar == reg_map[base]) { 1363 SLJIT_ASSERT(!(flags & LOAD_DATA) && reg_map[TMP_REG1] != reg_ar); 1364 FAIL_IF(ADD(TMP_REG1_mapped, reg_ar, ZERO)); 1365 reg_ar = TMP_REG1_mapped; 1366 } 1367 1368 /* Using the cache. 
*/ 1369 if (argw == compiler->cache_argw) { 1370 if (!(flags & WRITE_BACK)) { 1371 if (arg == compiler->cache_arg) { 1372 if (flags & LOAD_DATA) 1373 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped); 1374 else 1375 return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar); 1376 } 1377 1378 if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) { 1379 if (arg == next_arg && argw == (next_argw & 0x3)) { 1380 compiler->cache_arg = arg; 1381 compiler->cache_argw = argw; 1382 FAIL_IF(ADD(TMP_REG3_mapped, reg_map[base], TMP_REG3_mapped)); 1383 if (flags & LOAD_DATA) 1384 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped); 1385 else 1386 return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar); 1387 } 1388 1389 FAIL_IF(ADD(tmp_ar, reg_map[base], TMP_REG3_mapped)); 1390 if (flags & LOAD_DATA) 1391 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar); 1392 else 1393 return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar); 1394 } 1395 } else { 1396 if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) { 1397 FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped)); 1398 if (flags & LOAD_DATA) 1399 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]); 1400 else 1401 return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar); 1402 } 1403 } 1404 } 1405 1406 if (SLJIT_UNLIKELY(argw)) { 1407 compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK); 1408 compiler->cache_argw = argw; 1409 FAIL_IF(SHLI(TMP_REG3_mapped, reg_map[OFFS_REG(arg)], argw)); 1410 } 1411 1412 if (!(flags & WRITE_BACK)) { 1413 if (arg == next_arg && argw == (next_argw & 0x3)) { 1414 compiler->cache_arg = arg; 1415 compiler->cache_argw = argw; 1416 FAIL_IF(ADD(TMP_REG3_mapped, reg_map[base], reg_map[!argw ? OFFS_REG(arg) : TMP_REG3])); 1417 tmp_ar = TMP_REG3_mapped; 1418 } else 1419 FAIL_IF(ADD(tmp_ar, reg_map[base], reg_map[!argw ? 
OFFS_REG(arg) : TMP_REG3])); 1420 1421 if (flags & LOAD_DATA) 1422 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar); 1423 else 1424 return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar); 1425 } 1426 1427 FAIL_IF(ADD(reg_map[base], reg_map[base], reg_map[!argw ? OFFS_REG(arg) : TMP_REG3])); 1428 1429 if (flags & LOAD_DATA) 1430 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]); 1431 else 1432 return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar); 1433 } 1434 1435 if (SLJIT_UNLIKELY(flags & WRITE_BACK) && base) { 1436 /* Update only applies if a base register exists. */ 1437 if (reg_ar == reg_map[base]) { 1438 SLJIT_ASSERT(!(flags & LOAD_DATA) && TMP_REG1_mapped != reg_ar); 1439 if (argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) { 1440 FAIL_IF(ADDLI(ADDR_TMP_mapped, reg_map[base], argw)); 1441 if (flags & LOAD_DATA) 1442 FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, ADDR_TMP_mapped)); 1443 else 1444 FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], ADDR_TMP_mapped, reg_ar)); 1445 1446 if (argw) 1447 return ADDLI(reg_map[base], reg_map[base], argw); 1448 1449 return SLJIT_SUCCESS; 1450 } 1451 1452 FAIL_IF(ADD(TMP_REG1_mapped, reg_ar, ZERO)); 1453 reg_ar = TMP_REG1_mapped; 1454 } 1455 1456 if (argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) { 1457 if (argw) 1458 FAIL_IF(ADDLI(reg_map[base], reg_map[base], argw)); 1459 } else { 1460 if (compiler->cache_arg == SLJIT_MEM 1461 && argw - compiler->cache_argw <= SIMM_16BIT_MAX 1462 && argw - compiler->cache_argw >= SIMM_16BIT_MIN) { 1463 if (argw != compiler->cache_argw) { 1464 FAIL_IF(ADD(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw)); 1465 compiler->cache_argw = argw; 1466 } 1467 1468 FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped)); 1469 } else { 1470 compiler->cache_arg = SLJIT_MEM; 1471 compiler->cache_argw = argw; 1472 FAIL_IF(load_immediate(compiler, TMP_REG3_mapped, argw)); 1473 
FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped)); 1474 } 1475 } 1476 1477 if (flags & LOAD_DATA) 1478 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]); 1479 else 1480 return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar); 1481 } 1482 1483 if (compiler->cache_arg == arg 1484 && argw - compiler->cache_argw <= SIMM_16BIT_MAX 1485 && argw - compiler->cache_argw >= SIMM_16BIT_MIN) { 1486 if (argw != compiler->cache_argw) { 1487 FAIL_IF(ADDLI(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw)); 1488 compiler->cache_argw = argw; 1489 } 1490 1491 if (flags & LOAD_DATA) 1492 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped); 1493 else 1494 return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar); 1495 } 1496 1497 if (compiler->cache_arg == SLJIT_MEM 1498 && argw - compiler->cache_argw <= SIMM_16BIT_MAX 1499 && argw - compiler->cache_argw >= SIMM_16BIT_MIN) { 1500 if (argw != compiler->cache_argw) 1501 FAIL_IF(ADDLI(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw)); 1502 } else { 1503 compiler->cache_arg = SLJIT_MEM; 1504 FAIL_IF(load_immediate(compiler, TMP_REG3_mapped, argw)); 1505 } 1506 1507 compiler->cache_argw = argw; 1508 1509 if (!base) { 1510 if (flags & LOAD_DATA) 1511 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped); 1512 else 1513 return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar); 1514 } 1515 1516 if (arg == next_arg 1517 && next_argw - argw <= SIMM_16BIT_MAX 1518 && next_argw - argw >= SIMM_16BIT_MIN) { 1519 compiler->cache_arg = arg; 1520 FAIL_IF(ADD(TMP_REG3_mapped, TMP_REG3_mapped, reg_map[base])); 1521 if (flags & LOAD_DATA) 1522 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped); 1523 else 1524 return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar); 1525 } 1526 1527 FAIL_IF(ADD(tmp_ar, TMP_REG3_mapped, reg_map[base])); 1528 1529 if (flags & 
LOAD_DATA) 1530 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar); 1531 else 1532 return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar); 1533} 1534 1535static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw) 1536{ 1537 if (getput_arg_fast(compiler, flags, reg_ar, arg, argw)) 1538 return compiler->error; 1539 1540 compiler->cache_arg = 0; 1541 compiler->cache_argw = 0; 1542 return getput_arg(compiler, flags, reg_ar, arg, argw, 0, 0); 1543} 1544 1545static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w) 1546{ 1547 if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) 1548 return compiler->error; 1549 return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); 1550} 1551 1552SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) 1553{ 1554 CHECK_ERROR(); 1555 CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); 1556 ADJUST_LOCAL_OFFSET(dst, dstw); 1557 1558 /* For UNUSED dst. Uncommon, but possible. */ 1559 if (dst == SLJIT_UNUSED) 1560 return SLJIT_SUCCESS; 1561 1562 if (FAST_IS_REG(dst)) 1563 return ADD(reg_map[dst], RA, ZERO); 1564 1565 /* Memory. 
*/ 1566 return emit_op_mem(compiler, WORD_DATA, RA, dst, dstw); 1567} 1568 1569SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw) 1570{ 1571 CHECK_ERROR(); 1572 CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); 1573 ADJUST_LOCAL_OFFSET(src, srcw); 1574 1575 if (FAST_IS_REG(src)) 1576 FAIL_IF(ADD(RA, reg_map[src], ZERO)); 1577 1578 else if (src & SLJIT_MEM) 1579 FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RA, src, srcw)); 1580 1581 else if (src & SLJIT_IMM) 1582 FAIL_IF(load_immediate(compiler, RA, srcw)); 1583 1584 return JR(RA); 1585} 1586 1587static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, sljit_s32 dst, sljit_s32 src1, sljit_sw src2) 1588{ 1589 sljit_s32 overflow_ra = 0; 1590 1591 switch (GET_OPCODE(op)) { 1592 case SLJIT_MOV: 1593 case SLJIT_MOV_P: 1594 SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); 1595 if (dst != src2) 1596 return ADD(reg_map[dst], reg_map[src2], ZERO); 1597 return SLJIT_SUCCESS; 1598 1599 case SLJIT_MOV_U32: 1600 case SLJIT_MOV_S32: 1601 SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); 1602 if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { 1603 if (op == SLJIT_MOV_S32) 1604 return BFEXTS(reg_map[dst], reg_map[src2], 0, 31); 1605 1606 return BFEXTU(reg_map[dst], reg_map[src2], 0, 31); 1607 } else if (dst != src2) { 1608 SLJIT_ASSERT(src2 == 0); 1609 return ADD(reg_map[dst], reg_map[src2], ZERO); 1610 } 1611 1612 return SLJIT_SUCCESS; 1613 1614 case SLJIT_MOV_U8: 1615 case SLJIT_MOV_S8: 1616 SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); 1617 if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { 1618 if (op == SLJIT_MOV_S8) 1619 return BFEXTS(reg_map[dst], reg_map[src2], 0, 7); 1620 1621 return BFEXTU(reg_map[dst], reg_map[src2], 0, 7); 1622 } else if (dst != src2) { 1623 SLJIT_ASSERT(src2 == 0); 1624 return ADD(reg_map[dst], reg_map[src2], 
ZERO); 1625 } 1626 1627 return SLJIT_SUCCESS; 1628 1629 case SLJIT_MOV_U16: 1630 case SLJIT_MOV_S16: 1631 SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); 1632 if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { 1633 if (op == SLJIT_MOV_S16) 1634 return BFEXTS(reg_map[dst], reg_map[src2], 0, 15); 1635 1636 return BFEXTU(reg_map[dst], reg_map[src2], 0, 15); 1637 } else if (dst != src2) { 1638 SLJIT_ASSERT(src2 == 0); 1639 return ADD(reg_map[dst], reg_map[src2], ZERO); 1640 } 1641 1642 return SLJIT_SUCCESS; 1643 1644 case SLJIT_NOT: 1645 SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); 1646 if (op & SLJIT_SET_E) 1647 FAIL_IF(NOR(EQUAL_FLAG, reg_map[src2], reg_map[src2])); 1648 if (CHECK_FLAGS(SLJIT_SET_E)) 1649 FAIL_IF(NOR(reg_map[dst], reg_map[src2], reg_map[src2])); 1650 1651 return SLJIT_SUCCESS; 1652 1653 case SLJIT_CLZ: 1654 SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); 1655 if (op & SLJIT_SET_E) 1656 FAIL_IF(CLZ(EQUAL_FLAG, reg_map[src2])); 1657 if (CHECK_FLAGS(SLJIT_SET_E)) 1658 FAIL_IF(CLZ(reg_map[dst], reg_map[src2])); 1659 1660 return SLJIT_SUCCESS; 1661 1662 case SLJIT_ADD: 1663 if (flags & SRC2_IMM) { 1664 if (op & SLJIT_SET_O) { 1665 FAIL_IF(SHRUI(TMP_EREG1, reg_map[src1], 63)); 1666 if (src2 < 0) 1667 FAIL_IF(XORI(TMP_EREG1, TMP_EREG1, 1)); 1668 } 1669 1670 if (op & SLJIT_SET_E) 1671 FAIL_IF(ADDLI(EQUAL_FLAG, reg_map[src1], src2)); 1672 1673 if (op & SLJIT_SET_C) { 1674 if (src2 >= 0) 1675 FAIL_IF(ORI(ULESS_FLAG ,reg_map[src1], src2)); 1676 else { 1677 FAIL_IF(ADDLI(ULESS_FLAG ,ZERO, src2)); 1678 FAIL_IF(OR(ULESS_FLAG,reg_map[src1],ULESS_FLAG)); 1679 } 1680 } 1681 1682 /* dst may be the same as src1 or src2. 
*/ 1683 if (CHECK_FLAGS(SLJIT_SET_E)) 1684 FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], src2)); 1685 1686 if (op & SLJIT_SET_O) { 1687 FAIL_IF(SHRUI(OVERFLOW_FLAG, reg_map[dst], 63)); 1688 1689 if (src2 < 0) 1690 FAIL_IF(XORI(OVERFLOW_FLAG, OVERFLOW_FLAG, 1)); 1691 } 1692 } else { 1693 if (op & SLJIT_SET_O) { 1694 FAIL_IF(XOR(TMP_EREG1, reg_map[src1], reg_map[src2])); 1695 FAIL_IF(SHRUI(TMP_EREG1, TMP_EREG1, 63)); 1696 1697 if (src1 != dst) 1698 overflow_ra = reg_map[src1]; 1699 else if (src2 != dst) 1700 overflow_ra = reg_map[src2]; 1701 else { 1702 /* Rare ocasion. */ 1703 FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO)); 1704 overflow_ra = TMP_EREG2; 1705 } 1706 } 1707 1708 if (op & SLJIT_SET_E) 1709 FAIL_IF(ADD(EQUAL_FLAG ,reg_map[src1], reg_map[src2])); 1710 1711 if (op & SLJIT_SET_C) 1712 FAIL_IF(OR(ULESS_FLAG,reg_map[src1], reg_map[src2])); 1713 1714 /* dst may be the same as src1 or src2. */ 1715 if (CHECK_FLAGS(SLJIT_SET_E)) 1716 FAIL_IF(ADD(reg_map[dst],reg_map[src1], reg_map[src2])); 1717 1718 if (op & SLJIT_SET_O) { 1719 FAIL_IF(XOR(OVERFLOW_FLAG,reg_map[dst], overflow_ra)); 1720 FAIL_IF(SHRUI(OVERFLOW_FLAG, OVERFLOW_FLAG, 63)); 1721 } 1722 } 1723 1724 /* a + b >= a | b (otherwise, the carry should be set to 1). */ 1725 if (op & SLJIT_SET_C) 1726 FAIL_IF(CMPLTU(ULESS_FLAG ,reg_map[dst] ,ULESS_FLAG)); 1727 1728 if (op & SLJIT_SET_O) 1729 return CMOVNEZ(OVERFLOW_FLAG, TMP_EREG1, ZERO); 1730 1731 return SLJIT_SUCCESS; 1732 1733 case SLJIT_ADDC: 1734 if (flags & SRC2_IMM) { 1735 if (op & SLJIT_SET_C) { 1736 if (src2 >= 0) 1737 FAIL_IF(ORI(TMP_EREG1, reg_map[src1], src2)); 1738 else { 1739 FAIL_IF(ADDLI(TMP_EREG1, ZERO, src2)); 1740 FAIL_IF(OR(TMP_EREG1, reg_map[src1], TMP_EREG1)); 1741 } 1742 } 1743 1744 FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], src2)); 1745 1746 } else { 1747 if (op & SLJIT_SET_C) 1748 FAIL_IF(OR(TMP_EREG1, reg_map[src1], reg_map[src2])); 1749 1750 /* dst may be the same as src1 or src2. 
*/ 1751 FAIL_IF(ADD(reg_map[dst], reg_map[src1], reg_map[src2])); 1752 } 1753 1754 if (op & SLJIT_SET_C) 1755 FAIL_IF(CMPLTU(TMP_EREG1, reg_map[dst], TMP_EREG1)); 1756 1757 FAIL_IF(ADD(reg_map[dst], reg_map[dst], ULESS_FLAG)); 1758 1759 if (!(op & SLJIT_SET_C)) 1760 return SLJIT_SUCCESS; 1761 1762 /* Set TMP_EREG2 (dst == 0) && (ULESS_FLAG == 1). */ 1763 FAIL_IF(CMPLTUI(TMP_EREG2, reg_map[dst], 1)); 1764 FAIL_IF(AND(TMP_EREG2, TMP_EREG2, ULESS_FLAG)); 1765 /* Set carry flag. */ 1766 return OR(ULESS_FLAG, TMP_EREG2, TMP_EREG1); 1767 1768 case SLJIT_SUB: 1769 if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_16BIT_MIN)) { 1770 FAIL_IF(ADDLI(TMP_REG2_mapped, ZERO, src2)); 1771 src2 = TMP_REG2; 1772 flags &= ~SRC2_IMM; 1773 } 1774 1775 if (flags & SRC2_IMM) { 1776 if (op & SLJIT_SET_O) { 1777 FAIL_IF(SHRUI(TMP_EREG1,reg_map[src1], 63)); 1778 1779 if (src2 < 0) 1780 FAIL_IF(XORI(TMP_EREG1, TMP_EREG1, 1)); 1781 1782 if (src1 != dst) 1783 overflow_ra = reg_map[src1]; 1784 else { 1785 /* Rare ocasion. */ 1786 FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO)); 1787 overflow_ra = TMP_EREG2; 1788 } 1789 } 1790 1791 if (op & SLJIT_SET_E) 1792 FAIL_IF(ADDLI(EQUAL_FLAG, reg_map[src1], -src2)); 1793 1794 if (op & SLJIT_SET_C) { 1795 FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, src2)); 1796 FAIL_IF(CMPLTU(ULESS_FLAG, reg_map[src1], ADDR_TMP_mapped)); 1797 } 1798 1799 /* dst may be the same as src1 or src2. */ 1800 if (CHECK_FLAGS(SLJIT_SET_E)) 1801 FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], -src2)); 1802 1803 } else { 1804 1805 if (op & SLJIT_SET_O) { 1806 FAIL_IF(XOR(TMP_EREG1, reg_map[src1], reg_map[src2])); 1807 FAIL_IF(SHRUI(TMP_EREG1, TMP_EREG1, 63)); 1808 1809 if (src1 != dst) 1810 overflow_ra = reg_map[src1]; 1811 else { 1812 /* Rare ocasion. 
*/ 1813 FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO)); 1814 overflow_ra = TMP_EREG2; 1815 } 1816 } 1817 1818 if (op & SLJIT_SET_E) 1819 FAIL_IF(SUB(EQUAL_FLAG, reg_map[src1], reg_map[src2])); 1820 1821 if (op & (SLJIT_SET_U | SLJIT_SET_C)) 1822 FAIL_IF(CMPLTU(ULESS_FLAG, reg_map[src1], reg_map[src2])); 1823 1824 if (op & SLJIT_SET_U) 1825 FAIL_IF(CMPLTU(UGREATER_FLAG, reg_map[src2], reg_map[src1])); 1826 1827 if (op & SLJIT_SET_S) { 1828 FAIL_IF(CMPLTS(LESS_FLAG ,reg_map[src1] ,reg_map[src2])); 1829 FAIL_IF(CMPLTS(GREATER_FLAG ,reg_map[src2] ,reg_map[src1])); 1830 } 1831 1832 /* dst may be the same as src1 or src2. */ 1833 if (CHECK_FLAGS(SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C)) 1834 FAIL_IF(SUB(reg_map[dst], reg_map[src1], reg_map[src2])); 1835 } 1836 1837 if (op & SLJIT_SET_O) { 1838 FAIL_IF(XOR(OVERFLOW_FLAG, reg_map[dst], overflow_ra)); 1839 FAIL_IF(SHRUI(OVERFLOW_FLAG, OVERFLOW_FLAG, 63)); 1840 return CMOVEQZ(OVERFLOW_FLAG, TMP_EREG1, ZERO); 1841 } 1842 1843 return SLJIT_SUCCESS; 1844 1845 case SLJIT_SUBC: 1846 if ((flags & SRC2_IMM) && src2 == SIMM_16BIT_MIN) { 1847 FAIL_IF(ADDLI(TMP_REG2_mapped, ZERO, src2)); 1848 src2 = TMP_REG2; 1849 flags &= ~SRC2_IMM; 1850 } 1851 1852 if (flags & SRC2_IMM) { 1853 if (op & SLJIT_SET_C) { 1854 FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, -src2)); 1855 FAIL_IF(CMPLTU(TMP_EREG1, reg_map[src1], ADDR_TMP_mapped)); 1856 } 1857 1858 /* dst may be the same as src1 or src2. */ 1859 FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], -src2)); 1860 1861 } else { 1862 if (op & SLJIT_SET_C) 1863 FAIL_IF(CMPLTU(TMP_EREG1, reg_map[src1], reg_map[src2])); 1864 /* dst may be the same as src1 or src2. 
*/ 1865 FAIL_IF(SUB(reg_map[dst], reg_map[src1], reg_map[src2])); 1866 } 1867 1868 if (op & SLJIT_SET_C) 1869 FAIL_IF(CMOVEQZ(TMP_EREG1, reg_map[dst], ULESS_FLAG)); 1870 1871 FAIL_IF(SUB(reg_map[dst], reg_map[dst], ULESS_FLAG)); 1872 1873 if (op & SLJIT_SET_C) 1874 FAIL_IF(ADD(ULESS_FLAG, TMP_EREG1, ZERO)); 1875 1876 return SLJIT_SUCCESS; 1877 1878 case SLJIT_MUL: 1879 if (flags & SRC2_IMM) { 1880 FAIL_IF(load_immediate(compiler, TMP_REG2_mapped, src2)); 1881 src2 = TMP_REG2; 1882 flags &= ~SRC2_IMM; 1883 } 1884 1885 FAIL_IF(MUL(reg_map[dst], reg_map[src1], reg_map[src2])); 1886 1887 return SLJIT_SUCCESS; 1888 1889#define EMIT_LOGICAL(op_imm, op_norm) \ 1890 if (flags & SRC2_IMM) { \ 1891 FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, src2)); \ 1892 if (op & SLJIT_SET_E) \ 1893 FAIL_IF(push_3_buffer( \ 1894 compiler, op_norm, EQUAL_FLAG, reg_map[src1], \ 1895 ADDR_TMP_mapped, __LINE__)); \ 1896 if (CHECK_FLAGS(SLJIT_SET_E)) \ 1897 FAIL_IF(push_3_buffer( \ 1898 compiler, op_norm, reg_map[dst], reg_map[src1], \ 1899 ADDR_TMP_mapped, __LINE__)); \ 1900 } else { \ 1901 if (op & SLJIT_SET_E) \ 1902 FAIL_IF(push_3_buffer( \ 1903 compiler, op_norm, EQUAL_FLAG, reg_map[src1], \ 1904 reg_map[src2], __LINE__)); \ 1905 if (CHECK_FLAGS(SLJIT_SET_E)) \ 1906 FAIL_IF(push_3_buffer( \ 1907 compiler, op_norm, reg_map[dst], reg_map[src1], \ 1908 reg_map[src2], __LINE__)); \ 1909 } 1910 1911 case SLJIT_AND: 1912 EMIT_LOGICAL(TILEGX_OPC_ANDI, TILEGX_OPC_AND); 1913 return SLJIT_SUCCESS; 1914 1915 case SLJIT_OR: 1916 EMIT_LOGICAL(TILEGX_OPC_ORI, TILEGX_OPC_OR); 1917 return SLJIT_SUCCESS; 1918 1919 case SLJIT_XOR: 1920 EMIT_LOGICAL(TILEGX_OPC_XORI, TILEGX_OPC_XOR); 1921 return SLJIT_SUCCESS; 1922 1923#define EMIT_SHIFT(op_imm, op_norm) \ 1924 if (flags & SRC2_IMM) { \ 1925 if (op & SLJIT_SET_E) \ 1926 FAIL_IF(push_3_buffer( \ 1927 compiler, op_imm, EQUAL_FLAG, reg_map[src1], \ 1928 src2 & 0x3F, __LINE__)); \ 1929 if (CHECK_FLAGS(SLJIT_SET_E)) \ 1930 FAIL_IF(push_3_buffer( \ 1931 
compiler, op_imm, reg_map[dst], reg_map[src1], \ 1932 src2 & 0x3F, __LINE__)); \ 1933 } else { \ 1934 if (op & SLJIT_SET_E) \ 1935 FAIL_IF(push_3_buffer( \ 1936 compiler, op_norm, EQUAL_FLAG, reg_map[src1], \ 1937 reg_map[src2], __LINE__)); \ 1938 if (CHECK_FLAGS(SLJIT_SET_E)) \ 1939 FAIL_IF(push_3_buffer( \ 1940 compiler, op_norm, reg_map[dst], reg_map[src1], \ 1941 reg_map[src2], __LINE__)); \ 1942 } 1943 1944 case SLJIT_SHL: 1945 EMIT_SHIFT(TILEGX_OPC_SHLI, TILEGX_OPC_SHL); 1946 return SLJIT_SUCCESS; 1947 1948 case SLJIT_LSHR: 1949 EMIT_SHIFT(TILEGX_OPC_SHRUI, TILEGX_OPC_SHRU); 1950 return SLJIT_SUCCESS; 1951 1952 case SLJIT_ASHR: 1953 EMIT_SHIFT(TILEGX_OPC_SHRSI, TILEGX_OPC_SHRS); 1954 return SLJIT_SUCCESS; 1955 } 1956 1957 SLJIT_UNREACHABLE(); 1958 return SLJIT_SUCCESS; 1959} 1960 1961static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) 1962{ 1963 /* arg1 goes to TMP_REG1 or src reg. 1964 arg2 goes to TMP_REG2, imm or src reg. 1965 TMP_REG3 can be used for caching. 1966 result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. 
*/ 1967 sljit_s32 dst_r = TMP_REG2; 1968 sljit_s32 src1_r; 1969 sljit_sw src2_r = 0; 1970 sljit_s32 sugg_src2_r = TMP_REG2; 1971 1972 if (!(flags & ALT_KEEP_CACHE)) { 1973 compiler->cache_arg = 0; 1974 compiler->cache_argw = 0; 1975 } 1976 1977 if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) { 1978 if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32 && !(src2 & SLJIT_MEM)) 1979 return SLJIT_SUCCESS; 1980 if (GET_FLAGS(op)) 1981 flags |= UNUSED_DEST; 1982 } else if (FAST_IS_REG(dst)) { 1983 dst_r = dst; 1984 flags |= REG_DEST; 1985 if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32) 1986 sugg_src2_r = dst_r; 1987 } else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1_mapped, dst, dstw)) 1988 flags |= SLOW_DEST; 1989 1990 if (flags & IMM_OP) { 1991 if ((src2 & SLJIT_IMM) && src2w) { 1992 if ((!(flags & LOGICAL_OP) 1993 && (src2w <= SIMM_16BIT_MAX && src2w >= SIMM_16BIT_MIN)) 1994 || ((flags & LOGICAL_OP) && !(src2w & ~UIMM_16BIT_MAX))) { 1995 flags |= SRC2_IMM; 1996 src2_r = src2w; 1997 } 1998 } 1999 2000 if (!(flags & SRC2_IMM) && (flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w) { 2001 if ((!(flags & LOGICAL_OP) 2002 && (src1w <= SIMM_16BIT_MAX && src1w >= SIMM_16BIT_MIN)) 2003 || ((flags & LOGICAL_OP) && !(src1w & ~UIMM_16BIT_MAX))) { 2004 flags |= SRC2_IMM; 2005 src2_r = src1w; 2006 2007 /* And swap arguments. */ 2008 src1 = src2; 2009 src1w = src2w; 2010 src2 = SLJIT_IMM; 2011 /* src2w = src2_r unneeded. */ 2012 } 2013 } 2014 } 2015 2016 /* Source 1. */ 2017 if (FAST_IS_REG(src1)) { 2018 src1_r = src1; 2019 flags |= REG1_SOURCE; 2020 } else if (src1 & SLJIT_IMM) { 2021 if (src1w) { 2022 FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, src1w)); 2023 src1_r = TMP_REG1; 2024 } else 2025 src1_r = 0; 2026 } else { 2027 if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w)) 2028 FAIL_IF(compiler->error); 2029 else 2030 flags |= SLOW_SRC1; 2031 src1_r = TMP_REG1; 2032 } 2033 2034 /* Source 2. 
*/ 2035 if (FAST_IS_REG(src2)) { 2036 src2_r = src2; 2037 flags |= REG2_SOURCE; 2038 if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_S32) 2039 dst_r = src2_r; 2040 } else if (src2 & SLJIT_IMM) { 2041 if (!(flags & SRC2_IMM)) { 2042 if (src2w) { 2043 FAIL_IF(load_immediate(compiler, reg_map[sugg_src2_r], src2w)); 2044 src2_r = sugg_src2_r; 2045 } else { 2046 src2_r = 0; 2047 if ((op >= SLJIT_MOV && op <= SLJIT_MOVU_S32) && (dst & SLJIT_MEM)) 2048 dst_r = 0; 2049 } 2050 } 2051 } else { 2052 if (getput_arg_fast(compiler, flags | LOAD_DATA, reg_map[sugg_src2_r], src2, src2w)) 2053 FAIL_IF(compiler->error); 2054 else 2055 flags |= SLOW_SRC2; 2056 src2_r = sugg_src2_r; 2057 } 2058 2059 if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { 2060 SLJIT_ASSERT(src2_r == TMP_REG2); 2061 if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { 2062 FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2_mapped, src2, src2w, src1, src1w)); 2063 FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, dst, dstw)); 2064 } else { 2065 FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, src2, src2w)); 2066 FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2_mapped, src2, src2w, dst, dstw)); 2067 } 2068 } else if (flags & SLOW_SRC1) 2069 FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, dst, dstw)); 2070 else if (flags & SLOW_SRC2) 2071 FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, reg_map[sugg_src2_r], src2, src2w, dst, dstw)); 2072 2073 FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); 2074 2075 if (dst & SLJIT_MEM) { 2076 if (!(flags & SLOW_DEST)) { 2077 getput_arg_fast(compiler, flags, reg_map[dst_r], dst, dstw); 2078 return compiler->error; 2079 } 2080 2081 return getput_arg(compiler, flags, reg_map[dst_r], dst, dstw, 0, 0); 2082 } 2083 2084 return SLJIT_SUCCESS; 2085} 2086 2087SLJIT_API_FUNC_ATTRIBUTE sljit_s32 
sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw, sljit_s32 type) 2088{ 2089 sljit_s32 sugg_dst_ar, dst_ar; 2090 sljit_s32 flags = GET_ALL_FLAGS(op); 2091 sljit_s32 mem_type = (op & SLJIT_I32_OP) ? (INT_DATA | SIGNED_DATA) : WORD_DATA; 2092 2093 CHECK_ERROR(); 2094 CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); 2095 ADJUST_LOCAL_OFFSET(dst, dstw); 2096 2097 if (dst == SLJIT_UNUSED) 2098 return SLJIT_SUCCESS; 2099 2100 op = GET_OPCODE(op); 2101 if (op == SLJIT_MOV_S32 || op == SLJIT_MOV_U32) 2102 mem_type = INT_DATA | SIGNED_DATA; 2103 sugg_dst_ar = reg_map[(op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2]; 2104 2105 compiler->cache_arg = 0; 2106 compiler->cache_argw = 0; 2107 if (op >= SLJIT_ADD && (src & SLJIT_MEM)) { 2108 ADJUST_LOCAL_OFFSET(src, srcw); 2109 FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, TMP_REG1_mapped, src, srcw, dst, dstw)); 2110 src = TMP_REG1; 2111 srcw = 0; 2112 } 2113 2114 switch (type & 0xff) { 2115 case SLJIT_EQUAL: 2116 case SLJIT_NOT_EQUAL: 2117 FAIL_IF(CMPLTUI(sugg_dst_ar, EQUAL_FLAG, 1)); 2118 dst_ar = sugg_dst_ar; 2119 break; 2120 case SLJIT_LESS: 2121 case SLJIT_GREATER_EQUAL: 2122 dst_ar = ULESS_FLAG; 2123 break; 2124 case SLJIT_GREATER: 2125 case SLJIT_LESS_EQUAL: 2126 dst_ar = UGREATER_FLAG; 2127 break; 2128 case SLJIT_SIG_LESS: 2129 case SLJIT_SIG_GREATER_EQUAL: 2130 dst_ar = LESS_FLAG; 2131 break; 2132 case SLJIT_SIG_GREATER: 2133 case SLJIT_SIG_LESS_EQUAL: 2134 dst_ar = GREATER_FLAG; 2135 break; 2136 case SLJIT_OVERFLOW: 2137 case SLJIT_NOT_OVERFLOW: 2138 dst_ar = OVERFLOW_FLAG; 2139 break; 2140 case SLJIT_MUL_OVERFLOW: 2141 case SLJIT_MUL_NOT_OVERFLOW: 2142 FAIL_IF(CMPLTUI(sugg_dst_ar, OVERFLOW_FLAG, 1)); 2143 dst_ar = sugg_dst_ar; 2144 type ^= 0x1; /* Flip type bit for the XORI below. 
*/ 2145 break; 2146 2147 default: 2148 SLJIT_UNREACHABLE(); 2149 dst_ar = sugg_dst_ar; 2150 break; 2151 } 2152 2153 if (type & 0x1) { 2154 FAIL_IF(XORI(sugg_dst_ar, dst_ar, 1)); 2155 dst_ar = sugg_dst_ar; 2156 } 2157 2158 if (op >= SLJIT_ADD) { 2159 if (TMP_REG2_mapped != dst_ar) 2160 FAIL_IF(ADD(TMP_REG2_mapped, dst_ar, ZERO)); 2161 return emit_op(compiler, op | flags, mem_type | CUMULATIVE_OP | LOGICAL_OP | IMM_OP | ALT_KEEP_CACHE, dst, dstw, src, srcw, TMP_REG2, 0); 2162 } 2163 2164 if (dst & SLJIT_MEM) 2165 return emit_op_mem(compiler, mem_type, dst_ar, dst, dstw); 2166 2167 if (sugg_dst_ar != dst_ar) 2168 return ADD(sugg_dst_ar, dst_ar, ZERO); 2169 2170 return SLJIT_SUCCESS; 2171} 2172 2173SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) { 2174 CHECK_ERROR(); 2175 CHECK(check_sljit_emit_op0(compiler, op)); 2176 2177 op = GET_OPCODE(op); 2178 switch (op) { 2179 case SLJIT_NOP: 2180 return push_0_buffer(compiler, TILEGX_OPC_FNOP, __LINE__); 2181 2182 case SLJIT_BREAKPOINT: 2183 return PI(BPT); 2184 2185 case SLJIT_LMUL_UW: 2186 case SLJIT_LMUL_SW: 2187 case SLJIT_DIVMOD_UW: 2188 case SLJIT_DIVMOD_SW: 2189 case SLJIT_DIV_UW: 2190 case SLJIT_DIV_SW: 2191 SLJIT_UNREACHABLE(); 2192 } 2193 2194 return SLJIT_SUCCESS; 2195} 2196 2197SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) 2198{ 2199 CHECK_ERROR(); 2200 CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); 2201 ADJUST_LOCAL_OFFSET(dst, dstw); 2202 ADJUST_LOCAL_OFFSET(src, srcw); 2203 2204 switch (GET_OPCODE(op)) { 2205 case SLJIT_MOV: 2206 case SLJIT_MOV_P: 2207 return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); 2208 2209 case SLJIT_MOV_U32: 2210 return emit_op(compiler, SLJIT_MOV_U32, INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); 2211 2212 case SLJIT_MOV_S32: 2213 return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | 
SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw); 2214 2215 case SLJIT_MOV_U8: 2216 return emit_op(compiler, SLJIT_MOV_U8, BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8) srcw : srcw); 2217 2218 case SLJIT_MOV_S8: 2219 return emit_op(compiler, SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8) srcw : srcw); 2220 2221 case SLJIT_MOV_U16: 2222 return emit_op(compiler, SLJIT_MOV_U16, HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16) srcw : srcw); 2223 2224 case SLJIT_MOV_S16: 2225 return emit_op(compiler, SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16) srcw : srcw); 2226 2227 case SLJIT_MOVU: 2228 case SLJIT_MOVU_P: 2229 return emit_op(compiler, SLJIT_MOV, WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); 2230 2231 case SLJIT_MOVU_U32: 2232 return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); 2233 2234 case SLJIT_MOVU_S32: 2235 return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); 2236 2237 case SLJIT_MOVU_U8: 2238 return emit_op(compiler, SLJIT_MOV_U8, BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8) srcw : srcw); 2239 2240 case SLJIT_MOVU_S8: 2241 return emit_op(compiler, SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8) srcw : srcw); 2242 2243 case SLJIT_MOVU_U16: 2244 return emit_op(compiler, SLJIT_MOV_U16, HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16) srcw : srcw); 2245 2246 case SLJIT_MOVU_S16: 2247 return emit_op(compiler, SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? 
(sljit_s16) srcw : srcw); 2248 2249 case SLJIT_NOT: 2250 return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw); 2251 2252 case SLJIT_NEG: 2253 return emit_op(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw); 2254 2255 case SLJIT_CLZ: 2256 return emit_op(compiler, op, (op & SLJIT_I32_OP) ? INT_DATA : WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); 2257 } 2258 2259 return SLJIT_SUCCESS; 2260} 2261 2262SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) 2263{ 2264 CHECK_ERROR(); 2265 CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); 2266 ADJUST_LOCAL_OFFSET(dst, dstw); 2267 ADJUST_LOCAL_OFFSET(src1, src1w); 2268 ADJUST_LOCAL_OFFSET(src2, src2w); 2269 2270 switch (GET_OPCODE(op)) { 2271 case SLJIT_ADD: 2272 case SLJIT_ADDC: 2273 return emit_op(compiler, op, CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); 2274 2275 case SLJIT_SUB: 2276 case SLJIT_SUBC: 2277 return emit_op(compiler, op, IMM_OP, dst, dstw, src1, src1w, src2, src2w); 2278 2279 case SLJIT_MUL: 2280 return emit_op(compiler, op, CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w); 2281 2282 case SLJIT_AND: 2283 case SLJIT_OR: 2284 case SLJIT_XOR: 2285 return emit_op(compiler, op, CUMULATIVE_OP | LOGICAL_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); 2286 2287 case SLJIT_SHL: 2288 case SLJIT_LSHR: 2289 case SLJIT_ASHR: 2290 if (src2 & SLJIT_IMM) 2291 src2w &= 0x3f; 2292 if (op & SLJIT_I32_OP) 2293 src2w &= 0x1f; 2294 2295 return emit_op(compiler, op, IMM_OP, dst, dstw, src1, src1w, src2, src2w); 2296 } 2297 2298 return SLJIT_SUCCESS; 2299} 2300 2301SLJIT_API_FUNC_ATTRIBUTE struct sljit_label * sljit_emit_label(struct sljit_compiler *compiler) 2302{ 2303 struct sljit_label *label; 2304 2305 flush_buffer(compiler); 2306 2307 CHECK_ERROR_PTR(); 2308 
CHECK_PTR(check_sljit_emit_label(compiler)); 2309 2310 if (compiler->last_label && compiler->last_label->size == compiler->size) 2311 return compiler->last_label; 2312 2313 label = (struct sljit_label *)ensure_abuf(compiler, sizeof(struct sljit_label)); 2314 PTR_FAIL_IF(!label); 2315 set_label(label, compiler); 2316 return label; 2317} 2318 2319SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) 2320{ 2321 sljit_s32 src_r = TMP_REG2; 2322 struct sljit_jump *jump = NULL; 2323 2324 flush_buffer(compiler); 2325 2326 CHECK_ERROR(); 2327 CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); 2328 ADJUST_LOCAL_OFFSET(src, srcw); 2329 2330 if (FAST_IS_REG(src)) { 2331 if (reg_map[src] != 0) 2332 src_r = src; 2333 else 2334 FAIL_IF(ADD_SOLO(TMP_REG2_mapped, reg_map[src], ZERO)); 2335 } 2336 2337 if (type >= SLJIT_CALL0) { 2338 SLJIT_ASSERT(reg_map[PIC_ADDR_REG] == 16 && PIC_ADDR_REG == TMP_REG2); 2339 if (src & (SLJIT_IMM | SLJIT_MEM)) { 2340 if (src & SLJIT_IMM) 2341 FAIL_IF(emit_const(compiler, reg_map[PIC_ADDR_REG], srcw, 1)); 2342 else { 2343 SLJIT_ASSERT(src_r == TMP_REG2 && (src & SLJIT_MEM)); 2344 FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw)); 2345 } 2346 2347 FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO)); 2348 2349 FAIL_IF(ADDI_SOLO(54, 54, -16)); 2350 2351 FAIL_IF(JALR_SOLO(reg_map[PIC_ADDR_REG])); 2352 2353 return ADDI_SOLO(54, 54, 16); 2354 } 2355 2356 /* Register input. 
*/ 2357 if (type >= SLJIT_CALL1) 2358 FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO)); 2359 2360 FAIL_IF(ADD_SOLO(reg_map[PIC_ADDR_REG], reg_map[src_r], ZERO)); 2361 2362 FAIL_IF(ADDI_SOLO(54, 54, -16)); 2363 2364 FAIL_IF(JALR_SOLO(reg_map[src_r])); 2365 2366 return ADDI_SOLO(54, 54, 16); 2367 } 2368 2369 if (src & SLJIT_IMM) { 2370 jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump)); 2371 FAIL_IF(!jump); 2372 set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_JAL : 0)); 2373 jump->u.target = srcw; 2374 FAIL_IF(emit_const(compiler, TMP_REG2_mapped, 0, 1)); 2375 2376 if (type >= SLJIT_FAST_CALL) { 2377 FAIL_IF(ADD_SOLO(ZERO, ZERO, ZERO)); 2378 jump->addr = compiler->size; 2379 FAIL_IF(JR_SOLO(reg_map[src_r])); 2380 } else { 2381 jump->addr = compiler->size; 2382 FAIL_IF(JR_SOLO(reg_map[src_r])); 2383 } 2384 2385 return SLJIT_SUCCESS; 2386 2387 } else if (src & SLJIT_MEM) { 2388 FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw)); 2389 flush_buffer(compiler); 2390 } 2391 2392 FAIL_IF(JR_SOLO(reg_map[src_r])); 2393 2394 if (jump) 2395 jump->addr = compiler->size; 2396 2397 return SLJIT_SUCCESS; 2398} 2399 2400#define BR_Z(src) \ 2401 inst = BEQZ_X1 | SRCA_X1(src); \ 2402 flags = IS_COND; 2403 2404#define BR_NZ(src) \ 2405 inst = BNEZ_X1 | SRCA_X1(src); \ 2406 flags = IS_COND; 2407 2408SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump * sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) 2409{ 2410 struct sljit_jump *jump; 2411 sljit_ins inst; 2412 sljit_s32 flags = 0; 2413 2414 flush_buffer(compiler); 2415 2416 CHECK_ERROR_PTR(); 2417 CHECK_PTR(check_sljit_emit_jump(compiler, type)); 2418 2419 jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump)); 2420 PTR_FAIL_IF(!jump); 2421 set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); 2422 type &= 0xff; 2423 2424 switch (type) { 2425 case SLJIT_EQUAL: 2426 BR_NZ(EQUAL_FLAG); 2427 break; 2428 case SLJIT_NOT_EQUAL: 2429 
BR_Z(EQUAL_FLAG); 2430 break; 2431 case SLJIT_LESS: 2432 BR_Z(ULESS_FLAG); 2433 break; 2434 case SLJIT_GREATER_EQUAL: 2435 BR_NZ(ULESS_FLAG); 2436 break; 2437 case SLJIT_GREATER: 2438 BR_Z(UGREATER_FLAG); 2439 break; 2440 case SLJIT_LESS_EQUAL: 2441 BR_NZ(UGREATER_FLAG); 2442 break; 2443 case SLJIT_SIG_LESS: 2444 BR_Z(LESS_FLAG); 2445 break; 2446 case SLJIT_SIG_GREATER_EQUAL: 2447 BR_NZ(LESS_FLAG); 2448 break; 2449 case SLJIT_SIG_GREATER: 2450 BR_Z(GREATER_FLAG); 2451 break; 2452 case SLJIT_SIG_LESS_EQUAL: 2453 BR_NZ(GREATER_FLAG); 2454 break; 2455 case SLJIT_OVERFLOW: 2456 case SLJIT_MUL_OVERFLOW: 2457 BR_Z(OVERFLOW_FLAG); 2458 break; 2459 case SLJIT_NOT_OVERFLOW: 2460 case SLJIT_MUL_NOT_OVERFLOW: 2461 BR_NZ(OVERFLOW_FLAG); 2462 break; 2463 default: 2464 /* Not conditional branch. */ 2465 inst = 0; 2466 break; 2467 } 2468 2469 jump->flags |= flags; 2470 2471 if (inst) { 2472 inst = inst | ((type <= SLJIT_JUMP) ? BOFF_X1(5) : BOFF_X1(6)); 2473 PTR_FAIL_IF(PI(inst)); 2474 } 2475 2476 PTR_FAIL_IF(emit_const(compiler, TMP_REG2_mapped, 0, 1)); 2477 if (type <= SLJIT_JUMP) { 2478 jump->addr = compiler->size; 2479 PTR_FAIL_IF(JR_SOLO(TMP_REG2_mapped)); 2480 } else { 2481 SLJIT_ASSERT(reg_map[PIC_ADDR_REG] == 16 && PIC_ADDR_REG == TMP_REG2); 2482 /* Cannot be optimized out if type is >= CALL0. */ 2483 jump->flags |= IS_JAL | (type >= SLJIT_CALL0 ? 
SLJIT_REWRITABLE_JUMP : 0); 2484 PTR_FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO)); 2485 jump->addr = compiler->size; 2486 PTR_FAIL_IF(JALR_SOLO(TMP_REG2_mapped)); 2487 } 2488 2489 return jump; 2490} 2491 2492SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void) 2493{ 2494 return 0; 2495} 2496 2497SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) 2498{ 2499 SLJIT_UNREACHABLE(); 2500} 2501 2502SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) 2503{ 2504 SLJIT_UNREACHABLE(); 2505} 2506 2507SLJIT_API_FUNC_ATTRIBUTE struct sljit_const * sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) 2508{ 2509 struct sljit_const *const_; 2510 sljit_s32 reg; 2511 2512 flush_buffer(compiler); 2513 2514 CHECK_ERROR_PTR(); 2515 CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); 2516 ADJUST_LOCAL_OFFSET(dst, dstw); 2517 2518 const_ = (struct sljit_const *)ensure_abuf(compiler, sizeof(struct sljit_const)); 2519 PTR_FAIL_IF(!const_); 2520 set_const(const_, compiler); 2521 2522 reg = FAST_IS_REG(dst) ? 
dst : TMP_REG2; 2523 2524 PTR_FAIL_IF(emit_const_64(compiler, reg, init_value, 1)); 2525 2526 if (dst & SLJIT_MEM) 2527 PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0)); 2528 return const_; 2529} 2530 2531SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target) 2532{ 2533 sljit_ins *inst = (sljit_ins *)addr; 2534 2535 inst[0] = (inst[0] & ~(0xFFFFL << 43)) | (((new_target >> 32) & 0xffff) << 43); 2536 inst[1] = (inst[1] & ~(0xFFFFL << 43)) | (((new_target >> 16) & 0xffff) << 43); 2537 inst[2] = (inst[2] & ~(0xFFFFL << 43)) | ((new_target & 0xffff) << 43); 2538 SLJIT_CACHE_FLUSH(inst, inst + 3); 2539} 2540 2541SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) 2542{ 2543 sljit_ins *inst = (sljit_ins *)addr; 2544 2545 inst[0] = (inst[0] & ~(0xFFFFL << 43)) | (((new_constant >> 48) & 0xFFFFL) << 43); 2546 inst[1] = (inst[1] & ~(0xFFFFL << 43)) | (((new_constant >> 32) & 0xFFFFL) << 43); 2547 inst[2] = (inst[2] & ~(0xFFFFL << 43)) | (((new_constant >> 16) & 0xFFFFL) << 43); 2548 inst[3] = (inst[3] & ~(0xFFFFL << 43)) | ((new_constant & 0xFFFFL) << 43); 2549 SLJIT_CACHE_FLUSH(inst, inst + 4); 2550} 2551 2552SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) 2553{ 2554 CHECK_REG_INDEX(check_sljit_get_register_index(reg)); 2555 return reg_map[reg]; 2556} 2557 2558SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, 2559 void *instruction, sljit_s32 size) 2560{ 2561 CHECK_ERROR(); 2562 CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); 2563 return SLJIT_ERR_UNSUPPORTED; 2564} 2565 2566