1/*
2 *    Stack-less Just-In-Time compiler
3 *
4 *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 *   1. Redistributions of source code must retain the above copyright notice, this list of
10 *      conditions and the following disclaimer.
11 *
12 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13 *      of conditions and the following disclaimer in the documentation and/or other materials
14 *      provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name()
28{
29	return "MIPS" SLJIT_CPUINFO;
30}
31
32/* Latest MIPS architecture. */
33/* Detect SLJIT_MIPS_32_64 */
34
/* Type of one instruction word.
   The same type is used for both mips-32 and mips-64. */
typedef sljit_ui sljit_ins;

/* Temporary registers, expressed as sljit register indices placed right
   after the public registers; they are translated through reg_map. */
#define TMP_REG1	(SLJIT_NO_REGISTERS + 1)
#define TMP_REG2	(SLJIT_NO_REGISTERS + 2)
#define TMP_REG3	(SLJIT_NO_REGISTERS + 3)

/* For position independent code, t9 must contain the function address. */
#define PIC_ADDR_REG		TMP_REG2

/* The values below are plain numbers, not sljit register indices. */

/* TMP_EREG1 is used mainly for literal encoding on 64 bit. */
#define TMP_EREG1		15
#define TMP_EREG2		24
/* Floating point status register. */
#define FCSR_REG		31
/* Return address register. */
#define RETURN_ADDR_REG		31

/* Flags are kept in volatile registers. */
#define EQUAL_FLAG	7
/* And carry flag as well. */
#define ULESS_FLAG	10
#define UGREATER_FLAG	11
#define LESS_FLAG	12
#define GREATER_FLAG	13
#define OVERFLOW_FLAG	14

/* Temporary floating point registers, numbered right after SLJIT_FLOAT_REG4. */
#define TMP_FREG1	(SLJIT_FLOAT_REG4 + 1)
#define TMP_FREG2	(SLJIT_FLOAT_REG4 + 2)
65
/* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
/* --------------------------------------------------------------------- */

/* Register fields. The argument is an sljit register index, which is
   translated to a machine register number through reg_map. */
#define S(s)		(reg_map[s] << 21)
#define T(t)		(reg_map[t] << 16)
#define D(d)		(reg_map[d] << 11)
/* Absolute registers. */
#define SA(s)		((s) << 21)
#define TA(t)		((t) << 16)
#define DA(d)		((d) << 11)
/* Floating point register fields. The extra shift by one doubles the
   register number placed into the field. */
#define FT(t)		((t) << (16 + 1))
#define FS(s)		((s) << (11 + 1))
#define FD(d)		((d) << (6 + 1))
/* 16 bit immediate field (lowest 16 bits of the instruction word). */
#define IMM(imm)	((imm) & 0xffff)
/* 5 bit shift amount field (bits 6..10). The argument is parenthesized so
   that expressions such as SH_IMM(a + b) are masked as a whole. */
#define SH_IMM(imm)	(((imm) & 0x1f) << 6)

/* Machine register number of an sljit register index. */
#define DR(dr)		(reg_map[dr])
/* HI places a value into the top 6 bits of the instruction word,
   LO into the lowest bits. */
#define HI(opcode)	((opcode) << 26)
#define LO(opcode)	(opcode)
/* Format field value used by all *_D (double precision) instructions. */
#define FMT_D		(17 << 21)
87
/* Instruction templates. Each value is an instruction word with the
   register and immediate fields left as zero; the fields are OR-ed in
   with the S/T/D, SA/TA/DA, FS/FT/FD, IMM and SH_IMM macros.
   Note that B has the same encoding as BEQ, and NOP the same as SLL. */
#define ABS_D		(HI(17) | FMT_D | LO(5))
#define ADD_D		(HI(17) | FMT_D | LO(0))
#define ADDU		(HI(0) | LO(33))
#define ADDIU		(HI(9))
#define AND		(HI(0) | LO(36))
#define ANDI		(HI(12))
#define B		(HI(4))
#define BAL		(HI(1) | (17 << 16))
#define BC1F		(HI(17) | (8 << 21))
#define BC1T		(HI(17) | (8 << 21) | (1 << 16))
#define BEQ		(HI(4))
#define BGEZ		(HI(1) | (1 << 16))
#define BGTZ		(HI(7))
#define BLEZ		(HI(6))
#define BLTZ		(HI(1) | (0 << 16))
#define BNE		(HI(5))
#define BREAK		(HI(0) | LO(13))
#define C_UN_D		(HI(17) | FMT_D | LO(49))
#define C_UEQ_D		(HI(17) | FMT_D | LO(51))
#define C_ULE_D		(HI(17) | FMT_D | LO(55))
#define C_ULT_D		(HI(17) | FMT_D | LO(53))
#define DIV		(HI(0) | LO(26))
#define DIVU		(HI(0) | LO(27))
#define DIV_D		(HI(17) | FMT_D | LO(3))
#define J		(HI(2))
#define JAL		(HI(3))
#define JALR		(HI(0) | LO(9))
#define JR		(HI(0) | LO(8))
#define LD		(HI(55))
#define LDC1		(HI(53))
#define LUI		(HI(15))
#define LW		(HI(35))
#define NEG_D		(HI(17) | FMT_D | LO(7))
#define MFHI		(HI(0) | LO(16))
#define MFLO		(HI(0) | LO(18))
#define MOV_D		(HI(17) | FMT_D | LO(6))
#define CFC1		(HI(17) | (2 << 21))
#define MOVN		(HI(0) | LO(11))
#define MOVZ		(HI(0) | LO(10))
#define MUL_D		(HI(17) | FMT_D | LO(2))
#define MULT		(HI(0) | LO(24))
#define MULTU		(HI(0) | LO(25))
#define NOP		(HI(0) | LO(0))
#define NOR		(HI(0) | LO(39))
#define OR		(HI(0) | LO(37))
#define ORI		(HI(13))
#define SD		(HI(63))
#define SDC1		(HI(61))
#define SLT		(HI(0) | LO(42))
#define SLTI		(HI(10))
#define SLTIU		(HI(11))
#define SLTU		(HI(0) | LO(43))
#define SLL		(HI(0) | LO(0))
#define SLLV		(HI(0) | LO(4))
#define SRL		(HI(0) | LO(2))
#define SRLV		(HI(0) | LO(6))
#define SRA		(HI(0) | LO(3))
#define SRAV		(HI(0) | LO(7))
#define SUB_D		(HI(17) | FMT_D | LO(1))
#define SUBU		(HI(0) | LO(35))
#define SW		(HI(43))
#define XOR		(HI(0) | LO(38))
#define XORI		(HI(14))

/* Instructions which are only available when SLJIT_MIPS_32_64 is set. */
#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64)
#define CLZ		(HI(28) | LO(32))
#define MUL		(HI(28) | LO(2))
#define SEB		(HI(31) | (16 << 6) | LO(32))
#define SEH		(HI(31) | (24 << 6) | LO(32))
#endif
158
/* Word sized variants of the basic arithmetic instructions. On 32 bit
   they are the plain instructions. On 64 bit they map to the D* forms.
   NOTE(review): DADDU, DADDIU, DSLL and DSUBU are not defined in this
   part of the file and must be provided elsewhere - confirm. */
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
#define ADDU_W		ADDU
#define ADDIU_W		ADDIU
#define SLL_W		SLL
#define SUBU_W		SUBU
#else
#define ADDU_W		DADDU
#define ADDIU_W		DADDIU
#define SLL_W		DSLL
#define SUBU_W		DSUBU
#endif

/* Range of a signed and of an unsigned 16 bit immediate. */
#define SIMM_MAX	(0x7fff)
#define SIMM_MIN	(-0x8000)
#define UIMM_MAX	(0xffff)
174
/* Maps an sljit register index to a machine register number. It is
   accessed through the S(), T(), D() and DR() macros. Index 0 maps to
   register 0. The TMP_REG1..TMP_REG3 indices (SLJIT_NO_REGISTERS + 1..3)
   are looked up in this table as well.
   NOTE(review): presumably the last three initializers (4, 25, 9) belong
   to TMP_REG1..TMP_REG3, 25 being t9 as required for PIC_ADDR_REG -
   confirm against the value of SLJIT_NO_REGISTERS. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 6] = {
  0, 2, 5, 6, 3, 8, 16, 17, 18, 19, 20, 29, 4, 25, 9
};
178
179/* dest_reg is the absolute name of the register
180   Useful for reordering instructions in the delay slot. */
181static int push_inst(struct sljit_compiler *compiler, sljit_ins ins, int delay_slot)
182{
183	sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
184	FAIL_IF(!ptr);
185	*ptr = ins;
186	compiler->size++;
187	compiler->delay_slot = delay_slot;
188	return SLJIT_SUCCESS;
189}
190
191static SLJIT_INLINE sljit_ins invert_branch(int flags)
192{
193	return (flags & IS_BIT26_COND) ? (1 << 26) : (1 << 16);
194}
195
/* Tries to replace the generic jump sequence of 'jump' with a shorter
   PC-relative branch (PATCH_B) or a region-absolute jump (PATCH_J).
   The instructions are rewritten in place, jump->addr is adjusted to the
   instruction whose target field must be patched later, and the
   corresponding PATCH_* flag is set.

   jump     - the jump being processed; jump->addr has already been set
              by sljit_generate_code
   code_ptr - position of the last copied instruction word
   code     - start of the generated code

   Returns the new position of the last used instruction word. This is
   code_ptr itself when no shorter form could be used. */
static SLJIT_INLINE sljit_ins* optimize_jump(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
{
	sljit_w diff;
	sljit_uw target_addr;
	sljit_ins *inst;
	sljit_ins saved_inst;

	/* Rewritable jumps always keep their original form. */
	if (jump->flags & SLJIT_REWRITABLE_JUMP)
		return code_ptr;

	if (jump->flags & JUMP_ADDR)
		target_addr = jump->u.target;
	else {
		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
		/* At this point label->size is the word offset of the label in the
		   generated code, not its final address. */
		target_addr = (sljit_uw)(code + jump->u.label->size);
	}
	inst = (sljit_ins*)jump->addr;
	/* Conditional jumps start one instruction earlier. */
	if (jump->flags & IS_COND)
		inst--;

	/* B instructions. */
	if (jump->flags & IS_MOVABLE) {
		/* The instruction before the jump is moved after the branch, so the
		   branch itself is placed one word earlier (at inst[-1]). */
		diff = ((sljit_w)target_addr - (sljit_w)(inst)) >> 2;
		if (diff <= SIMM_MAX && diff >= SIMM_MIN) {
			jump->flags |= PATCH_B;

			if (!(jump->flags & IS_COND)) {
				inst[0] = inst[-1];
				inst[-1] = (jump->flags & IS_JAL) ? BAL : B;
				jump->addr -= sizeof(sljit_ins);
				return inst;
			}
			/* Swap the conditional branch with the previous instruction and
			   flip its condition bit. */
			saved_inst = inst[0];
			inst[0] = inst[-1];
			inst[-1] = saved_inst ^ invert_branch(jump->flags);
			jump->addr -= 2 * sizeof(sljit_ins);
			return inst;
		}
	}

	/* Branch kept in place, followed by a NOP. */
	diff = ((sljit_w)target_addr - (sljit_w)(inst + 1)) >> 2;
	if (diff <= SIMM_MAX && diff >= SIMM_MIN) {
		jump->flags |= PATCH_B;

		if (!(jump->flags & IS_COND)) {
			inst[0] = (jump->flags & IS_JAL) ? BAL : B;
			inst[1] = NOP;
			return inst + 1;
		}
		inst[0] = inst[0] ^ invert_branch(jump->flags);
		inst[1] = NOP;
		jump->addr -= sizeof(sljit_ins);
		return inst + 1;
	}

	if (jump->flags & IS_COND) {
		/* The target must share the upper address bits with the word which
		   follows the J instruction. */
		if ((target_addr & ~0xfffffff) == ((jump->addr + 3 * sizeof(sljit_ins)) & ~0xfffffff)) {
			jump->flags |= PATCH_J;
			/* The offset of the conditional branch is set to 3, so it skips
			   over the J instruction and the NOP after it. */
			inst[0] = (inst[0] & 0xffff0000) | 3;
			inst[1] = NOP;
			inst[2] = J;
			inst[3] = NOP;
			jump->addr += sizeof(sljit_ins);
			return inst + 3;
		}
		return code_ptr;
	}

	/* J instructions. */
	if (jump->flags & IS_MOVABLE) {
		if ((target_addr & ~0xfffffff) == (jump->addr & ~0xfffffff)) {
			jump->flags |= PATCH_J;
			/* Same reordering as for the movable B case above. */
			inst[0] = inst[-1];
			inst[-1] = (jump->flags & IS_JAL) ? JAL : J;
			jump->addr -= sizeof(sljit_ins);
			return inst;
		}
	}

	if ((target_addr & ~0xfffffff) == ((jump->addr + sizeof(sljit_ins)) & ~0xfffffff)) {
		jump->flags |= PATCH_J;
		inst[0] = (jump->flags & IS_JAL) ? JAL : J;
		inst[1] = NOP;
		return inst + 1;
	}

	/* No shorter form is available. */
	return code_ptr;
}
284
#ifdef __GNUC__
/* Non-inlined wrapper around SLJIT_CACHE_FLUSH. sljit_generate_code calls
   it instead of the macro when compiled by GCC, as a workaround for
   invalid code generation with -O2. */
static __attribute__ ((noinline)) void sljit_cache_flush(void* code, void* code_ptr)
{
	SLJIT_CACHE_FLUSH(code, code_ptr);
}
#endif
291
/* Generates the final machine code from the instruction words collected
   in the compiler buffers.

   First pass: copies every instruction word into a newly allocated
   executable area, records the final address of labels, jumps and
   constants, and lets optimize_jump shorten the jumps.
   Second pass: writes the target address into every jump.

   Returns the start of the generated code. On success compiler->error is
   set to SLJIT_ERR_COMPILED and compiler->executable_size to the size of
   the allocated area. */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_ins *code;
	sljit_ins *code_ptr;
	sljit_ins *buf_ptr;
	sljit_ins *buf_end;
	sljit_uw word_count;
	sljit_uw addr;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	check_sljit_generate_code(compiler);
	reverse_buf(compiler);

	code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

	/* word_count counts the words read from the buffers, while code_ptr is
	   the write position. They differ once a jump has been shortened. */
	code_ptr = code;
	word_count = 0;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	do {
		buf_ptr = (sljit_ins*)buf->memory;
		buf_end = buf_ptr + (buf->used_size >> 2);
		do {
			*code_ptr = *buf_ptr++;
			SLJIT_ASSERT(!label || label->size >= word_count);
			SLJIT_ASSERT(!jump || jump->addr >= word_count);
			SLJIT_ASSERT(!const_ || const_->addr >= word_count);
			/* These structures are ordered by their address. */
			if (label && label->size == word_count) {
				/* Just recording the address. */
				label->addr = (sljit_uw)code_ptr;
				/* label->size becomes the word offset in the generated code,
				   which is what optimize_jump reads. */
				label->size = code_ptr - code;
				label = label->next;
			}
			if (jump && jump->addr == word_count) {
				/* Step back 3 (32 bit) or 6 (64 bit) words from the current
				   word. The second pass patches the immediate fields starting
				   from this position when the jump is not shortened. */
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
				jump->addr = (sljit_uw)(code_ptr - 3);
#else
				jump->addr = (sljit_uw)(code_ptr - 6);
#endif
				code_ptr = optimize_jump(jump, code_ptr, code);
				jump = jump->next;
			}
			if (const_ && const_->addr == word_count) {
				/* Just recording the address. */
				const_->addr = (sljit_uw)code_ptr;
				const_ = const_->next;
			}
			code_ptr ++;
			word_count ++;
		} while (buf_ptr < buf_end);

		buf = buf->next;
	} while (buf);

	/* A label may be placed right after the last instruction. */
	if (label && label->size == word_count) {
		label->addr = (sljit_uw)code_ptr;
		label->size = code_ptr - code;
		label = label->next;
	}

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);
	SLJIT_ASSERT(code_ptr - code <= (int)compiler->size);

	/* Second pass: all label addresses are known, patch the jumps. */
	jump = compiler->jumps;
	while (jump) {
		do {
			addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
			buf_ptr = (sljit_ins*)jump->addr;

			if (jump->flags & PATCH_B) {
				/* Word offset relative to the instruction after the branch,
				   stored in the lower 16 bits. */
				addr = (sljit_w)(addr - (jump->addr + sizeof(sljit_ins))) >> 2;
				SLJIT_ASSERT((sljit_w)addr <= SIMM_MAX && (sljit_w)addr >= SIMM_MIN);
				buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | (addr & 0xffff);
				break;
			}
			if (jump->flags & PATCH_J) {
				/* Word address of the target, stored in the lower 26 bits. */
				SLJIT_ASSERT((addr & ~0xfffffff) == ((jump->addr + sizeof(sljit_ins)) & ~0xfffffff));
				buf_ptr[0] |= (addr >> 2) & 0x03ffffff;
				break;
			}

			/* Set the fields of immediate loads. */
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
			buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
			buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
#else
			/* buf_ptr[2] is left untouched in the 64 bit sequence. */
			buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 48) & 0xffff);
			buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 32) & 0xffff);
			buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | ((addr >> 16) & 0xffff);
			buf_ptr[4] = (buf_ptr[4] & 0xffff0000) | (addr & 0xffff);
#endif
		} while (0);
		jump = jump->next;
	}

	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_size = compiler->size * sizeof(sljit_ins);
#ifndef __GNUC__
	SLJIT_CACHE_FLUSH(code, code_ptr);
#else
	/* GCC workaround for invalid code generation with -O2. */
	sljit_cache_flush(code, code_ptr);
#endif
	return code;
}
408
/* Creates an index in data_transfer_insts array.
   The lowest two bits select the data size, SIGNED_DATA selects the
   signed variant and LOAD_DATA selects load instead of store. */
#define WORD_DATA	0x00
#define BYTE_DATA	0x01
#define HALF_DATA	0x02
#define INT_DATA	0x03
#define SIGNED_DATA	0x04
#define LOAD_DATA	0x08

/* Mask of the bits which form the data_transfer_insts index. */
#define MEM_MASK	0x0f

/* Further bits of the 'flags' argument passed around by emit_op,
   getput_arg_fast and getput_arg. */
#define WRITE_BACK	0x00010
#define ARG_TEST	0x00020
#define CUMULATIVE_OP	0x00040
#define LOGICAL_OP	0x00080
#define IMM_OP		0x00100
#define SRC2_IMM	0x00200

/* These bits are set by emit_op itself while it processes the operands. */
#define UNUSED_DEST	0x00400
#define REG_DEST	0x00800
#define REG1_SOURCE	0x01000
#define REG2_SOURCE	0x02000
#define SLOW_SRC1	0x04000
#define SLOW_SRC2	0x08000
#define SLOW_DEST	0x10000

/* Only these flags are set. UNUSED_DEST is not set when no flags should be set.
   The macro reads the 'flags' and 'op' variables of the place where it is used. */
#define CHECK_FLAGS(list) \
	(!(flags & UNUSED_DEST) || (op & GET_FLAGS(~(list))))
437
438#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
439#include "sljitNativeMIPS_32.c"
440#else
441#include "sljitNativeMIPS_64.c"
442#endif
443
/* Word sized store / load instructions used by sljit_emit_enter and
   sljit_emit_return to save and restore registers on the stack. */
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
#define STACK_STORE	SW
#define STACK_LOAD	LW
#else
#define STACK_STORE	SD
#define STACK_LOAD	LD
#endif

/* Forward declaration, the function is defined later in this file. */
static int emit_op(struct sljit_compiler *compiler, int op, int inp_flags,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w);
456
457SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size)
458{
459	sljit_ins base;
460
461	CHECK_ERROR();
462	check_sljit_emit_enter(compiler, args, temporaries, saveds, local_size);
463
464	compiler->temporaries = temporaries;
465	compiler->saveds = saveds;
466#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
467	compiler->logical_local_size = local_size;
468#endif
469
470	local_size += (saveds + 1 + 4) * sizeof(sljit_w);
471	local_size = (local_size + 15) & ~0xf;
472	compiler->local_size = local_size;
473
474	if (local_size <= SIMM_MAX) {
475		/* Frequent case. */
476		FAIL_IF(push_inst(compiler, ADDIU_W | S(SLJIT_LOCALS_REG) | T(SLJIT_LOCALS_REG) | IMM(-local_size), DR(SLJIT_LOCALS_REG)));
477		base = S(SLJIT_LOCALS_REG);
478	}
479	else {
480		FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size));
481		FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_LOCALS_REG) | TA(0) | D(TMP_REG2), DR(TMP_REG2)));
482		FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_LOCALS_REG) | T(TMP_REG1) | D(SLJIT_LOCALS_REG), DR(SLJIT_LOCALS_REG)));
483		base = S(TMP_REG2);
484		local_size = 0;
485	}
486
487	FAIL_IF(push_inst(compiler, STACK_STORE | base | TA(RETURN_ADDR_REG) | IMM(local_size - 1 * (int)sizeof(sljit_w)), MOVABLE_INS));
488	if (saveds >= 1)
489		FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_REG1) | IMM(local_size - 2 * (int)sizeof(sljit_w)), MOVABLE_INS));
490	if (saveds >= 2)
491		FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_REG2) | IMM(local_size - 3 * (int)sizeof(sljit_w)), MOVABLE_INS));
492	if (saveds >= 3)
493		FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_REG3) | IMM(local_size - 4 * (int)sizeof(sljit_w)), MOVABLE_INS));
494	if (saveds >= 4)
495		FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_EREG1) | IMM(local_size - 5 * (int)sizeof(sljit_w)), MOVABLE_INS));
496	if (saveds >= 5)
497		FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_EREG2) | IMM(local_size - 6 * (int)sizeof(sljit_w)), MOVABLE_INS));
498
499	if (args >= 1)
500		FAIL_IF(push_inst(compiler, ADDU_W | SA(4) | TA(0) | D(SLJIT_SAVED_REG1), DR(SLJIT_SAVED_REG1)));
501	if (args >= 2)
502		FAIL_IF(push_inst(compiler, ADDU_W | SA(5) | TA(0) | D(SLJIT_SAVED_REG2), DR(SLJIT_SAVED_REG2)));
503	if (args >= 3)
504		FAIL_IF(push_inst(compiler, ADDU_W | SA(6) | TA(0) | D(SLJIT_SAVED_REG3), DR(SLJIT_SAVED_REG3)));
505
506	return SLJIT_SUCCESS;
507}
508
509SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size)
510{
511	CHECK_ERROR_VOID();
512	check_sljit_set_context(compiler, args, temporaries, saveds, local_size);
513
514	compiler->temporaries = temporaries;
515	compiler->saveds = saveds;
516#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
517	compiler->logical_local_size = local_size;
518#endif
519
520	local_size += (saveds + 1 + 4) * sizeof(sljit_w);
521	compiler->local_size = (local_size + 15) & ~0xf;
522}
523
524SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, int op, int src, sljit_w srcw)
525{
526	int local_size;
527	sljit_ins base;
528
529	CHECK_ERROR();
530	check_sljit_emit_return(compiler, op, src, srcw);
531	ADJUST_LOCAL_OFFSET(src, srcw);
532
533	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
534
535	local_size = compiler->local_size;
536	if (local_size <= SIMM_MAX)
537		base = S(SLJIT_LOCALS_REG);
538	else {
539		FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size));
540		FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_LOCALS_REG) | T(TMP_REG1) | D(TMP_REG1), DR(TMP_REG1)));
541		base = S(TMP_REG1);
542		local_size = 0;
543	}
544
545	FAIL_IF(push_inst(compiler, STACK_LOAD | base | TA(RETURN_ADDR_REG) | IMM(local_size - 1 * (int)sizeof(sljit_w)), RETURN_ADDR_REG));
546	if (compiler->saveds >= 5)
547		FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_EREG2) | IMM(local_size - 6 * (int)sizeof(sljit_w)), DR(SLJIT_SAVED_EREG2)));
548	if (compiler->saveds >= 4)
549		FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_EREG1) | IMM(local_size - 5 * (int)sizeof(sljit_w)), DR(SLJIT_SAVED_EREG1)));
550	if (compiler->saveds >= 3)
551		FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_REG3) | IMM(local_size - 4 * (int)sizeof(sljit_w)), DR(SLJIT_SAVED_REG3)));
552	if (compiler->saveds >= 2)
553		FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_REG2) | IMM(local_size - 3 * (int)sizeof(sljit_w)), DR(SLJIT_SAVED_REG2)));
554	if (compiler->saveds >= 1)
555		FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_REG1) | IMM(local_size - 2 * (int)sizeof(sljit_w)), DR(SLJIT_SAVED_REG1)));
556
557	FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS));
558	if (compiler->local_size <= SIMM_MAX)
559		return push_inst(compiler, ADDIU_W | S(SLJIT_LOCALS_REG) | T(SLJIT_LOCALS_REG) | IMM(compiler->local_size), UNMOVABLE_INS);
560	else
561		return push_inst(compiler, ADDU_W | S(TMP_REG1) | TA(0) | D(SLJIT_LOCALS_REG), UNMOVABLE_INS);
562}
563
564#undef STACK_STORE
565#undef STACK_LOAD
566
567/* --------------------------------------------------------------------- */
568/*  Operators                                                            */
569/* --------------------------------------------------------------------- */
570
/* Selects the first argument on 32 bit and the second one on 64 bit. */
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
#define ARCH_DEPEND(a, b)	a
#else
#define ARCH_DEPEND(a, b)	b
#endif

/* Load / store instruction templates, indexed by (flags & MEM_MASK).
   The three letters before each entry stand for:
     s / l - store / load (LOAD_DATA)
     u / s - unsigned / signed (SIGNED_DATA)
     w / b / h / i - WORD_DATA / BYTE_DATA / HALF_DATA / INT_DATA */
static SLJIT_CONST sljit_ins data_transfer_insts[16] = {
/* s u w */ ARCH_DEPEND(HI(43) /* sw */, HI(63) /* sd */),
/* s u b */ HI(40) /* sb */,
/* s u h */ HI(41) /* sh*/,
/* s u i */ HI(43) /* sw */,

/* s s w */ ARCH_DEPEND(HI(43) /* sw */, HI(63) /* sd */),
/* s s b */ HI(40) /* sb */,
/* s s h */ HI(41) /* sh*/,
/* s s i */ HI(43) /* sw */,

/* l u w */ ARCH_DEPEND(HI(35) /* lw */, HI(55) /* ld */),
/* l u b */ HI(36) /* lbu */,
/* l u h */ HI(37) /* lhu */,
/* l u i */ ARCH_DEPEND(HI(35) /* lw */, HI(39) /* lwu */),

/* l s w */ ARCH_DEPEND(HI(35) /* lw */, HI(55) /* ld */),
/* l s b */ HI(32) /* lb */,
/* l s h */ HI(33) /* lh */,
/* l s i */ HI(35) /* lw */,
};
598
599/* reg_ar is an absoulute register! */
600
601/* Can perform an operation using at most 1 instruction. */
602static int getput_arg_fast(struct sljit_compiler *compiler, int flags, int reg_ar, int arg, sljit_w argw)
603{
604	SLJIT_ASSERT(arg & SLJIT_MEM);
605
606	if (!(flags & WRITE_BACK) && !(arg & 0xf0) && argw <= SIMM_MAX && argw >= SIMM_MIN) {
607		/* Works for both absoulte and relative addresses. */
608		if (SLJIT_UNLIKELY(flags & ARG_TEST))
609			return 1;
610		FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(arg & 0xf) | TA(reg_ar) | IMM(argw), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS));
611		return -1;
612	}
613	return (flags & ARG_TEST) ? SLJIT_SUCCESS : 0;
614}
615
616/* See getput_arg below.
617   Note: can_cache is called only for binary operators. Those
618   operators always uses word arguments without write back. */
619static int can_cache(int arg, sljit_w argw, int next_arg, sljit_w next_argw)
620{
621	if (!(next_arg & SLJIT_MEM))
622		return 0;
623
624	/* Simple operation except for updates. */
625	if (arg & 0xf0) {
626		argw &= 0x3;
627		next_argw &= 0x3;
628		if (argw && argw == next_argw && (arg == next_arg || (arg & 0xf0) == (next_arg & 0xf0)))
629			return 1;
630		return 0;
631	}
632
633	if (arg == next_arg) {
634		if (((sljit_uw)(next_argw - argw) <= SIMM_MAX && (sljit_uw)(next_argw - argw) >= SIMM_MIN))
635			return 1;
636		return 0;
637	}
638
639	return 0;
640}
641
/* Emit the necessary instructions. See can_cache above.

   Performs the load or store selected by flags between the absolute
   register reg_ar and the memory operand (arg, argw) when the single
   instruction form of getput_arg_fast cannot be used.
   (next_arg, next_argw) describes the memory operand processed after
   this one. It is only used to decide whether the computed address
   should be kept in TMP_REG3.

   The content of TMP_REG3 is described by compiler->cache_arg and
   compiler->cache_argw:
     cache_arg == SLJIT_MEM: TMP_REG3 holds the immediate cache_argw
     cache_arg == SLJIT_MEM | (arg & 0xf0): TMP_REG3 holds the index
       register shifted left by cache_argw
     cache_arg == arg: TMP_REG3 holds the whole address (for the non-indexed
       form: base + cache_argw)

   Returns SLJIT_SUCCESS or an error code. */
static int getput_arg(struct sljit_compiler *compiler, int flags, int reg_ar, int arg, sljit_w argw, int next_arg, sljit_w next_argw)
{
	int tmp_ar;
	int base;

	SLJIT_ASSERT(arg & SLJIT_MEM);
	if (!(next_arg & SLJIT_MEM)) {
		next_arg = 0;
		next_argw = 0;
	}

	/* A load may use its own target register for the address computation,
	   a store needs TMP_REG3. */
	tmp_ar = (flags & LOAD_DATA) ? reg_ar : DR(TMP_REG3);
	base = arg & 0xf;

	/* Indexed form: base register + (index register << (argw & 0x3)). */
	if (SLJIT_UNLIKELY(arg & 0xf0)) {
		argw &= 0x3;
		if ((flags & WRITE_BACK) && reg_ar == DR(base)) {
			/* The base is going to be updated, so the value to be stored is
			   copied into TMP_REG1 first. */
			SLJIT_ASSERT(!(flags & LOAD_DATA) && DR(TMP_REG1) != reg_ar);
			FAIL_IF(push_inst(compiler, ADDU_W | SA(reg_ar) | TA(0) | D(TMP_REG1), DR(TMP_REG1)));
			reg_ar = DR(TMP_REG1);
		}

		/* Using the cache. */
		if (argw == compiler->cache_argw) {
			if (!(flags & WRITE_BACK)) {
				/* TMP_REG3 already contains the whole address. */
				if (arg == compiler->cache_arg)
					return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS);
				/* TMP_REG3 contains the shifted index register. */
				if ((SLJIT_MEM | (arg & 0xf0)) == compiler->cache_arg) {
					if (arg == next_arg && argw == (next_argw & 0x3)) {
						/* The next operand uses the same address: keep it in TMP_REG3. */
						compiler->cache_arg = arg;
						compiler->cache_argw = argw;
						FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(TMP_REG3), DR(TMP_REG3)));
						return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS);
					}
					FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | DA(tmp_ar), tmp_ar));
					return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS);
				}
			}
			else {
				if ((SLJIT_MEM | (arg & 0xf0)) == compiler->cache_arg) {
					/* Write back: the new address is computed into the base register. */
					FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(base), DR(base)));
					return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS);
				}
			}
		}

		/* No usable cache content: compute the shifted index into TMP_REG3. */
		if (SLJIT_UNLIKELY(argw)) {
			compiler->cache_arg = SLJIT_MEM | (arg & 0xf0);
			compiler->cache_argw = argw;
			FAIL_IF(push_inst(compiler, SLL_W | T((arg >> 4) & 0xf) | D(TMP_REG3) | SH_IMM(argw), DR(TMP_REG3)));
		}

		/* When the shift is zero, the index register is added directly. */
		if (!(flags & WRITE_BACK)) {
			if (arg == next_arg && argw == (next_argw & 0x3)) {
				compiler->cache_arg = arg;
				compiler->cache_argw = argw;
				FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? ((arg >> 4) & 0xf) : TMP_REG3) | D(TMP_REG3), DR(TMP_REG3)));
				tmp_ar = DR(TMP_REG3);
			}
			else
				FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? ((arg >> 4) & 0xf) : TMP_REG3) | DA(tmp_ar), tmp_ar));
			return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS);
		}
		FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? ((arg >> 4) & 0xf) : TMP_REG3) | D(base), DR(base)));
		return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS);
	}

	/* Base register + offset form with write back. */
	if (SLJIT_UNLIKELY(flags & WRITE_BACK) && base) {
		/* Update only applies if a base register exists. */
		if (reg_ar == DR(base)) {
			SLJIT_ASSERT(!(flags & LOAD_DATA) && DR(TMP_REG1) != reg_ar);
			if (argw <= SIMM_MAX && argw >= SIMM_MIN) {
				/* The base register itself is stored: store it first with the
				   offset as immediate, and update it afterwards. */
				FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar) | IMM(argw), MOVABLE_INS));
				if (argw)
					return push_inst(compiler, ADDIU_W | S(base) | T(base) | IMM(argw), DR(base));
				return SLJIT_SUCCESS;
			}
			FAIL_IF(push_inst(compiler, ADDU_W | SA(reg_ar) | TA(0) | D(TMP_REG1), DR(TMP_REG1)));
			reg_ar = DR(TMP_REG1);
		}

		if (argw <= SIMM_MAX && argw >= SIMM_MIN) {
			if (argw)
				FAIL_IF(push_inst(compiler, ADDIU_W | S(base) | T(base) | IMM(argw), DR(base)));
		}
		else {
			/* Large offset: reuse the immediate in TMP_REG3 when it is close
			   enough, otherwise load it. */
			if (compiler->cache_arg == SLJIT_MEM && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) {
				if (argw != compiler->cache_argw) {
					FAIL_IF(push_inst(compiler, ADDIU_W | S(TMP_REG3) | T(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3)));
					compiler->cache_argw = argw;
				}
				FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(base), DR(base)));
			}
			else {
				compiler->cache_arg = SLJIT_MEM;
				compiler->cache_argw = argw;
				FAIL_IF(load_immediate(compiler, DR(TMP_REG3), argw));
				FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(base), DR(base)));
			}
		}
		return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS);
	}

	/* TMP_REG3 holds the address of the same operand with a nearby offset:
	   adjust it when needed and use it directly. */
	if (compiler->cache_arg == arg && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) {
		if (argw != compiler->cache_argw) {
			FAIL_IF(push_inst(compiler, ADDIU_W | S(TMP_REG3) | T(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3)));
			compiler->cache_argw = argw;
		}
		return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS);
	}

	/* Get the offset into TMP_REG3, either by adjusting a cached immediate
	   or by loading it. */
	if (compiler->cache_arg == SLJIT_MEM && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) {
		if (argw != compiler->cache_argw)
			FAIL_IF(push_inst(compiler, ADDIU_W | S(TMP_REG3) | T(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3)));
	}
	else {
		compiler->cache_arg = SLJIT_MEM;
		FAIL_IF(load_immediate(compiler, DR(TMP_REG3), argw));
	}
	compiler->cache_argw = argw;

	/* Absolute address: TMP_REG3 is the address itself. */
	if (!base)
		return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS);

	/* The next operand is close to this one: keep the whole address in TMP_REG3. */
	if (arg == next_arg && next_argw - argw <= SIMM_MAX && next_argw - argw >= SIMM_MIN) {
		compiler->cache_arg = arg;
		FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | T(base) | D(TMP_REG3), DR(TMP_REG3)));
		return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS);
	}

	/* Otherwise TMP_REG3 keeps the immediate, and the address goes to tmp_ar. */
	FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | T(base) | DA(tmp_ar), tmp_ar));
	return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS);
}
776
777static SLJIT_INLINE int emit_op_mem(struct sljit_compiler *compiler, int flags, int reg_ar, int arg, sljit_w argw)
778{
779	if (getput_arg_fast(compiler, flags, reg_ar, arg, argw))
780		return compiler->error;
781	compiler->cache_arg = 0;
782	compiler->cache_argw = 0;
783	return getput_arg(compiler, flags, reg_ar, arg, argw, 0, 0);
784}
785
/* Emits an operation with one destination and two source operands.
   The operands are brought into registers (or src2 is turned into an
   immediate when IMM_OP allows it), emit_single_op emits the operation
   itself, and the result is stored when the destination is in memory.

   op    - operation code, which may also contain flag request bits
   flags - data size and the CUMULATIVE_OP / LOGICAL_OP / IMM_OP /
           WRITE_BACK properties of the operation; the REG_*, SLOW_*,
           SRC2_IMM and UNUSED_DEST bits are added here
   dst, dstw, src1, src1w, src2, src2w - the operands

   Returns SLJIT_SUCCESS or an error code. */
static int emit_op(struct sljit_compiler *compiler, int op, int flags,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	/* arg1 goes to TMP_REG1 or src reg
	   arg2 goes to TMP_REG2, imm or src reg
	   TMP_REG3 can be used for caching
	   result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */
	int dst_r = TMP_REG2;
	int src1_r;
	sljit_w src2_r = 0;
	int sugg_src2_r = TMP_REG2;

	/* Every operation starts with an empty address cache. */
	compiler->cache_arg = 0;
	compiler->cache_argw = 0;

	/* Destination. */
	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REG3) {
		dst_r = dst;
		flags |= REG_DEST;
		/* Move operations can load the source directly into the destination. */
		if (GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOVU_SI)
			sugg_src2_r = dst_r;
	}
	else if (dst == SLJIT_UNUSED) {
		/* A move without destination and without memory source does nothing. */
		if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM))
			return SLJIT_SUCCESS;
		if (GET_FLAGS(op))
			flags |= UNUSED_DEST;
	}
	else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, DR(TMP_REG1), dst, dstw))
		flags |= SLOW_DEST;

	/* Try to use one of the sources as an immediate operand. */
	if (flags & IMM_OP) {
		if ((src2 & SLJIT_IMM) && src2w) {
			/* Logical operations take an unsigned 16 bit immediate, the
			   others a signed one. */
			if ((!(flags & LOGICAL_OP) && (src2w <= SIMM_MAX && src2w >= SIMM_MIN))
				|| ((flags & LOGICAL_OP) && !(src2w & ~UIMM_MAX))) {
				flags |= SRC2_IMM;
				src2_r = src2w;
			}
		}
		if ((src1 & SLJIT_IMM) && src1w && (flags & CUMULATIVE_OP) && !(flags & SRC2_IMM)) {
			if ((!(flags & LOGICAL_OP) && (src1w <= SIMM_MAX && src1w >= SIMM_MIN))
				|| ((flags & LOGICAL_OP) && !(src1w & ~UIMM_MAX))) {
				flags |= SRC2_IMM;
				src2_r = src1w;

				/* And swap arguments. */
				src1 = src2;
				src1w = src2w;
				src2 = SLJIT_IMM;
				/* src2w = src2_r unneeded. */
			}
		}
	}

	/* Source 1. */
	if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= TMP_REG3) {
		src1_r = src1;
		flags |= REG1_SOURCE;
	}
	else if (src1 & SLJIT_IMM) {
		if (src1w) {
			FAIL_IF(load_immediate(compiler, DR(TMP_REG1), src1w));
			src1_r = TMP_REG1;
		}
		else
			/* Register index 0 is used for a zero immediate. */
			src1_r = 0;
	}
	else {
		/* Memory operand: loaded now when one instruction is enough,
		   otherwise after both sources have been examined. */
		if (getput_arg_fast(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w))
			FAIL_IF(compiler->error);
		else
			flags |= SLOW_SRC1;
		src1_r = TMP_REG1;
	}

	/* Source 2. */
	if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= TMP_REG3) {
		src2_r = src2;
		flags |= REG2_SOURCE;
		/* A register to memory move can store the source register directly. */
		if (!(flags & REG_DEST) && GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOVU_SI)
			dst_r = src2_r;
	}
	else if (src2 & SLJIT_IMM) {
		if (!(flags & SRC2_IMM)) {
			if (src2w || (GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOVU_SI)) {
				FAIL_IF(load_immediate(compiler, DR(sugg_src2_r), src2w));
				src2_r = sugg_src2_r;
			}
			else
				src2_r = 0;
		}
	}
	else {
		if (getput_arg_fast(compiler, flags | LOAD_DATA, DR(sugg_src2_r), src2, src2w))
			FAIL_IF(compiler->error);
		else
			flags |= SLOW_SRC2;
		src2_r = sugg_src2_r;
	}

	/* Load the slow memory sources. Each call gets the operand processed
	   after it as 'next' argument, so getput_arg can keep a reusable
	   address in TMP_REG3. */
	if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
		SLJIT_ASSERT(src2_r == TMP_REG2);
		/* src2 is loaded first when src1 cannot share its address with src2
		   but can share it with dst. */
		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG2), src2, src2w, src1, src1w));
			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w, dst, dstw));
		}
		else {
			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w, src2, src2w));
			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG2), src2, src2w, dst, dstw));
		}
	}
	else if (flags & SLOW_SRC1)
		FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w, dst, dstw));
	else if (flags & SLOW_SRC2)
		FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(sugg_src2_r), src2, src2w, dst, dstw));

	FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));

	/* Store the result when the destination is in memory. */
	if (dst & SLJIT_MEM) {
		if (!(flags & SLOW_DEST)) {
			/* The return value only tells that the store was handled, the
			   status is taken from compiler->error. */
			getput_arg_fast(compiler, flags, DR(dst_r), dst, dstw);
			return compiler->error;
		}
		return getput_arg(compiler, flags, DR(dst_r), dst, dstw, 0, 0);
	}

	return SLJIT_SUCCESS;
}
915
916SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op)
917{
918	CHECK_ERROR();
919	check_sljit_emit_op0(compiler, op);
920
921	op = GET_OPCODE(op);
922	switch (op) {
923	case SLJIT_BREAKPOINT:
924		return push_inst(compiler, BREAK, UNMOVABLE_INS);
925	case SLJIT_NOP:
926		return push_inst(compiler, NOP, UNMOVABLE_INS);
927	case SLJIT_UMUL:
928	case SLJIT_SMUL:
929		FAIL_IF(push_inst(compiler, (op == SLJIT_UMUL ? MULTU : MULT) | S(SLJIT_TEMPORARY_REG1) | T(SLJIT_TEMPORARY_REG2), MOVABLE_INS));
930		FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_TEMPORARY_REG1), DR(SLJIT_TEMPORARY_REG1)));
931		return push_inst(compiler, MFHI | D(SLJIT_TEMPORARY_REG2), DR(SLJIT_TEMPORARY_REG2));
932	case SLJIT_UDIV:
933	case SLJIT_SDIV:
934#if !(defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64)
935		FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
936		FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
937#endif
938		FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVU : DIV) | S(SLJIT_TEMPORARY_REG1) | T(SLJIT_TEMPORARY_REG2), MOVABLE_INS));
939		FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_TEMPORARY_REG1), DR(SLJIT_TEMPORARY_REG1)));
940		return push_inst(compiler, MFHI | D(SLJIT_TEMPORARY_REG2), DR(SLJIT_TEMPORARY_REG2));
941	}
942
943	return SLJIT_SUCCESS;
944}
945
946SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct sljit_compiler *compiler, int op,
947	int dst, sljit_w dstw,
948	int src, sljit_w srcw)
949{
950#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
951	#define inp_flags 0
952#endif
953
954	CHECK_ERROR();
955	check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
956	ADJUST_LOCAL_OFFSET(dst, dstw);
957	ADJUST_LOCAL_OFFSET(src, srcw);
958
959	SLJIT_COMPILE_ASSERT(SLJIT_MOV + 7 == SLJIT_MOVU, movu_offset);
960
961	switch (GET_OPCODE(op)) {
962	case SLJIT_MOV:
963		return emit_op(compiler, SLJIT_MOV, inp_flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
964
965	case SLJIT_MOV_UI:
966		return emit_op(compiler, SLJIT_MOV_UI, inp_flags | INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
967
968	case SLJIT_MOV_SI:
969		return emit_op(compiler, SLJIT_MOV_SI, inp_flags | INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
970
971	case SLJIT_MOV_UB:
972		return emit_op(compiler, SLJIT_MOV_UB, inp_flags | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw);
973
974	case SLJIT_MOV_SB:
975		return emit_op(compiler, SLJIT_MOV_SB, inp_flags | BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw);
976
977	case SLJIT_MOV_UH:
978		return emit_op(compiler, SLJIT_MOV_UH, inp_flags | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw);
979
980	case SLJIT_MOV_SH:
981		return emit_op(compiler, SLJIT_MOV_SH, inp_flags | HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw);
982
983	case SLJIT_MOVU:
984		return emit_op(compiler, SLJIT_MOV, inp_flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
985
986	case SLJIT_MOVU_UI:
987		return emit_op(compiler, SLJIT_MOV_UI, inp_flags | INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
988
989	case SLJIT_MOVU_SI:
990		return emit_op(compiler, SLJIT_MOV_SI, inp_flags | INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
991
992	case SLJIT_MOVU_UB:
993		return emit_op(compiler, SLJIT_MOV_UB, inp_flags | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw);
994
995	case SLJIT_MOVU_SB:
996		return emit_op(compiler, SLJIT_MOV_SB, inp_flags | BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw);
997
998	case SLJIT_MOVU_UH:
999		return emit_op(compiler, SLJIT_MOV_UH, inp_flags | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw);
1000
1001	case SLJIT_MOVU_SH:
1002		return emit_op(compiler, SLJIT_MOV_SH, inp_flags | HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw);
1003
1004	case SLJIT_NOT:
1005		return emit_op(compiler, op, inp_flags, dst, dstw, TMP_REG1, 0, src, srcw);
1006
1007	case SLJIT_NEG:
1008		return emit_op(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), inp_flags | IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw);
1009
1010	case SLJIT_CLZ:
1011		return emit_op(compiler, op, inp_flags, dst, dstw, TMP_REG1, 0, src, srcw);
1012	}
1013
1014	return SLJIT_SUCCESS;
1015#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
1016	#undef inp_flags
1017#endif
1018}
1019
1020SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int op,
1021	int dst, sljit_w dstw,
1022	int src1, sljit_w src1w,
1023	int src2, sljit_w src2w)
1024{
1025#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
1026	#define inp_flags 0
1027#endif
1028
1029	CHECK_ERROR();
1030	check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
1031	ADJUST_LOCAL_OFFSET(dst, dstw);
1032	ADJUST_LOCAL_OFFSET(src1, src1w);
1033	ADJUST_LOCAL_OFFSET(src2, src2w);
1034
1035	switch (GET_OPCODE(op)) {
1036	case SLJIT_ADD:
1037	case SLJIT_ADDC:
1038		return emit_op(compiler, op, inp_flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
1039
1040	case SLJIT_SUB:
1041	case SLJIT_SUBC:
1042		return emit_op(compiler, op, inp_flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
1043
1044	case SLJIT_MUL:
1045		return emit_op(compiler, op, inp_flags | CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w);
1046
1047	case SLJIT_AND:
1048	case SLJIT_OR:
1049	case SLJIT_XOR:
1050		return emit_op(compiler, op, inp_flags | CUMULATIVE_OP | LOGICAL_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
1051
1052	case SLJIT_SHL:
1053	case SLJIT_LSHR:
1054	case SLJIT_ASHR:
1055#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
1056		if (src2 & SLJIT_IMM)
1057			src2w &= 0x1f;
1058#else
1059		if (src2 & SLJIT_IMM)
1060			src2w &= 0x3f;
1061#endif
1062		return emit_op(compiler, op, inp_flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
1063	}
1064
1065	return SLJIT_SUCCESS;
1066#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
1067	#undef inp_flags
1068#endif
1069}
1070
1071SLJIT_API_FUNC_ATTRIBUTE int sljit_get_register_index(int reg)
1072{
1073	check_sljit_get_register_index(reg);
1074	return reg_map[reg];
1075}
1076
1077SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op_custom(struct sljit_compiler *compiler,
1078	void *instruction, int size)
1079{
1080	CHECK_ERROR();
1081	check_sljit_emit_op_custom(compiler, instruction, size);
1082	SLJIT_ASSERT(size == 4);
1083
1084	return push_inst(compiler, *(sljit_ins*)instruction, UNMOVABLE_INS);
1085}
1086
1087/* --------------------------------------------------------------------- */
1088/*  Floating point operators                                             */
1089/* --------------------------------------------------------------------- */
1090
1091SLJIT_API_FUNC_ATTRIBUTE int sljit_is_fpu_available(void)
1092{
1093#if (defined SLJIT_QEMU && SLJIT_QEMU)
1094	/* Qemu says fir is 0 by default. */
1095	return 1;
1096#elif defined(__GNUC__)
1097	sljit_w fir;
1098	asm ("cfc1 %0, $0" : "=r"(fir));
1099	return (fir >> 22) & 0x1;
1100#else
1101#error "FIR check is not implemented for this architecture"
1102#endif
1103}
1104
1105static int emit_fpu_data_transfer(struct sljit_compiler *compiler, int fpu_reg, int load, int arg, sljit_w argw)
1106{
1107	int hi_reg;
1108
1109	SLJIT_ASSERT(arg & SLJIT_MEM);
1110
1111	/* Fast loads and stores. */
1112	if (!(arg & 0xf0)) {
1113		/* Both for (arg & 0xf) == SLJIT_UNUSED and (arg & 0xf) != SLJIT_UNUSED. */
1114		if (argw <= SIMM_MAX && argw >= SIMM_MIN)
1115			return push_inst(compiler, (load ? LDC1 : SDC1) | S(arg & 0xf) | FT(fpu_reg) | IMM(argw), MOVABLE_INS);
1116	}
1117
1118	if (arg & 0xf0) {
1119		argw &= 0x3;
1120		hi_reg = (arg >> 4) & 0xf;
1121		if (argw) {
1122			FAIL_IF(push_inst(compiler, SLL_W | T(hi_reg) | D(TMP_REG1) | SH_IMM(argw), DR(TMP_REG1)));
1123			hi_reg = TMP_REG1;
1124		}
1125		FAIL_IF(push_inst(compiler, ADDU_W | S(hi_reg) | T(arg & 0xf) | D(TMP_REG1), DR(TMP_REG1)));
1126		return push_inst(compiler, (load ? LDC1 : SDC1) | S(TMP_REG1) | FT(fpu_reg) | IMM(0), MOVABLE_INS);
1127	}
1128
1129	/* Use cache. */
1130	if (compiler->cache_arg == arg && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN)
1131		return push_inst(compiler, (load ? LDC1 : SDC1) | S(TMP_REG3) | FT(fpu_reg) | IMM(argw - compiler->cache_argw), MOVABLE_INS);
1132
1133	/* Put value to cache. */
1134	compiler->cache_arg = arg;
1135	compiler->cache_argw = argw;
1136
1137	FAIL_IF(load_immediate(compiler, DR(TMP_REG3), argw));
1138	if (arg & 0xf)
1139		FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | T(arg & 0xf) | D(TMP_REG3), DR(TMP_REG3)));
1140	return push_inst(compiler, (load ? LDC1 : SDC1) | S(TMP_REG3) | FT(fpu_reg) | IMM(0), MOVABLE_INS);
1141}
1142
1143SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
1144	int dst, sljit_w dstw,
1145	int src, sljit_w srcw)
1146{
1147	int dst_fr;
1148
1149	CHECK_ERROR();
1150	check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
1151
1152	compiler->cache_arg = 0;
1153	compiler->cache_argw = 0;
1154
1155	if (GET_OPCODE(op) == SLJIT_FCMP) {
1156		if (dst > SLJIT_FLOAT_REG4) {
1157			FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG1, 1, dst, dstw));
1158			dst = TMP_FREG1;
1159		}
1160		if (src > SLJIT_FLOAT_REG4) {
1161			FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG2, 1, src, srcw));
1162			src = TMP_FREG2;
1163		}
1164
1165		/* src and dst are swapped. */
1166		if (op & SLJIT_SET_E) {
1167			FAIL_IF(push_inst(compiler, C_UEQ_D | FT(src) | FS(dst), UNMOVABLE_INS));
1168			FAIL_IF(push_inst(compiler, CFC1 | TA(EQUAL_FLAG) | DA(FCSR_REG), EQUAL_FLAG));
1169			FAIL_IF(push_inst(compiler, SRL | TA(EQUAL_FLAG) | DA(EQUAL_FLAG) | SH_IMM(23), EQUAL_FLAG));
1170			FAIL_IF(push_inst(compiler, ANDI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG));
1171		}
1172		if (op & SLJIT_SET_S) {
1173			/* Mixing the instructions for the two checks. */
1174			FAIL_IF(push_inst(compiler, C_ULT_D | FT(src) | FS(dst), UNMOVABLE_INS));
1175			FAIL_IF(push_inst(compiler, CFC1 | TA(ULESS_FLAG) | DA(FCSR_REG), ULESS_FLAG));
1176			FAIL_IF(push_inst(compiler, C_ULT_D | FT(dst) | FS(src), UNMOVABLE_INS));
1177			FAIL_IF(push_inst(compiler, SRL | TA(ULESS_FLAG) | DA(ULESS_FLAG) | SH_IMM(23), ULESS_FLAG));
1178			FAIL_IF(push_inst(compiler, ANDI | SA(ULESS_FLAG) | TA(ULESS_FLAG) | IMM(1), ULESS_FLAG));
1179			FAIL_IF(push_inst(compiler, CFC1 | TA(UGREATER_FLAG) | DA(FCSR_REG), UGREATER_FLAG));
1180			FAIL_IF(push_inst(compiler, SRL | TA(UGREATER_FLAG) | DA(UGREATER_FLAG) | SH_IMM(23), UGREATER_FLAG));
1181			FAIL_IF(push_inst(compiler, ANDI | SA(UGREATER_FLAG) | TA(UGREATER_FLAG) | IMM(1), UGREATER_FLAG));
1182		}
1183		return push_inst(compiler, C_UN_D | FT(src) | FS(dst), FCSR_FCC);
1184	}
1185
1186	dst_fr = (dst > SLJIT_FLOAT_REG4) ? TMP_FREG1 : dst;
1187
1188	if (src > SLJIT_FLOAT_REG4) {
1189		FAIL_IF(emit_fpu_data_transfer(compiler, dst_fr, 1, src, srcw));
1190		src = dst_fr;
1191	}
1192
1193	switch (op) {
1194		case SLJIT_FMOV:
1195			if (src != dst_fr && dst_fr != TMP_FREG1)
1196				FAIL_IF(push_inst(compiler, MOV_D | FS(src) | FD(dst_fr), MOVABLE_INS));
1197			break;
1198		case SLJIT_FNEG:
1199			FAIL_IF(push_inst(compiler, NEG_D | FS(src) | FD(dst_fr), MOVABLE_INS));
1200			break;
1201		case SLJIT_FABS:
1202			FAIL_IF(push_inst(compiler, ABS_D | FS(src) | FD(dst_fr), MOVABLE_INS));
1203			break;
1204	}
1205
1206	if (dst_fr == TMP_FREG1)
1207		FAIL_IF(emit_fpu_data_transfer(compiler, src, 0, dst, dstw));
1208
1209	return SLJIT_SUCCESS;
1210}
1211
1212SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
1213	int dst, sljit_w dstw,
1214	int src1, sljit_w src1w,
1215	int src2, sljit_w src2w)
1216{
1217	int dst_fr;
1218
1219	CHECK_ERROR();
1220	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
1221
1222	compiler->cache_arg = 0;
1223	compiler->cache_argw = 0;
1224
1225	dst_fr = (dst > SLJIT_FLOAT_REG4) ? TMP_FREG1 : dst;
1226
1227	if (src2 > SLJIT_FLOAT_REG4) {
1228		FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG2, 1, src2, src2w));
1229		src2 = TMP_FREG2;
1230	}
1231
1232	if (src1 > SLJIT_FLOAT_REG4) {
1233		FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG1, 1, src1, src1w));
1234		src1 = TMP_FREG1;
1235	}
1236
1237	switch (op) {
1238	case SLJIT_FADD:
1239		FAIL_IF(push_inst(compiler, ADD_D | FT(src2) | FS(src1) | FD(dst_fr), MOVABLE_INS));
1240		break;
1241
1242	case SLJIT_FSUB:
1243		FAIL_IF(push_inst(compiler, SUB_D | FT(src2) | FS(src1) | FD(dst_fr), MOVABLE_INS));
1244		break;
1245
1246	case SLJIT_FMUL:
1247		FAIL_IF(push_inst(compiler, MUL_D | FT(src2) | FS(src1) | FD(dst_fr), MOVABLE_INS));
1248		break;
1249
1250	case SLJIT_FDIV:
1251		FAIL_IF(push_inst(compiler, DIV_D | FT(src2) | FS(src1) | FD(dst_fr), MOVABLE_INS));
1252		break;
1253	}
1254
1255	if (dst_fr == TMP_FREG1)
1256		FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG1, 0, dst, dstw));
1257
1258	return SLJIT_SUCCESS;
1259}
1260
1261/* --------------------------------------------------------------------- */
1262/*  Other instructions                                                   */
1263/* --------------------------------------------------------------------- */
1264
1265SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw)
1266{
1267	CHECK_ERROR();
1268	check_sljit_emit_fast_enter(compiler, dst, dstw);
1269	ADJUST_LOCAL_OFFSET(dst, dstw);
1270
1271	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS)
1272		return push_inst(compiler, ADDU_W | SA(RETURN_ADDR_REG) | TA(0) | D(dst), DR(dst));
1273	else if (dst & SLJIT_MEM)
1274		return emit_op_mem(compiler, WORD_DATA, RETURN_ADDR_REG, dst, dstw);
1275	return SLJIT_SUCCESS;
1276}
1277
1278SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_return(struct sljit_compiler *compiler, int src, sljit_w srcw)
1279{
1280	CHECK_ERROR();
1281	check_sljit_emit_fast_return(compiler, src, srcw);
1282	ADJUST_LOCAL_OFFSET(src, srcw);
1283
1284	if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS)
1285		FAIL_IF(push_inst(compiler, ADDU_W | S(src) | TA(0) | DA(RETURN_ADDR_REG), RETURN_ADDR_REG));
1286	else if (src & SLJIT_MEM)
1287		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, src, srcw));
1288	else if (src & SLJIT_IMM)
1289		FAIL_IF(load_immediate(compiler, RETURN_ADDR_REG, srcw));
1290
1291	FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS));
1292	return push_inst(compiler, NOP, UNMOVABLE_INS);
1293}
1294
1295/* --------------------------------------------------------------------- */
1296/*  Conditional instructions                                             */
1297/* --------------------------------------------------------------------- */
1298
1299SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
1300{
1301	struct sljit_label *label;
1302
1303	CHECK_ERROR_PTR();
1304	check_sljit_emit_label(compiler);
1305
1306	if (compiler->last_label && compiler->last_label->size == compiler->size)
1307		return compiler->last_label;
1308
1309	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
1310	PTR_FAIL_IF(!label);
1311	set_label(label, compiler);
1312	compiler->delay_slot = UNMOVABLE_INS;
1313	return label;
1314}
1315
/* Value OR-ed into the low bits of the conditional branches emitted in
   front of a jump sequence. NOTE(review): presumably the number of
   instruction words the branch skips (constant load, jump and its NOP);
   confirm against emit_const. */
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
#define JUMP_LENGTH	4
#else
#define JUMP_LENGTH	7
#endif

/* The BR_* helpers set the 'inst', 'flags' and 'delay_check' locals of the
   function they are used in. They are wrapped in do { } while (0), so each
   use is a single statement and stays safe under an unbraced if / else. */

/* Branch when the 'src' register is zero. */
#define BR_Z(src) \
	do { \
		inst = BEQ | SA(src) | TA(0) | JUMP_LENGTH; \
		flags = IS_BIT26_COND; \
		delay_check = (src); \
	} while (0)

/* Branch when the 'src' register is not zero. */
#define BR_NZ(src) \
	do { \
		inst = BNE | SA(src) | TA(0) | JUMP_LENGTH; \
		flags = IS_BIT26_COND; \
		delay_check = (src); \
	} while (0)

/* Branch when the floating point condition is true. */
#define BR_T() \
	do { \
		inst = BC1T | JUMP_LENGTH; \
		flags = IS_BIT16_COND; \
		delay_check = FCSR_FCC; \
	} while (0)

/* Branch when the floating point condition is false. */
#define BR_F() \
	do { \
		inst = BC1F | JUMP_LENGTH; \
		flags = IS_BIT16_COND; \
		delay_check = FCSR_FCC; \
	} while (0)
1341
1342SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, int type)
1343{
1344	struct sljit_jump *jump;
1345	sljit_ins inst;
1346	int flags = 0;
1347	int delay_check = UNMOVABLE_INS;
1348
1349	CHECK_ERROR_PTR();
1350	check_sljit_emit_jump(compiler, type);
1351
1352	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
1353	PTR_FAIL_IF(!jump);
1354	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
1355	type &= 0xff;
1356
1357	switch (type) {
1358	case SLJIT_C_EQUAL:
1359	case SLJIT_C_FLOAT_NOT_EQUAL:
1360		BR_NZ(EQUAL_FLAG);
1361		break;
1362	case SLJIT_C_NOT_EQUAL:
1363	case SLJIT_C_FLOAT_EQUAL:
1364		BR_Z(EQUAL_FLAG);
1365		break;
1366	case SLJIT_C_LESS:
1367	case SLJIT_C_FLOAT_LESS:
1368		BR_Z(ULESS_FLAG);
1369		break;
1370	case SLJIT_C_GREATER_EQUAL:
1371	case SLJIT_C_FLOAT_GREATER_EQUAL:
1372		BR_NZ(ULESS_FLAG);
1373		break;
1374	case SLJIT_C_GREATER:
1375	case SLJIT_C_FLOAT_GREATER:
1376		BR_Z(UGREATER_FLAG);
1377		break;
1378	case SLJIT_C_LESS_EQUAL:
1379	case SLJIT_C_FLOAT_LESS_EQUAL:
1380		BR_NZ(UGREATER_FLAG);
1381		break;
1382	case SLJIT_C_SIG_LESS:
1383		BR_Z(LESS_FLAG);
1384		break;
1385	case SLJIT_C_SIG_GREATER_EQUAL:
1386		BR_NZ(LESS_FLAG);
1387		break;
1388	case SLJIT_C_SIG_GREATER:
1389		BR_Z(GREATER_FLAG);
1390		break;
1391	case SLJIT_C_SIG_LESS_EQUAL:
1392		BR_NZ(GREATER_FLAG);
1393		break;
1394	case SLJIT_C_OVERFLOW:
1395	case SLJIT_C_MUL_OVERFLOW:
1396		BR_Z(OVERFLOW_FLAG);
1397		break;
1398	case SLJIT_C_NOT_OVERFLOW:
1399	case SLJIT_C_MUL_NOT_OVERFLOW:
1400		BR_NZ(OVERFLOW_FLAG);
1401		break;
1402	case SLJIT_C_FLOAT_NAN:
1403		BR_F();
1404		break;
1405	case SLJIT_C_FLOAT_NOT_NAN:
1406		BR_T();
1407		break;
1408	default:
1409		/* Not conditional branch. */
1410		inst = 0;
1411		break;
1412	}
1413
1414	jump->flags |= flags;
1415	if (compiler->delay_slot == MOVABLE_INS || (compiler->delay_slot != UNMOVABLE_INS && compiler->delay_slot != delay_check))
1416		jump->flags |= IS_MOVABLE;
1417
1418	if (inst)
1419		PTR_FAIL_IF(push_inst(compiler, inst, UNMOVABLE_INS));
1420
1421	PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0));
1422	if (type <= SLJIT_JUMP) {
1423		PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS));
1424		jump->addr = compiler->size;
1425		PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
1426	} else {
1427		SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
1428		/* Cannot be optimized out if type is >= CALL0. */
1429		jump->flags |= IS_JAL | (type >= SLJIT_CALL0 ? SLJIT_REWRITABLE_JUMP : 0);
1430		PTR_FAIL_IF(push_inst(compiler, JALR | S(TMP_REG2) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
1431		jump->addr = compiler->size;
1432		/* A NOP if type < CALL1. */
1433		PTR_FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_TEMPORARY_REG1) | TA(0) | DA(4), UNMOVABLE_INS));
1434	}
1435	return jump;
1436}
1437
/* Helpers of sljit_emit_cmp: turn an immediate operand into a register.
   A non-zero immediate is loaded into TMP_REG1 (first operand) or
   TMP_REG2 (second operand); a zero immediate is replaced by register
   index 0. Operands which are not immediates are left untouched.
   The do { } while (0) wrapper makes each use a single statement, so the
   inner if / else cannot pair with an else of the caller. */
#define RESOLVE_IMM1() \
	do { \
		if (src1 & SLJIT_IMM) { \
			if (src1w) { \
				PTR_FAIL_IF(load_immediate(compiler, DR(TMP_REG1), src1w)); \
				src1 = TMP_REG1; \
			} \
			else \
				src1 = 0; \
		} \
	} while (0)

#define RESOLVE_IMM2() \
	do { \
		if (src2 & SLJIT_IMM) { \
			if (src2w) { \
				PTR_FAIL_IF(load_immediate(compiler, DR(TMP_REG2), src2w)); \
				src2 = TMP_REG2; \
			} \
			else \
				src2 = 0; \
		} \
	} while (0)
1457
1458SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, int type,
1459	int src1, sljit_w src1w,
1460	int src2, sljit_w src2w)
1461{
1462	struct sljit_jump *jump;
1463	int flags;
1464	sljit_ins inst;
1465
1466	CHECK_ERROR_PTR();
1467	check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w);
1468	ADJUST_LOCAL_OFFSET(src1, src1w);
1469	ADJUST_LOCAL_OFFSET(src2, src2w);
1470
1471	compiler->cache_arg = 0;
1472	compiler->cache_argw = 0;
1473	flags = ((type & SLJIT_INT_OP) ? INT_DATA : WORD_DATA) | LOAD_DATA;
1474	if (src1 & SLJIT_MEM) {
1475		if (getput_arg_fast(compiler, flags, DR(TMP_REG1), src1, src1w))
1476			PTR_FAIL_IF(compiler->error);
1477		else
1478			PTR_FAIL_IF(getput_arg(compiler, flags, DR(TMP_REG1), src1, src1w, src2, src2w));
1479		src1 = TMP_REG1;
1480	}
1481	if (src2 & SLJIT_MEM) {
1482		if (getput_arg_fast(compiler, flags, DR(TMP_REG2), src2, src2w))
1483			PTR_FAIL_IF(compiler->error);
1484		else
1485			PTR_FAIL_IF(getput_arg(compiler, flags, DR(TMP_REG2), src2, src2w, 0, 0));
1486		src2 = TMP_REG2;
1487	}
1488
1489	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
1490	PTR_FAIL_IF(!jump);
1491	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
1492	type &= 0xff;
1493
1494	if (type <= SLJIT_C_NOT_EQUAL) {
1495		RESOLVE_IMM1();
1496		RESOLVE_IMM2();
1497		jump->flags |= IS_BIT26_COND;
1498		if (compiler->delay_slot == MOVABLE_INS || (compiler->delay_slot != UNMOVABLE_INS && compiler->delay_slot != DR(src1) && compiler->delay_slot != DR(src2)))
1499			jump->flags |= IS_MOVABLE;
1500		PTR_FAIL_IF(push_inst(compiler, (type == SLJIT_C_EQUAL ? BNE : BEQ) | S(src1) | T(src2) | JUMP_LENGTH, UNMOVABLE_INS));
1501	}
1502	else if (type >= SLJIT_C_SIG_LESS && (((src1 & SLJIT_IMM) && (src1w == 0)) || ((src2 & SLJIT_IMM) && (src2w == 0)))) {
1503		inst = NOP;
1504		if ((src1 & SLJIT_IMM) && (src1w == 0)) {
1505			RESOLVE_IMM2();
1506			switch (type) {
1507			case SLJIT_C_SIG_LESS:
1508				inst = BLEZ;
1509				jump->flags |= IS_BIT26_COND;
1510				break;
1511			case SLJIT_C_SIG_GREATER_EQUAL:
1512				inst = BGTZ;
1513				jump->flags |= IS_BIT26_COND;
1514				break;
1515			case SLJIT_C_SIG_GREATER:
1516				inst = BGEZ;
1517				jump->flags |= IS_BIT16_COND;
1518				break;
1519			case SLJIT_C_SIG_LESS_EQUAL:
1520				inst = BLTZ;
1521				jump->flags |= IS_BIT16_COND;
1522				break;
1523			}
1524			src1 = src2;
1525		}
1526		else {
1527			RESOLVE_IMM1();
1528			switch (type) {
1529			case SLJIT_C_SIG_LESS:
1530				inst = BGEZ;
1531				jump->flags |= IS_BIT16_COND;
1532				break;
1533			case SLJIT_C_SIG_GREATER_EQUAL:
1534				inst = BLTZ;
1535				jump->flags |= IS_BIT16_COND;
1536				break;
1537			case SLJIT_C_SIG_GREATER:
1538				inst = BLEZ;
1539				jump->flags |= IS_BIT26_COND;
1540				break;
1541			case SLJIT_C_SIG_LESS_EQUAL:
1542				inst = BGTZ;
1543				jump->flags |= IS_BIT26_COND;
1544				break;
1545			}
1546		}
1547		PTR_FAIL_IF(push_inst(compiler, inst | S(src1) | JUMP_LENGTH, UNMOVABLE_INS));
1548	}
1549	else {
1550		if (type == SLJIT_C_LESS || type == SLJIT_C_GREATER_EQUAL || type == SLJIT_C_SIG_LESS || type == SLJIT_C_SIG_GREATER_EQUAL) {
1551			RESOLVE_IMM1();
1552			if ((src2 & SLJIT_IMM) && src2w <= SIMM_MAX && src2w >= SIMM_MIN)
1553				PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_C_LESS_EQUAL ? SLTIU : SLTI) | S(src1) | T(TMP_REG1) | IMM(src2w), DR(TMP_REG1)));
1554			else {
1555				RESOLVE_IMM2();
1556				PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_C_LESS_EQUAL ? SLTU : SLT) | S(src1) | T(src2) | D(TMP_REG1), DR(TMP_REG1)));
1557			}
1558			type = (type == SLJIT_C_LESS || type == SLJIT_C_SIG_LESS) ? SLJIT_C_NOT_EQUAL : SLJIT_C_EQUAL;
1559		}
1560		else {
1561			RESOLVE_IMM2();
1562			if ((src1 & SLJIT_IMM) && src1w <= SIMM_MAX && src1w >= SIMM_MIN)
1563				PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_C_LESS_EQUAL ? SLTIU : SLTI) | S(src2) | T(TMP_REG1) | IMM(src1w), DR(TMP_REG1)));
1564			else {
1565				RESOLVE_IMM1();
1566				PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_C_LESS_EQUAL ? SLTU : SLT) | S(src2) | T(src1) | D(TMP_REG1), DR(TMP_REG1)));
1567			}
1568			type = (type == SLJIT_C_GREATER || type == SLJIT_C_SIG_GREATER) ? SLJIT_C_NOT_EQUAL : SLJIT_C_EQUAL;
1569		}
1570
1571		jump->flags |= IS_BIT26_COND;
1572		PTR_FAIL_IF(push_inst(compiler, (type == SLJIT_C_EQUAL ? BNE : BEQ) | S(TMP_REG1) | TA(0) | JUMP_LENGTH, UNMOVABLE_INS));
1573	}
1574
1575	PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0));
1576	PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS));
1577	jump->addr = compiler->size;
1578	PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
1579	return jump;
1580}
1581
1582#undef RESOLVE_IMM1
1583#undef RESOLVE_IMM2
1584
1585SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, int type,
1586	int src1, sljit_w src1w,
1587	int src2, sljit_w src2w)
1588{
1589	struct sljit_jump *jump;
1590	sljit_ins inst;
1591	int if_true;
1592
1593	CHECK_ERROR_PTR();
1594	check_sljit_emit_fcmp(compiler, type, src1, src1w, src2, src2w);
1595
1596	compiler->cache_arg = 0;
1597	compiler->cache_argw = 0;
1598
1599	if (src1 > SLJIT_FLOAT_REG4) {
1600		PTR_FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG1, 1, src1, src1w));
1601		src1 = TMP_FREG1;
1602	}
1603	if (src2 > SLJIT_FLOAT_REG4) {
1604		PTR_FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG2, 1, src2, src2w));
1605		src2 = TMP_FREG2;
1606	}
1607
1608	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
1609	PTR_FAIL_IF(!jump);
1610	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
1611	jump->flags |= IS_BIT16_COND;
1612	type &= 0xff;
1613
1614	switch (type) {
1615	case SLJIT_C_FLOAT_EQUAL:
1616		inst = C_UEQ_D;
1617		if_true = 1;
1618		break;
1619	case SLJIT_C_FLOAT_NOT_EQUAL:
1620		inst = C_UEQ_D;
1621		if_true = 0;
1622		break;
1623	case SLJIT_C_FLOAT_LESS:
1624		inst = C_ULT_D;
1625		if_true = 1;
1626		break;
1627	case SLJIT_C_FLOAT_GREATER_EQUAL:
1628		inst = C_ULT_D;
1629		if_true = 0;
1630		break;
1631	case SLJIT_C_FLOAT_GREATER:
1632		inst = C_ULE_D;
1633		if_true = 0;
1634		break;
1635	case SLJIT_C_FLOAT_LESS_EQUAL:
1636		inst = C_ULE_D;
1637		if_true = 1;
1638		break;
1639	case SLJIT_C_FLOAT_NAN:
1640		inst = C_UN_D;
1641		if_true = 1;
1642		break;
1643	case SLJIT_C_FLOAT_NOT_NAN:
1644	default: /* Make compilers happy. */
1645		inst = C_UN_D;
1646		if_true = 0;
1647		break;
1648	}
1649
1650	PTR_FAIL_IF(push_inst(compiler, inst | FT(src2) | FS(src1), UNMOVABLE_INS));
1651	/* Intentionally the other opcode. */
1652	PTR_FAIL_IF(push_inst(compiler, (if_true ? BC1F : BC1T) | JUMP_LENGTH, UNMOVABLE_INS));
1653	PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0));
1654	PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS));
1655	jump->addr = compiler->size;
1656	PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
1657	return jump;
1658}
1659
1660#undef JUMP_LENGTH
1661#undef BR_Z
1662#undef BR_NZ
1663#undef BR_T
1664#undef BR_F
1665
1666SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_ijump(struct sljit_compiler *compiler, int type, int src, sljit_w srcw)
1667{
1668	int src_r = TMP_REG2;
1669	struct sljit_jump *jump = NULL;
1670
1671	CHECK_ERROR();
1672	check_sljit_emit_ijump(compiler, type, src, srcw);
1673	ADJUST_LOCAL_OFFSET(src, srcw);
1674
1675	if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS) {
1676		if (DR(src) != 4)
1677			src_r = src;
1678		else
1679			FAIL_IF(push_inst(compiler, ADDU_W | S(src) | TA(0) | D(TMP_REG2), DR(TMP_REG2)));
1680	}
1681
1682	if (type >= SLJIT_CALL0) {
1683		SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
1684		if (src & (SLJIT_IMM | SLJIT_MEM)) {
1685			if (src & SLJIT_IMM)
1686				FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw));
1687			else {
1688				SLJIT_ASSERT(src_r == TMP_REG2 && (src & SLJIT_MEM));
1689				FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
1690			}
1691			FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
1692			/* We need an extra instruction in any case. */
1693			return push_inst(compiler, ADDU_W | S(SLJIT_TEMPORARY_REG1) | TA(0) | DA(4), UNMOVABLE_INS);
1694		}
1695
1696		/* Register input. */
1697		if (type >= SLJIT_CALL1)
1698			FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_TEMPORARY_REG1) | TA(0) | DA(4), 4));
1699		FAIL_IF(push_inst(compiler, JALR | S(src_r) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
1700		return push_inst(compiler, ADDU_W | S(src_r) | TA(0) | D(PIC_ADDR_REG), UNMOVABLE_INS);
1701	}
1702
1703	if (src & SLJIT_IMM) {
1704		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
1705		FAIL_IF(!jump);
1706		set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_JAL : 0));
1707		jump->u.target = srcw;
1708
1709		if (compiler->delay_slot != UNMOVABLE_INS)
1710			jump->flags |= IS_MOVABLE;
1711
1712		FAIL_IF(emit_const(compiler, TMP_REG2, 0));
1713	}
1714	else if (src & SLJIT_MEM)
1715		FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
1716
1717	FAIL_IF(push_inst(compiler, JR | S(src_r), UNMOVABLE_INS));
1718	if (jump)
1719		jump->addr = compiler->size;
1720	FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
1721	return SLJIT_SUCCESS;
1722}
1723
1724SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_cond_value(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int type)
1725{
1726	int sugg_dst_ar, dst_ar;
1727
1728	CHECK_ERROR();
1729	check_sljit_emit_cond_value(compiler, op, dst, dstw, type);
1730	ADJUST_LOCAL_OFFSET(dst, dstw);
1731
1732	if (dst == SLJIT_UNUSED)
1733		return SLJIT_SUCCESS;
1734
1735	sugg_dst_ar = DR((op == SLJIT_MOV && dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REG2);
1736
1737	switch (type) {
1738	case SLJIT_C_EQUAL:
1739	case SLJIT_C_NOT_EQUAL:
1740		FAIL_IF(push_inst(compiler, SLTIU | SA(EQUAL_FLAG) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar));
1741		dst_ar = sugg_dst_ar;
1742		break;
1743	case SLJIT_C_LESS:
1744	case SLJIT_C_GREATER_EQUAL:
1745	case SLJIT_C_FLOAT_LESS:
1746	case SLJIT_C_FLOAT_GREATER_EQUAL:
1747		dst_ar = ULESS_FLAG;
1748		break;
1749	case SLJIT_C_GREATER:
1750	case SLJIT_C_LESS_EQUAL:
1751	case SLJIT_C_FLOAT_GREATER:
1752	case SLJIT_C_FLOAT_LESS_EQUAL:
1753		dst_ar = UGREATER_FLAG;
1754		break;
1755	case SLJIT_C_SIG_LESS:
1756	case SLJIT_C_SIG_GREATER_EQUAL:
1757		dst_ar = LESS_FLAG;
1758		break;
1759	case SLJIT_C_SIG_GREATER:
1760	case SLJIT_C_SIG_LESS_EQUAL:
1761		dst_ar = GREATER_FLAG;
1762		break;
1763	case SLJIT_C_OVERFLOW:
1764	case SLJIT_C_NOT_OVERFLOW:
1765		dst_ar = OVERFLOW_FLAG;
1766		break;
1767	case SLJIT_C_MUL_OVERFLOW:
1768	case SLJIT_C_MUL_NOT_OVERFLOW:
1769		FAIL_IF(push_inst(compiler, SLTIU | SA(OVERFLOW_FLAG) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar));
1770		dst_ar = sugg_dst_ar;
1771		type ^= 0x1; /* Flip type bit for the XORI below. */
1772		break;
1773	case SLJIT_C_FLOAT_EQUAL:
1774	case SLJIT_C_FLOAT_NOT_EQUAL:
1775		dst_ar = EQUAL_FLAG;
1776		break;
1777
1778	case SLJIT_C_FLOAT_NAN:
1779	case SLJIT_C_FLOAT_NOT_NAN:
1780		FAIL_IF(push_inst(compiler, CFC1 | TA(sugg_dst_ar) | DA(FCSR_REG), sugg_dst_ar));
1781		FAIL_IF(push_inst(compiler, SRL | TA(sugg_dst_ar) | DA(sugg_dst_ar) | SH_IMM(23), sugg_dst_ar));
1782		FAIL_IF(push_inst(compiler, ANDI | SA(sugg_dst_ar) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar));
1783		dst_ar = sugg_dst_ar;
1784		break;
1785
1786	default:
1787		SLJIT_ASSERT_STOP();
1788		dst_ar = sugg_dst_ar;
1789		break;
1790	}
1791
1792	if (type & 0x1) {
1793		FAIL_IF(push_inst(compiler, XORI | SA(dst_ar) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar));
1794		dst_ar = sugg_dst_ar;
1795	}
1796
1797	if (GET_OPCODE(op) == SLJIT_OR) {
1798		if (DR(TMP_REG2) != dst_ar)
1799			FAIL_IF(push_inst(compiler, ADDU_W | SA(dst_ar) | TA(0) | D(TMP_REG2), DR(TMP_REG2)));
1800		return emit_op(compiler, op, CUMULATIVE_OP | LOGICAL_OP | IMM_OP, dst, dstw, dst, dstw, TMP_REG2, 0);
1801	}
1802
1803	if (dst & SLJIT_MEM)
1804		return emit_op_mem(compiler, WORD_DATA, dst_ar, dst, dstw);
1805
1806	if (sugg_dst_ar != dst_ar)
1807		return push_inst(compiler, ADDU_W | SA(dst_ar) | TA(0) | DA(sugg_dst_ar), sugg_dst_ar);
1808	return SLJIT_SUCCESS;
1809}
1810
1811SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w init_value)
1812{
1813	struct sljit_const *const_;
1814	int reg;
1815
1816	CHECK_ERROR_PTR();
1817	check_sljit_emit_const(compiler, dst, dstw, init_value);
1818	ADJUST_LOCAL_OFFSET(dst, dstw);
1819
1820	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
1821	PTR_FAIL_IF(!const_);
1822	set_const(const_, compiler);
1823
1824	reg = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REG2;
1825
1826	PTR_FAIL_IF(emit_const(compiler, reg, init_value));
1827
1828	if (dst & SLJIT_MEM)
1829		PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
1830	return const_;
1831}
1832