1/*
2 *    Stack-less Just-In-Time compiler
3 *
4 *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 *   1. Redistributions of source code must retain the above copyright notice, this list of
10 *      conditions and the following disclaimer.
11 *
12 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13 *      of conditions and the following disclaimer in the documentation and/or other materials
14 *      provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name()
28{
29#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
30	return "ARMv7" SLJIT_CPUINFO;
31#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
32	return "ARMv5" SLJIT_CPUINFO;
33#else
34#error "Internal error: Unknown ARM architecture"
35#endif
36}
37
38/* Last register + 1. */
39#define TMP_REG1	(SLJIT_NO_REGISTERS + 1)
40#define TMP_REG2	(SLJIT_NO_REGISTERS + 2)
41#define TMP_REG3	(SLJIT_NO_REGISTERS + 3)
42#define TMP_PC		(SLJIT_NO_REGISTERS + 4)
43
44#define TMP_FREG1	(SLJIT_FLOAT_REG4 + 1)
45#define TMP_FREG2	(SLJIT_FLOAT_REG4 + 2)
46
47/* In ARM instruction words.
48   Cache lines are usually 32 byte aligned. */
49#define CONST_POOL_ALIGNMENT	8
50#define CONST_POOL_EMPTY	0xffffffff
51
52#define ALIGN_INSTRUCTION(ptr) \
53	(sljit_uw*)(((sljit_uw)(ptr) + (CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1) & ~((CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1))
54#define MAX_DIFFERENCE(max_diff) \
55	(((max_diff) / (int)sizeof(sljit_uw)) - (CONST_POOL_ALIGNMENT - 1))
56
57/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
58static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = {
59  0, 0, 1, 2, 10, 11, 4, 5, 6, 7, 8, 13, 3, 12, 14, 15
60};
61
62#define RM(rm) (reg_map[rm])
63#define RD(rd) (reg_map[rd] << 12)
64#define RN(rn) (reg_map[rn] << 16)
65
66/* --------------------------------------------------------------------- */
67/*  Instrucion forms                                                     */
68/* --------------------------------------------------------------------- */
69
70/* The instruction includes the AL condition.
71   INST_NAME - CONDITIONAL remove this flag. */
72#define COND_MASK	0xf0000000
73#define CONDITIONAL	0xe0000000
74#define PUSH_POOL	0xff000000
75
76/* DP - Data Processing instruction (use with EMIT_DATA_PROCESS_INS). */
77#define ADC_DP		0x5
78#define ADD_DP		0x4
79#define AND_DP		0x0
80#define B		0xea000000
81#define BIC_DP		0xe
82#define BL		0xeb000000
83#define BLX		0xe12fff30
84#define BX		0xe12fff10
85#define CLZ		0xe16f0f10
86#define CMP_DP		0xa
87#define BKPT		0xe1200070
88#define EOR_DP		0x1
89#define MOV_DP		0xd
90#define MUL		0xe0000090
91#define MVN_DP		0xf
92#define NOP		0xe1a00000
93#define ORR_DP		0xc
94#define PUSH		0xe92d0000
95#define POP		0xe8bd0000
96#define RSB_DP		0x3
97#define RSC_DP		0x7
98#define SBC_DP		0x6
99#define SMULL		0xe0c00090
100#define SUB_DP		0x2
101#define UMULL		0xe0800090
102#define VABS_F64	0xeeb00bc0
103#define VADD_F64	0xee300b00
104#define VCMP_F64	0xeeb40b40
105#define VDIV_F64	0xee800b00
106#define VMOV_F64	0xeeb00b40
107#define VMRS		0xeef1fa10
108#define VMUL_F64	0xee200b00
109#define VNEG_F64	0xeeb10b40
110#define VSTR		0xed000b00
111#define VSUB_F64	0xee300b40
112
113#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
114/* Arm v7 specific instructions. */
115#define MOVW		0xe3000000
116#define MOVT		0xe3400000
117#define SXTB		0xe6af0070
118#define SXTH		0xe6bf0070
119#define UXTB		0xe6ef0070
120#define UXTH		0xe6ff0070
121#endif
122
123#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
124
125static int push_cpool(struct sljit_compiler *compiler)
126{
127	/* Pushing the constant pool into the instruction stream. */
128	sljit_uw* inst;
129	sljit_uw* cpool_ptr;
130	sljit_uw* cpool_end;
131	int i;
132
133	/* The label could point the address after the constant pool. */
134	if (compiler->last_label && compiler->last_label->size == compiler->size)
135		compiler->last_label->size += compiler->cpool_fill + (CONST_POOL_ALIGNMENT - 1) + 1;
136
137	SLJIT_ASSERT(compiler->cpool_fill > 0 && compiler->cpool_fill <= CPOOL_SIZE);
138	inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
139	FAIL_IF(!inst);
140	compiler->size++;
141	*inst = 0xff000000 | compiler->cpool_fill;
142
143	for (i = 0; i < CONST_POOL_ALIGNMENT - 1; i++) {
144		inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
145		FAIL_IF(!inst);
146		compiler->size++;
147		*inst = 0;
148	}
149
150	cpool_ptr = compiler->cpool;
151	cpool_end = cpool_ptr + compiler->cpool_fill;
152	while (cpool_ptr < cpool_end) {
153		inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
154		FAIL_IF(!inst);
155		compiler->size++;
156		*inst = *cpool_ptr++;
157	}
158	compiler->cpool_diff = CONST_POOL_EMPTY;
159	compiler->cpool_fill = 0;
160	return SLJIT_SUCCESS;
161}
162
163static int push_inst(struct sljit_compiler *compiler, sljit_uw inst)
164{
165	sljit_uw* ptr;
166
167	if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)))
168		FAIL_IF(push_cpool(compiler));
169
170	ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
171	FAIL_IF(!ptr);
172	compiler->size++;
173	*ptr = inst;
174	return SLJIT_SUCCESS;
175}
176
177static int push_inst_with_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal)
178{
179	sljit_uw* ptr;
180	sljit_uw cpool_index = CPOOL_SIZE;
181	sljit_uw* cpool_ptr;
182	sljit_uw* cpool_end;
183	sljit_ub* cpool_unique_ptr;
184
185	if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)))
186		FAIL_IF(push_cpool(compiler));
187	else if (compiler->cpool_fill > 0) {
188		cpool_ptr = compiler->cpool;
189		cpool_end = cpool_ptr + compiler->cpool_fill;
190		cpool_unique_ptr = compiler->cpool_unique;
191		do {
192			if ((*cpool_ptr == literal) && !(*cpool_unique_ptr)) {
193				cpool_index = cpool_ptr - compiler->cpool;
194				break;
195			}
196			cpool_ptr++;
197			cpool_unique_ptr++;
198		} while (cpool_ptr < cpool_end);
199	}
200
201	if (cpool_index == CPOOL_SIZE) {
202		/* Must allocate a new entry in the literal pool. */
203		if (compiler->cpool_fill < CPOOL_SIZE) {
204			cpool_index = compiler->cpool_fill;
205			compiler->cpool_fill++;
206		}
207		else {
208			FAIL_IF(push_cpool(compiler));
209			cpool_index = 0;
210			compiler->cpool_fill = 1;
211		}
212	}
213
214	SLJIT_ASSERT((inst & 0xfff) == 0);
215	ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
216	FAIL_IF(!ptr);
217	compiler->size++;
218	*ptr = inst | cpool_index;
219
220	compiler->cpool[cpool_index] = literal;
221	compiler->cpool_unique[cpool_index] = 0;
222	if (compiler->cpool_diff == CONST_POOL_EMPTY)
223		compiler->cpool_diff = compiler->size;
224	return SLJIT_SUCCESS;
225}
226
227static int push_inst_with_unique_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal)
228{
229	sljit_uw* ptr;
230	if (SLJIT_UNLIKELY((compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)) || compiler->cpool_fill >= CPOOL_SIZE))
231		FAIL_IF(push_cpool(compiler));
232
233	SLJIT_ASSERT(compiler->cpool_fill < CPOOL_SIZE && (inst & 0xfff) == 0);
234	ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
235	FAIL_IF(!ptr);
236	compiler->size++;
237	*ptr = inst | compiler->cpool_fill;
238
239	compiler->cpool[compiler->cpool_fill] = literal;
240	compiler->cpool_unique[compiler->cpool_fill] = 1;
241	compiler->cpool_fill++;
242	if (compiler->cpool_diff == CONST_POOL_EMPTY)
243		compiler->cpool_diff = compiler->size;
244	return SLJIT_SUCCESS;
245}
246
247static SLJIT_INLINE int prepare_blx(struct sljit_compiler *compiler)
248{
249	/* Place for at least two instruction (doesn't matter whether the first has a literal). */
250	if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4088)))
251		return push_cpool(compiler);
252	return SLJIT_SUCCESS;
253}
254
255static SLJIT_INLINE int emit_blx(struct sljit_compiler *compiler)
256{
257	/* Must follow tightly the previous instruction (to be able to convert it to bl instruction). */
258	SLJIT_ASSERT(compiler->cpool_diff == CONST_POOL_EMPTY || compiler->size - compiler->cpool_diff < MAX_DIFFERENCE(4092));
259	return push_inst(compiler, BLX | RM(TMP_REG1));
260}
261
262static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ptr, sljit_uw* const_pool, sljit_uw cpool_size)
263{
264	sljit_uw diff;
265	sljit_uw ind;
266	sljit_uw counter = 0;
267	sljit_uw* clear_const_pool = const_pool;
268	sljit_uw* clear_const_pool_end = const_pool + cpool_size;
269
270	SLJIT_ASSERT(const_pool - code_ptr <= CONST_POOL_ALIGNMENT);
271	/* Set unused flag for all literals in the constant pool.
272	   I.e.: unused literals can belong to branches, which can be encoded as B or BL.
273	   We can "compress" the constant pool by discarding these literals. */
274	while (clear_const_pool < clear_const_pool_end)
275		*clear_const_pool++ = (sljit_uw)(-1);
276
277	while (last_pc_patch < code_ptr) {
278		/* Data transfer instruction with Rn == r15. */
279		if ((*last_pc_patch & 0x0c0f0000) == 0x040f0000) {
280			diff = const_pool - last_pc_patch;
281			ind = (*last_pc_patch) & 0xfff;
282
283			/* Must be a load instruction with immediate offset. */
284			SLJIT_ASSERT(ind < cpool_size && !(*last_pc_patch & (1 << 25)) && (*last_pc_patch & (1 << 20)));
285			if ((int)const_pool[ind] < 0) {
286				const_pool[ind] = counter;
287				ind = counter;
288				counter++;
289			}
290			else
291				ind = const_pool[ind];
292
293			SLJIT_ASSERT(diff >= 1);
294			if (diff >= 2 || ind > 0) {
295				diff = (diff + ind - 2) << 2;
296				SLJIT_ASSERT(diff <= 0xfff);
297				*last_pc_patch = (*last_pc_patch & ~0xfff) | diff;
298			}
299			else
300				*last_pc_patch = (*last_pc_patch & ~(0xfff | (1 << 23))) | 0x004;
301		}
302		last_pc_patch++;
303	}
304	return counter;
305}
306
307/* In some rare ocasions we may need future patches. The probability is close to 0 in practice. */
308struct future_patch {
309	struct future_patch* next;
310	int index;
311	int value;
312};
313
314static SLJIT_INLINE int resolve_const_pool_index(struct future_patch **first_patch, sljit_uw cpool_current_index, sljit_uw *cpool_start_address, sljit_uw *buf_ptr)
315{
316	int value;
317	struct future_patch *curr_patch, *prev_patch;
318
319	/* Using the values generated by patch_pc_relative_loads. */
320	if (!*first_patch)
321		value = (int)cpool_start_address[cpool_current_index];
322	else {
323		curr_patch = *first_patch;
324		prev_patch = 0;
325		while (1) {
326			if (!curr_patch) {
327				value = (int)cpool_start_address[cpool_current_index];
328				break;
329			}
330			if ((sljit_uw)curr_patch->index == cpool_current_index) {
331				value = curr_patch->value;
332				if (prev_patch)
333					prev_patch->next = curr_patch->next;
334				else
335					*first_patch = curr_patch->next;
336				SLJIT_FREE(curr_patch);
337				break;
338			}
339			prev_patch = curr_patch;
340			curr_patch = curr_patch->next;
341		}
342	}
343
344	if (value >= 0) {
345		if ((sljit_uw)value > cpool_current_index) {
346			curr_patch = (struct future_patch*)SLJIT_MALLOC(sizeof(struct future_patch));
347			if (!curr_patch) {
348				while (*first_patch) {
349					curr_patch = *first_patch;
350					*first_patch = (*first_patch)->next;
351					SLJIT_FREE(curr_patch);
352				}
353				return SLJIT_ERR_ALLOC_FAILED;
354			}
355			curr_patch->next = *first_patch;
356			curr_patch->index = value;
357			curr_patch->value = cpool_start_address[value];
358			*first_patch = curr_patch;
359		}
360		cpool_start_address[value] = *buf_ptr;
361	}
362	return SLJIT_SUCCESS;
363}
364
365#else
366
367static int push_inst(struct sljit_compiler *compiler, sljit_uw inst)
368{
369	sljit_uw* ptr;
370
371	ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
372	FAIL_IF(!ptr);
373	compiler->size++;
374	*ptr = inst;
375	return SLJIT_SUCCESS;
376}
377
378static SLJIT_INLINE int emit_imm(struct sljit_compiler *compiler, int reg, sljit_w imm)
379{
380	FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff)));
381	return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff));
382}
383
384#endif
385
386static SLJIT_INLINE int detect_jump_type(struct sljit_jump *jump, sljit_uw *code_ptr, sljit_uw *code)
387{
388	sljit_w diff;
389
390	if (jump->flags & SLJIT_REWRITABLE_JUMP)
391		return 0;
392
393#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
394	if (jump->flags & IS_BL)
395		code_ptr--;
396
397	if (jump->flags & JUMP_ADDR)
398		diff = ((sljit_w)jump->u.target - (sljit_w)(code_ptr + 2));
399	else {
400		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
401		diff = ((sljit_w)(code + jump->u.label->size) - (sljit_w)(code_ptr + 2));
402	}
403
404	/* Branch to Thumb code has not been optimized yet. */
405	if (diff & 0x3)
406		return 0;
407
408	diff >>= 2;
409	if (jump->flags & IS_BL) {
410		if (diff <= 0x01ffffff && diff >= -0x02000000) {
411			*code_ptr = (BL - CONDITIONAL) | (*(code_ptr + 1) & COND_MASK);
412			jump->flags |= PATCH_B;
413			return 1;
414		}
415	}
416	else {
417		if (diff <= 0x01ffffff && diff >= -0x02000000) {
418			*code_ptr = (B - CONDITIONAL) | (*code_ptr & COND_MASK);
419			jump->flags |= PATCH_B;
420		}
421	}
422#else
423	if (jump->flags & JUMP_ADDR)
424		diff = ((sljit_w)jump->u.target - (sljit_w)code_ptr);
425	else {
426		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
427		diff = ((sljit_w)(code + jump->u.label->size) - (sljit_w)code_ptr);
428	}
429
430	/* Branch to Thumb code has not been optimized yet. */
431	if (diff & 0x3)
432		return 0;
433
434	diff >>= 2;
435	if (diff <= 0x01ffffff && diff >= -0x02000000) {
436		code_ptr -= 2;
437		*code_ptr = ((jump->flags & IS_BL) ? (BL - CONDITIONAL) : (B - CONDITIONAL)) | (code_ptr[2] & COND_MASK);
438		jump->flags |= PATCH_B;
439		return 1;
440	}
441#endif
442	return 0;
443}
444
445static SLJIT_INLINE void inline_set_jump_addr(sljit_uw addr, sljit_uw new_addr, int flush)
446{
447#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
448	sljit_uw *ptr = (sljit_uw*)addr;
449	sljit_uw *inst = (sljit_uw*)ptr[0];
450	sljit_uw mov_pc = ptr[1];
451	int bl = (mov_pc & 0x0000f000) != RD(TMP_PC);
452	sljit_w diff = (sljit_w)(((sljit_w)new_addr - (sljit_w)(inst + 2)) >> 2);
453
454	if (diff <= 0x7fffff && diff >= -0x800000) {
455		/* Turn to branch. */
456		if (!bl) {
457			inst[0] = (mov_pc & COND_MASK) | (B - CONDITIONAL) | (diff & 0xffffff);
458			if (flush) {
459				SLJIT_CACHE_FLUSH(inst, inst + 1);
460			}
461		} else {
462			inst[0] = (mov_pc & COND_MASK) | (BL - CONDITIONAL) | (diff & 0xffffff);
463			inst[1] = NOP;
464			if (flush) {
465				SLJIT_CACHE_FLUSH(inst, inst + 2);
466			}
467		}
468	} else {
469		/* Get the position of the constant. */
470		if (mov_pc & (1 << 23))
471			ptr = inst + ((mov_pc & 0xfff) >> 2) + 2;
472		else
473			ptr = inst + 1;
474
475		if (*inst != mov_pc) {
476			inst[0] = mov_pc;
477			if (!bl) {
478				if (flush) {
479					SLJIT_CACHE_FLUSH(inst, inst + 1);
480				}
481			} else {
482				inst[1] = BLX | RM(TMP_REG1);
483				if (flush) {
484					SLJIT_CACHE_FLUSH(inst, inst + 2);
485				}
486			}
487		}
488		*ptr = new_addr;
489	}
490#else
491	sljit_uw *inst = (sljit_uw*)addr;
492	SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);
493	inst[0] = MOVW | (inst[0] & 0xf000) | ((new_addr << 4) & 0xf0000) | (new_addr & 0xfff);
494	inst[1] = MOVT | (inst[1] & 0xf000) | ((new_addr >> 12) & 0xf0000) | ((new_addr >> 16) & 0xfff);
495	if (flush) {
496		SLJIT_CACHE_FLUSH(inst, inst + 2);
497	}
498#endif
499}
500
501static sljit_uw get_immediate(sljit_uw imm);
502
503static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_w new_constant, int flush)
504{
505#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
506	sljit_uw *ptr = (sljit_uw*)addr;
507	sljit_uw *inst = (sljit_uw*)ptr[0];
508	sljit_uw ldr_literal = ptr[1];
509	sljit_uw src2;
510
511	src2 = get_immediate(new_constant);
512	if (src2) {
513		*inst = 0xe3a00000 | (ldr_literal & 0xf000) | src2;
514		if (flush) {
515			SLJIT_CACHE_FLUSH(inst, inst + 1);
516		}
517		return;
518	}
519
520	src2 = get_immediate(~new_constant);
521	if (src2) {
522		*inst = 0xe3e00000 | (ldr_literal & 0xf000) | src2;
523		if (flush) {
524			SLJIT_CACHE_FLUSH(inst, inst + 1);
525		}
526		return;
527	}
528
529	if (ldr_literal & (1 << 23))
530		ptr = inst + ((ldr_literal & 0xfff) >> 2) + 2;
531	else
532		ptr = inst + 1;
533
534	if (*inst != ldr_literal) {
535		*inst = ldr_literal;
536		if (flush) {
537			SLJIT_CACHE_FLUSH(inst, inst + 1);
538		}
539	}
540	*ptr = new_constant;
541#else
542	sljit_uw *inst = (sljit_uw*)addr;
543	SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);
544	inst[0] = MOVW | (inst[0] & 0xf000) | ((new_constant << 4) & 0xf0000) | (new_constant & 0xfff);
545	inst[1] = MOVT | (inst[1] & 0xf000) | ((new_constant >> 12) & 0xf0000) | ((new_constant >> 16) & 0xfff);
546	if (flush) {
547		SLJIT_CACHE_FLUSH(inst, inst + 2);
548	}
549#endif
550}
551
552SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
553{
554	struct sljit_memory_fragment *buf;
555	sljit_uw *code;
556	sljit_uw *code_ptr;
557	sljit_uw *buf_ptr;
558	sljit_uw *buf_end;
559	sljit_uw size;
560	sljit_uw word_count;
561#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
562	sljit_uw cpool_size;
563	sljit_uw cpool_skip_alignment;
564	sljit_uw cpool_current_index;
565	sljit_uw *cpool_start_address;
566	sljit_uw *last_pc_patch;
567	struct future_patch *first_patch;
568#endif
569
570	struct sljit_label *label;
571	struct sljit_jump *jump;
572	struct sljit_const *const_;
573
574	CHECK_ERROR_PTR();
575	check_sljit_generate_code(compiler);
576	reverse_buf(compiler);
577
578	/* Second code generation pass. */
579#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
580	size = compiler->size + (compiler->patches << 1);
581	if (compiler->cpool_fill > 0)
582		size += compiler->cpool_fill + CONST_POOL_ALIGNMENT - 1;
583#else
584	size = compiler->size;
585#endif
586	code = (sljit_uw*)SLJIT_MALLOC_EXEC(size * sizeof(sljit_uw));
587	PTR_FAIL_WITH_EXEC_IF(code);
588	buf = compiler->buf;
589
590#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
591	cpool_size = 0;
592	cpool_skip_alignment = 0;
593	cpool_current_index = 0;
594	cpool_start_address = NULL;
595	first_patch = NULL;
596	last_pc_patch = code;
597#endif
598
599	code_ptr = code;
600	word_count = 0;
601
602	label = compiler->labels;
603	jump = compiler->jumps;
604	const_ = compiler->consts;
605
606	if (label && label->size == 0) {
607		label->addr = (sljit_uw)code;
608		label->size = 0;
609		label = label->next;
610	}
611
612	do {
613		buf_ptr = (sljit_uw*)buf->memory;
614		buf_end = buf_ptr + (buf->used_size >> 2);
615		do {
616			word_count++;
617#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
618			if (cpool_size > 0) {
619				if (cpool_skip_alignment > 0) {
620					buf_ptr++;
621					cpool_skip_alignment--;
622				}
623				else {
624					if (SLJIT_UNLIKELY(resolve_const_pool_index(&first_patch, cpool_current_index, cpool_start_address, buf_ptr))) {
625						SLJIT_FREE_EXEC(code);
626						compiler->error = SLJIT_ERR_ALLOC_FAILED;
627						return NULL;
628					}
629					buf_ptr++;
630					if (++cpool_current_index >= cpool_size) {
631						SLJIT_ASSERT(!first_patch);
632						cpool_size = 0;
633						if (label && label->size == word_count) {
634							/* Points after the current instruction. */
635							label->addr = (sljit_uw)code_ptr;
636							label->size = code_ptr - code;
637							label = label->next;
638						}
639					}
640				}
641			}
642			else if ((*buf_ptr & 0xff000000) != PUSH_POOL) {
643#endif
644				*code_ptr = *buf_ptr++;
645				/* These structures are ordered by their address. */
646				SLJIT_ASSERT(!label || label->size >= word_count);
647				SLJIT_ASSERT(!jump || jump->addr >= word_count);
648				SLJIT_ASSERT(!const_ || const_->addr >= word_count);
649				if (jump && jump->addr == word_count) {
650#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
651					if (detect_jump_type(jump, code_ptr, code))
652						code_ptr--;
653					jump->addr = (sljit_uw)code_ptr;
654#else
655					jump->addr = (sljit_uw)(code_ptr - 2);
656					if (detect_jump_type(jump, code_ptr, code))
657						code_ptr -= 2;
658#endif
659					jump = jump->next;
660				}
661				if (label && label->size == word_count) {
662					/* code_ptr can be affected above. */
663					label->addr = (sljit_uw)(code_ptr + 1);
664					label->size = (code_ptr + 1) - code;
665					label = label->next;
666				}
667				if (const_ && const_->addr == word_count) {
668#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
669					const_->addr = (sljit_uw)code_ptr;
670#else
671					const_->addr = (sljit_uw)(code_ptr - 1);
672#endif
673					const_ = const_->next;
674				}
675				code_ptr++;
676#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
677			}
678			else {
679				/* Fortunately, no need to shift. */
680				cpool_size = *buf_ptr++ & ~PUSH_POOL;
681				SLJIT_ASSERT(cpool_size > 0);
682				cpool_start_address = ALIGN_INSTRUCTION(code_ptr + 1);
683				cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, cpool_size);
684				if (cpool_current_index > 0) {
685					/* Unconditional branch. */
686					*code_ptr = B | (((cpool_start_address - code_ptr) + cpool_current_index - 2) & ~PUSH_POOL);
687					code_ptr = cpool_start_address + cpool_current_index;
688				}
689				cpool_skip_alignment = CONST_POOL_ALIGNMENT - 1;
690				cpool_current_index = 0;
691				last_pc_patch = code_ptr;
692			}
693#endif
694		} while (buf_ptr < buf_end);
695		buf = buf->next;
696	} while (buf);
697
698	SLJIT_ASSERT(!label);
699	SLJIT_ASSERT(!jump);
700	SLJIT_ASSERT(!const_);
701
702#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
703	SLJIT_ASSERT(cpool_size == 0);
704	if (compiler->cpool_fill > 0) {
705		cpool_start_address = ALIGN_INSTRUCTION(code_ptr);
706		cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, compiler->cpool_fill);
707		if (cpool_current_index > 0)
708			code_ptr = cpool_start_address + cpool_current_index;
709
710		buf_ptr = compiler->cpool;
711		buf_end = buf_ptr + compiler->cpool_fill;
712		cpool_current_index = 0;
713		while (buf_ptr < buf_end) {
714			if (SLJIT_UNLIKELY(resolve_const_pool_index(&first_patch, cpool_current_index, cpool_start_address, buf_ptr))) {
715				SLJIT_FREE_EXEC(code);
716				compiler->error = SLJIT_ERR_ALLOC_FAILED;
717				return NULL;
718			}
719			buf_ptr++;
720			cpool_current_index++;
721		}
722		SLJIT_ASSERT(!first_patch);
723	}
724#endif
725
726	jump = compiler->jumps;
727	while (jump) {
728		buf_ptr = (sljit_uw*)jump->addr;
729
730		if (jump->flags & PATCH_B) {
731			if (!(jump->flags & JUMP_ADDR)) {
732				SLJIT_ASSERT(jump->flags & JUMP_LABEL);
733				SLJIT_ASSERT(((sljit_w)jump->u.label->addr - (sljit_w)(buf_ptr + 2)) <= 0x01ffffff && ((sljit_w)jump->u.label->addr - (sljit_w)(buf_ptr + 2)) >= -0x02000000);
734				*buf_ptr |= (((sljit_w)jump->u.label->addr - (sljit_w)(buf_ptr + 2)) >> 2) & 0x00ffffff;
735			}
736			else {
737				SLJIT_ASSERT(((sljit_w)jump->u.target - (sljit_w)(buf_ptr + 2)) <= 0x01ffffff && ((sljit_w)jump->u.target - (sljit_w)(buf_ptr + 2)) >= -0x02000000);
738				*buf_ptr |= (((sljit_w)jump->u.target - (sljit_w)(buf_ptr + 2)) >> 2) & 0x00ffffff;
739			}
740		}
741		else if (jump->flags & SLJIT_REWRITABLE_JUMP) {
742#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
743			jump->addr = (sljit_uw)code_ptr;
744			code_ptr[0] = (sljit_uw)buf_ptr;
745			code_ptr[1] = *buf_ptr;
746			inline_set_jump_addr((sljit_uw)code_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
747			code_ptr += 2;
748#else
749			inline_set_jump_addr((sljit_uw)buf_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
750#endif
751		}
752		else {
753#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
754			if (jump->flags & IS_BL)
755				buf_ptr--;
756			if (*buf_ptr & (1 << 23))
757				buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2;
758			else
759				buf_ptr += 1;
760			*buf_ptr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
761#else
762			inline_set_jump_addr((sljit_uw)buf_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
763#endif
764		}
765		jump = jump->next;
766	}
767
768#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
769	const_ = compiler->consts;
770	while (const_) {
771		buf_ptr = (sljit_uw*)const_->addr;
772		const_->addr = (sljit_uw)code_ptr;
773
774		code_ptr[0] = (sljit_uw)buf_ptr;
775		code_ptr[1] = *buf_ptr;
776		if (*buf_ptr & (1 << 23))
777			buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2;
778		else
779			buf_ptr += 1;
780		/* Set the value again (can be a simple constant). */
781		inline_set_const((sljit_uw)code_ptr, *buf_ptr, 0);
782		code_ptr += 2;
783
784		const_ = const_->next;
785	}
786#endif
787
788	SLJIT_ASSERT(code_ptr - code <= (int)size);
789
790	SLJIT_CACHE_FLUSH(code, code_ptr);
791	compiler->error = SLJIT_ERR_COMPILED;
792	compiler->executable_size = size * sizeof(sljit_uw);
793	return code;
794}
795
796/* emit_op inp_flags.
797   WRITE_BACK must be the first, since it is a flag. */
798#define WRITE_BACK	0x01
799#define ALLOW_IMM	0x02
800#define ALLOW_INV_IMM	0x04
801#define ALLOW_ANY_IMM	(ALLOW_IMM | ALLOW_INV_IMM)
802#define ARG_TEST	0x08
803
804/* Creates an index in data_transfer_insts array. */
805#define WORD_DATA	0x00
806#define BYTE_DATA	0x10
807#define HALF_DATA	0x20
808#define SIGNED_DATA	0x40
809#define LOAD_DATA	0x80
810
811#define EMIT_INSTRUCTION(inst) \
812	FAIL_IF(push_inst(compiler, (inst)))
813
814/* Condition: AL. */
815#define EMIT_DATA_PROCESS_INS(opcode, set_flags, dst, src1, src2) \
816	(0xe0000000 | ((opcode) << 21) | (set_flags) | RD(dst) | RN(src1) | (src2))
817
818static int emit_op(struct sljit_compiler *compiler, int op, int inp_flags,
819	int dst, sljit_w dstw,
820	int src1, sljit_w src1w,
821	int src2, sljit_w src2w);
822
823SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size)
824{
825	int size;
826	sljit_uw push;
827
828	CHECK_ERROR();
829	check_sljit_emit_enter(compiler, args, temporaries, saveds, local_size);
830
831	compiler->temporaries = temporaries;
832	compiler->saveds = saveds;
833#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
834	compiler->logical_local_size = local_size;
835#endif
836
837	/* Push saved registers, temporary registers
838	   stmdb sp!, {..., lr} */
839	push = PUSH | (1 << 14);
840	if (temporaries >= 5)
841		push |= 1 << 11;
842	if (temporaries >= 4)
843		push |= 1 << 10;
844	if (saveds >= 5)
845		push |= 1 << 8;
846	if (saveds >= 4)
847		push |= 1 << 7;
848	if (saveds >= 3)
849		push |= 1 << 6;
850	if (saveds >= 2)
851		push |= 1 << 5;
852	if (saveds >= 1)
853		push |= 1 << 4;
854	EMIT_INSTRUCTION(push);
855
856	/* Stack must be aligned to 8 bytes: */
857	size = (1 + saveds) * sizeof(sljit_uw);
858	if (temporaries >= 4)
859		size += (temporaries - 3) * sizeof(sljit_uw);
860	local_size += size;
861	local_size = (local_size + 7) & ~7;
862	local_size -= size;
863	compiler->local_size = local_size;
864	if (local_size > 0)
865		FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, local_size));
866
867	if (args >= 1)
868		EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG1, SLJIT_UNUSED, RM(SLJIT_TEMPORARY_REG1)));
869	if (args >= 2)
870		EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG2, SLJIT_UNUSED, RM(SLJIT_TEMPORARY_REG2)));
871	if (args >= 3)
872		EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG3, SLJIT_UNUSED, RM(SLJIT_TEMPORARY_REG3)));
873
874	return SLJIT_SUCCESS;
875}
876
877SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size)
878{
879	int size;
880
881	CHECK_ERROR_VOID();
882	check_sljit_set_context(compiler, args, temporaries, saveds, local_size);
883
884	compiler->temporaries = temporaries;
885	compiler->saveds = saveds;
886#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
887	compiler->logical_local_size = local_size;
888#endif
889
890	size = (1 + saveds) * sizeof(sljit_uw);
891	if (temporaries >= 4)
892		size += (temporaries - 3) * sizeof(sljit_uw);
893	local_size += size;
894	local_size = (local_size + 7) & ~7;
895	local_size -= size;
896	compiler->local_size = local_size;
897}
898
899SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, int op, int src, sljit_w srcw)
900{
901	sljit_uw pop;
902
903	CHECK_ERROR();
904	check_sljit_emit_return(compiler, op, src, srcw);
905	ADJUST_LOCAL_OFFSET(src, srcw);
906
907	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
908
909	if (compiler->local_size > 0)
910		FAIL_IF(emit_op(compiler, SLJIT_ADD, ALLOW_IMM, SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, compiler->local_size));
911
912	pop = POP | (1 << 15);
913	/* Push saved registers, temporary registers
914	   ldmia sp!, {..., pc} */
915	if (compiler->temporaries >= 5)
916		pop |= 1 << 11;
917	if (compiler->temporaries >= 4)
918		pop |= 1 << 10;
919	if (compiler->saveds >= 5)
920		pop |= 1 << 8;
921	if (compiler->saveds >= 4)
922		pop |= 1 << 7;
923	if (compiler->saveds >= 3)
924		pop |= 1 << 6;
925	if (compiler->saveds >= 2)
926		pop |= 1 << 5;
927	if (compiler->saveds >= 1)
928		pop |= 1 << 4;
929
930	return push_inst(compiler, pop);
931}
932
933/* --------------------------------------------------------------------- */
934/*  Operators                                                            */
935/* --------------------------------------------------------------------- */
936
937/* s/l - store/load (1 bit)
938   u/s - signed/unsigned (1 bit)
939   w/b/h/N - word/byte/half/NOT allowed (2 bit)
940   It contans 16 items, but not all are different. */
941
942static sljit_w data_transfer_insts[16] = {
943/* s u w */ 0xe5000000 /* str */,
944/* s u b */ 0xe5400000 /* strb */,
945/* s u h */ 0xe10000b0 /* strh */,
946/* s u N */ 0x00000000 /* not allowed */,
947/* s s w */ 0xe5000000 /* str */,
948/* s s b */ 0xe5400000 /* strb */,
949/* s s h */ 0xe10000b0 /* strh */,
950/* s s N */ 0x00000000 /* not allowed */,
951
952/* l u w */ 0xe5100000 /* ldr */,
953/* l u b */ 0xe5500000 /* ldrb */,
954/* l u h */ 0xe11000b0 /* ldrh */,
955/* l u N */ 0x00000000 /* not allowed */,
956/* l s w */ 0xe5100000 /* ldr */,
957/* l s b */ 0xe11000d0 /* ldrsb */,
958/* l s h */ 0xe11000f0 /* ldrsh */,
959/* l s N */ 0x00000000 /* not allowed */,
960};
961
962#define EMIT_DATA_TRANSFER(type, add, wb, target, base1, base2) \
963	(data_transfer_insts[(type) >> 4] | ((add) << 23) | ((wb) << 21) | (reg_map[target] << 12) | (reg_map[base1] << 16) | (base2))
964/* Normal ldr/str instruction.
965   Type2: ldrsb, ldrh, ldrsh */
966#define IS_TYPE1_TRANSFER(type) \
967	(data_transfer_insts[(type) >> 4] & 0x04000000)
968#define TYPE2_TRANSFER_IMM(imm) \
969	(((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22))
970
971/* flags: */
972  /* Arguments are swapped. */
973#define ARGS_SWAPPED	0x01
974  /* Inverted immediate. */
975#define INV_IMM		0x02
976  /* Source and destination is register. */
977#define REG_DEST	0x04
978#define REG_SOURCE	0x08
979  /* One instruction is enough. */
980#define FAST_DEST	0x10
981  /* Multiple instructions are required. */
982#define SLOW_DEST	0x20
983/* SET_FLAGS must be (1 << 20) as it is also the value of S bit (can be used for optimization). */
984#define SET_FLAGS	(1 << 20)
985/* dst: reg
986   src1: reg
987   src2: reg or imm (if allowed)
988   SRC2_IMM must be (1 << 25) as it is also the value of I bit (can be used for optimization). */
989#define SRC2_IMM	(1 << 25)
990
991#define EMIT_DATA_PROCESS_INS_AND_RETURN(opcode) \
992	return push_inst(compiler, EMIT_DATA_PROCESS_INS(opcode, flags & SET_FLAGS, dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)))
993
994#define EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(opcode, dst, src1, src2) \
995	return push_inst(compiler, EMIT_DATA_PROCESS_INS(opcode, flags & SET_FLAGS, dst, src1, src2))
996
997#define EMIT_SHIFT_INS_AND_RETURN(opcode) \
998	SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM)); \
999	if (compiler->shift_imm != 0x20) { \
1000		SLJIT_ASSERT(src1 == TMP_REG1); \
1001		SLJIT_ASSERT(!(flags & ARGS_SWAPPED)); \
1002		if (compiler->shift_imm != 0) \
1003			return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, (compiler->shift_imm << 7) | (opcode << 5) | reg_map[src2])); \
1004		return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, reg_map[src2])); \
1005	} \
1006	return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, (reg_map[(flags & ARGS_SWAPPED) ? src1 : src2] << 8) | (opcode << 5) | 0x10 | ((flags & ARGS_SWAPPED) ? reg_map[src2] : reg_map[src1])));
1007
1008static SLJIT_INLINE int emit_single_op(struct sljit_compiler *compiler, int op, int flags,
1009	int dst, int src1, int src2)
1010{
1011	sljit_w mul_inst;
1012
1013	switch (GET_OPCODE(op)) {
1014	case SLJIT_ADD:
1015		SLJIT_ASSERT(!(flags & INV_IMM));
1016		EMIT_DATA_PROCESS_INS_AND_RETURN(ADD_DP);
1017
1018	case SLJIT_ADDC:
1019		SLJIT_ASSERT(!(flags & INV_IMM));
1020		EMIT_DATA_PROCESS_INS_AND_RETURN(ADC_DP);
1021
1022	case SLJIT_SUB:
1023		SLJIT_ASSERT(!(flags & INV_IMM));
1024		if (!(flags & ARGS_SWAPPED))
1025			EMIT_DATA_PROCESS_INS_AND_RETURN(SUB_DP);
1026		EMIT_DATA_PROCESS_INS_AND_RETURN(RSB_DP);
1027
1028	case SLJIT_SUBC:
1029		SLJIT_ASSERT(!(flags & INV_IMM));
1030		if (!(flags & ARGS_SWAPPED))
1031			EMIT_DATA_PROCESS_INS_AND_RETURN(SBC_DP);
1032		EMIT_DATA_PROCESS_INS_AND_RETURN(RSC_DP);
1033
1034	case SLJIT_MUL:
1035		SLJIT_ASSERT(!(flags & INV_IMM));
1036		SLJIT_ASSERT(!(src2 & SRC2_IMM));
1037		if (SLJIT_UNLIKELY(op & SLJIT_SET_O))
1038			mul_inst = SMULL | (reg_map[TMP_REG3] << 16) | (reg_map[dst] << 12);
1039		else
1040			mul_inst = MUL | (reg_map[dst] << 16);
1041
1042		if (dst != src2)
1043			FAIL_IF(push_inst(compiler, mul_inst | (reg_map[src1] << 8) | reg_map[src2]));
1044		else if (dst != src1)
1045			FAIL_IF(push_inst(compiler, mul_inst | (reg_map[src2] << 8) | reg_map[src1]));
1046		else {
1047			/* Rm and Rd must not be the same register. */
1048			SLJIT_ASSERT(dst != TMP_REG1);
1049			FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, reg_map[src2])));
1050			FAIL_IF(push_inst(compiler, mul_inst | (reg_map[src2] << 8) | reg_map[TMP_REG1]));
1051		}
1052
1053		if (!(op & SLJIT_SET_O))
1054			return SLJIT_SUCCESS;
1055
1056		/* We need to use TMP_REG3. */
1057		compiler->cache_arg = 0;
1058		compiler->cache_argw = 0;
1059		/* cmp TMP_REG2, dst asr #31. */
1060		return push_inst(compiler, EMIT_DATA_PROCESS_INS(CMP_DP, SET_FLAGS, SLJIT_UNUSED, TMP_REG3, RM(dst) | 0xfc0));
1061
1062	case SLJIT_AND:
1063		if (!(flags & INV_IMM))
1064			EMIT_DATA_PROCESS_INS_AND_RETURN(AND_DP);
1065		EMIT_DATA_PROCESS_INS_AND_RETURN(BIC_DP);
1066
1067	case SLJIT_OR:
1068		SLJIT_ASSERT(!(flags & INV_IMM));
1069		EMIT_DATA_PROCESS_INS_AND_RETURN(ORR_DP);
1070
1071	case SLJIT_XOR:
1072		SLJIT_ASSERT(!(flags & INV_IMM));
1073		EMIT_DATA_PROCESS_INS_AND_RETURN(EOR_DP);
1074
1075	case SLJIT_SHL:
1076		EMIT_SHIFT_INS_AND_RETURN(0);
1077
1078	case SLJIT_LSHR:
1079		EMIT_SHIFT_INS_AND_RETURN(1);
1080
1081	case SLJIT_ASHR:
1082		EMIT_SHIFT_INS_AND_RETURN(2);
1083
1084	case SLJIT_MOV:
1085		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
1086		if (dst != src2) {
1087			if (src2 & SRC2_IMM) {
1088				if (flags & INV_IMM)
1089					EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
1090				EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
1091			}
1092			EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, reg_map[src2]);
1093		}
1094		return SLJIT_SUCCESS;
1095
1096	case SLJIT_MOV_UB:
1097	case SLJIT_MOV_SB:
1098		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
1099		if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) {
1100#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
1101			if (op == SLJIT_MOV_UB)
1102				return push_inst(compiler, EMIT_DATA_PROCESS_INS(AND_DP, 0, dst, src2, SRC2_IMM | 0xff));
1103			EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | reg_map[src2]));
1104			return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | (op == SLJIT_MOV_UB ? 0x20 : 0x40) | reg_map[dst]));
1105#else
1106			return push_inst(compiler, (op == SLJIT_MOV_UB ? UXTB : SXTB) | RD(dst) | RM(src2));
1107#endif
1108		}
1109		else if (dst != src2) {
1110			SLJIT_ASSERT(src2 & SRC2_IMM);
1111			if (flags & INV_IMM)
1112				EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
1113			EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
1114		}
1115		return SLJIT_SUCCESS;
1116
1117	case SLJIT_MOV_UH:
1118	case SLJIT_MOV_SH:
1119		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
1120		if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) {
1121#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
1122			EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | reg_map[src2]));
1123			return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | (op == SLJIT_MOV_UH ? 0x20 : 0x40) | reg_map[dst]));
1124#else
1125			return push_inst(compiler, (op == SLJIT_MOV_UH ? UXTH : SXTH) | RD(dst) | RM(src2));
1126#endif
1127		}
1128		else if (dst != src2) {
1129			SLJIT_ASSERT(src2 & SRC2_IMM);
1130			if (flags & INV_IMM)
1131				EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
1132			EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
1133		}
1134		return SLJIT_SUCCESS;
1135
1136	case SLJIT_NOT:
1137		if (src2 & SRC2_IMM) {
1138			if (flags & INV_IMM)
1139				EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
1140			EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
1141		}
1142		EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, RM(src2));
1143
1144	case SLJIT_CLZ:
1145		SLJIT_ASSERT(!(flags & INV_IMM));
1146		SLJIT_ASSERT(!(src2 & SRC2_IMM));
1147		FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(src2)));
1148		if (flags & SET_FLAGS)
1149			EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(CMP_DP, SLJIT_UNUSED, dst, SRC2_IMM);
1150		return SLJIT_SUCCESS;
1151	}
1152	SLJIT_ASSERT_STOP();
1153	return SLJIT_SUCCESS;
1154}
1155
1156#undef EMIT_DATA_PROCESS_INS_AND_RETURN
1157#undef EMIT_FULL_DATA_PROCESS_INS_AND_RETURN
1158#undef EMIT_SHIFT_INS_AND_RETURN
1159
1160/* Tests whether the immediate can be stored in the 12 bit imm field.
1161   Returns with 0 if not possible. */
1162static sljit_uw get_immediate(sljit_uw imm)
1163{
1164	int rol;
1165
1166	if (imm <= 0xff)
1167		return SRC2_IMM | imm;
1168
1169	if (!(imm & 0xff000000)) {
1170		imm <<= 8;
1171		rol = 8;
1172	}
1173	else {
1174		imm = (imm << 24) | (imm >> 8);
1175		rol = 0;
1176	}
1177
1178	if (!(imm & 0xff000000)) {
1179		imm <<= 8;
1180		rol += 4;
1181	}
1182
1183	if (!(imm & 0xf0000000)) {
1184		imm <<= 4;
1185		rol += 2;
1186	}
1187
1188	if (!(imm & 0xc0000000)) {
1189		imm <<= 2;
1190		rol += 1;
1191	}
1192
1193	if (!(imm & 0x00ffffff))
1194		return SRC2_IMM | (imm >> 24) | (rol << 8);
1195	else
1196		return 0;
1197}
1198
1199#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
1200static int generate_int(struct sljit_compiler *compiler, int reg, sljit_uw imm, int positive)
1201{
1202	sljit_uw mask;
1203	sljit_uw imm1;
1204	sljit_uw imm2;
1205	int rol;
1206
1207	/* Step1: Search a zero byte (8 continous zero bit). */
1208	mask = 0xff000000;
1209	rol = 8;
1210	while(1) {
1211		if (!(imm & mask)) {
1212			/* Rol imm by rol. */
1213			imm = (imm << rol) | (imm >> (32 - rol));
1214			/* Calculate arm rol. */
1215			rol = 4 + (rol >> 1);
1216			break;
1217		}
1218		rol += 2;
1219		mask >>= 2;
1220		if (mask & 0x3) {
1221			/* rol by 8. */
1222			imm = (imm << 8) | (imm >> 24);
1223			mask = 0xff00;
1224			rol = 24;
1225			while (1) {
1226				if (!(imm & mask)) {
1227					/* Rol imm by rol. */
1228					imm = (imm << rol) | (imm >> (32 - rol));
1229					/* Calculate arm rol. */
1230					rol = (rol >> 1) - 8;
1231					break;
1232				}
1233				rol += 2;
1234				mask >>= 2;
1235				if (mask & 0x3)
1236					return 0;
1237			}
1238			break;
1239		}
1240	}
1241
1242	/* The low 8 bit must be zero. */
1243	SLJIT_ASSERT(!(imm & 0xff));
1244
1245	if (!(imm & 0xff000000)) {
1246		imm1 = SRC2_IMM | ((imm >> 16) & 0xff) | (((rol + 4) & 0xf) << 8);
1247		imm2 = SRC2_IMM | ((imm >> 8) & 0xff) | (((rol + 8) & 0xf) << 8);
1248	}
1249	else if (imm & 0xc0000000) {
1250		imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8);
1251		imm <<= 8;
1252		rol += 4;
1253
1254		if (!(imm & 0xff000000)) {
1255			imm <<= 8;
1256			rol += 4;
1257		}
1258
1259		if (!(imm & 0xf0000000)) {
1260			imm <<= 4;
1261			rol += 2;
1262		}
1263
1264		if (!(imm & 0xc0000000)) {
1265			imm <<= 2;
1266			rol += 1;
1267		}
1268
1269		if (!(imm & 0x00ffffff))
1270			imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8);
1271		else
1272			return 0;
1273	}
1274	else {
1275		if (!(imm & 0xf0000000)) {
1276			imm <<= 4;
1277			rol += 2;
1278		}
1279
1280		if (!(imm & 0xc0000000)) {
1281			imm <<= 2;
1282			rol += 1;
1283		}
1284
1285		imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8);
1286		imm <<= 8;
1287		rol += 4;
1288
1289		if (!(imm & 0xf0000000)) {
1290			imm <<= 4;
1291			rol += 2;
1292		}
1293
1294		if (!(imm & 0xc0000000)) {
1295			imm <<= 2;
1296			rol += 1;
1297		}
1298
1299		if (!(imm & 0x00ffffff))
1300			imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8);
1301		else
1302			return 0;
1303	}
1304
1305	EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(positive ? MOV_DP : MVN_DP, 0, reg, SLJIT_UNUSED, imm1));
1306	EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(positive ? ORR_DP : BIC_DP, 0, reg, reg, imm2));
1307	return 1;
1308}
1309#endif
1310
1311static int load_immediate(struct sljit_compiler *compiler, int reg, sljit_uw imm)
1312{
1313	sljit_uw tmp;
1314
1315#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
1316	if (!(imm & ~0xffff))
1317		return push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff));
1318#endif
1319
1320	/* Create imm by 1 inst. */
1321	tmp = get_immediate(imm);
1322	if (tmp) {
1323		EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, tmp));
1324		return SLJIT_SUCCESS;
1325	}
1326
1327	tmp = get_immediate(~imm);
1328	if (tmp) {
1329		EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, tmp));
1330		return SLJIT_SUCCESS;
1331	}
1332
1333#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
1334	/* Create imm by 2 inst. */
1335	FAIL_IF(generate_int(compiler, reg, imm, 1));
1336	FAIL_IF(generate_int(compiler, reg, ~imm, 0));
1337
1338	/* Load integer. */
1339	return push_inst_with_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, reg, TMP_PC, 0), imm);
1340#else
1341	return emit_imm(compiler, reg, imm);
1342#endif
1343}
1344
1345/* Can perform an operation using at most 1 instruction. */
1346static int getput_arg_fast(struct sljit_compiler *compiler, int inp_flags, int reg, int arg, sljit_w argw)
1347{
1348	sljit_uw imm;
1349
1350	if (arg & SLJIT_IMM) {
1351		imm = get_immediate(argw);
1352		if (imm) {
1353			if (inp_flags & ARG_TEST)
1354				return 1;
1355			EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, imm));
1356			return -1;
1357		}
1358		imm = get_immediate(~argw);
1359		if (imm) {
1360			if (inp_flags & ARG_TEST)
1361				return 1;
1362			EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, imm));
1363			return -1;
1364		}
1365		return (inp_flags & ARG_TEST) ? SLJIT_SUCCESS : 0;
1366	}
1367
1368	SLJIT_ASSERT(arg & SLJIT_MEM);
1369
1370	/* Fast loads/stores. */
1371	if (arg & 0xf) {
1372		if (!(arg & 0xf0)) {
1373			if (IS_TYPE1_TRANSFER(inp_flags)) {
1374				if (argw >= 0 && argw <= 0xfff) {
1375					if (inp_flags & ARG_TEST)
1376						return 1;
1377					EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & 0xf, argw));
1378					return -1;
1379				}
1380				if (argw < 0 && argw >= -0xfff) {
1381					if (inp_flags & ARG_TEST)
1382						return 1;
1383					EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & 0xf, -argw));
1384					return -1;
1385				}
1386			}
1387			else {
1388				if (argw >= 0 && argw <= 0xff) {
1389					if (inp_flags & ARG_TEST)
1390						return 1;
1391					EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & 0xf, TYPE2_TRANSFER_IMM(argw)));
1392					return -1;
1393				}
1394				if (argw < 0 && argw >= -0xff) {
1395					if (inp_flags & ARG_TEST)
1396						return 1;
1397					argw = -argw;
1398					EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & 0xf, TYPE2_TRANSFER_IMM(argw)));
1399					return -1;
1400				}
1401			}
1402		}
1403		else if ((argw & 0x3) == 0 || IS_TYPE1_TRANSFER(inp_flags)) {
1404			if (inp_flags & ARG_TEST)
1405				return 1;
1406			EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & 0xf,
1407				RM((arg >> 4) & 0xf) | (IS_TYPE1_TRANSFER(inp_flags) ? SRC2_IMM : 0) | ((argw & 0x3) << 7)));
1408			return -1;
1409		}
1410	}
1411
1412	return (inp_flags & ARG_TEST) ? SLJIT_SUCCESS : 0;
1413}
1414
1415/* See getput_arg below.
1416   Note: can_cache is called only for binary operators. Those
1417   operators always uses word arguments without write back. */
1418static int can_cache(int arg, sljit_w argw, int next_arg, sljit_w next_argw)
1419{
1420	/* Immediate caching is not supported as it would be an operation on constant arguments. */
1421	if (arg & SLJIT_IMM)
1422		return 0;
1423
1424	/* Always a simple operation. */
1425	if (arg & 0xf0)
1426		return 0;
1427
1428	if (!(arg & 0xf)) {
1429		/* Immediate access. */
1430		if ((next_arg & SLJIT_MEM) && ((sljit_uw)argw - (sljit_uw)next_argw <= 0xfff || (sljit_uw)next_argw - (sljit_uw)argw <= 0xfff))
1431			return 1;
1432		return 0;
1433	}
1434
1435	if (argw <= 0xfffff && argw >= -0xfffff)
1436		return 0;
1437
1438	if (argw == next_argw && (next_arg & SLJIT_MEM))
1439		return 1;
1440
1441	if (arg == next_arg && ((sljit_uw)argw - (sljit_uw)next_argw <= 0xfff || (sljit_uw)next_argw - (sljit_uw)argw <= 0xfff))
1442		return 1;
1443
1444	return 0;
1445}
1446
1447#define GETPUT_ARG_DATA_TRANSFER(add, wb, target, base, imm) \
1448	if (max_delta & 0xf00) \
1449		FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, add, wb, target, base, imm))); \
1450	else \
1451		FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, add, wb, target, base, TYPE2_TRANSFER_IMM(imm))));
1452
1453#define TEST_WRITE_BACK() \
1454	if (inp_flags & WRITE_BACK) { \
1455		tmp_r = arg & 0xf; \
1456		if (reg == tmp_r) { \
1457			/* This can only happen for stores */ \
1458			/* since ldr reg, [reg, ...]! has no meaning */ \
1459			SLJIT_ASSERT(!(inp_flags & LOAD_DATA)); \
1460			EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(reg))); \
1461			reg = TMP_REG3; \
1462		} \
1463	}
1464
1465/* Emit the necessary instructions. See can_cache above. */
1466static int getput_arg(struct sljit_compiler *compiler, int inp_flags, int reg, int arg, sljit_w argw, int next_arg, sljit_w next_argw)
1467{
1468	int tmp_r;
1469	sljit_w max_delta;
1470	sljit_w sign;
1471
1472	if (arg & SLJIT_IMM) {
1473		SLJIT_ASSERT(inp_flags & LOAD_DATA);
1474		return load_immediate(compiler, reg, argw);
1475	}
1476
1477	SLJIT_ASSERT(arg & SLJIT_MEM);
1478
1479	tmp_r = (inp_flags & LOAD_DATA) ? reg : TMP_REG3;
1480	max_delta = IS_TYPE1_TRANSFER(inp_flags) ? 0xfff : 0xff;
1481
1482	if ((arg & 0xf) == SLJIT_UNUSED) {
1483		/* Write back is not used. */
1484		if ((compiler->cache_arg & SLJIT_IMM) && (((sljit_uw)argw - (sljit_uw)compiler->cache_argw) <= (sljit_uw)max_delta || ((sljit_uw)compiler->cache_argw - (sljit_uw)argw) <= (sljit_uw)max_delta)) {
1485			if (((sljit_uw)argw - (sljit_uw)compiler->cache_argw) <= (sljit_uw)max_delta) {
1486				sign = 1;
1487				argw = argw - compiler->cache_argw;
1488			}
1489			else {
1490				sign = 0;
1491				argw = compiler->cache_argw - argw;
1492			}
1493
1494			if (max_delta & 0xf00) {
1495				EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, sign, 0, reg, TMP_REG3, argw));
1496			}
1497			else {
1498				EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, sign, 0, reg, TMP_REG3, TYPE2_TRANSFER_IMM(argw)));
1499			}
1500			return SLJIT_SUCCESS;
1501		}
1502
1503		/* With write back, we can create some sophisticated loads, but
1504		   it is hard to decide whether we should convert downward (0s) or upward (1s). */
1505		if ((next_arg & SLJIT_MEM) && ((sljit_uw)argw - (sljit_uw)next_argw <= (sljit_uw)max_delta || (sljit_uw)next_argw - (sljit_uw)argw <= (sljit_uw)max_delta)) {
1506			SLJIT_ASSERT(inp_flags & LOAD_DATA);
1507
1508			compiler->cache_arg = SLJIT_IMM;
1509			compiler->cache_argw = argw;
1510			tmp_r = TMP_REG3;
1511		}
1512
1513		FAIL_IF(load_immediate(compiler, tmp_r, argw));
1514		GETPUT_ARG_DATA_TRANSFER(1, 0, reg, tmp_r, 0);
1515		return SLJIT_SUCCESS;
1516	}
1517
1518	/* Extended imm addressing for [reg+imm] format. */
1519	sign = (max_delta << 8) | 0xff;
1520	if (!(arg & 0xf0) && argw <= sign && argw >= -sign) {
1521		TEST_WRITE_BACK();
1522		if (argw >= 0) {
1523			sign = 1;
1524		}
1525		else {
1526			sign = 0;
1527			argw = -argw;
1528		}
1529
1530		/* Optimization: add is 0x4, sub is 0x2. Sign is 1 for add and 0 for sub. */
1531		if (max_delta & 0xf00)
1532			EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(SUB_DP << sign, 0, tmp_r, arg & 0xf, SRC2_IMM | (argw >> 12) | 0xa00));
1533		else
1534			EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(SUB_DP << sign, 0, tmp_r, arg & 0xf, SRC2_IMM | (argw >> 8) | 0xc00));
1535
1536		argw &= max_delta;
1537		GETPUT_ARG_DATA_TRANSFER(sign, inp_flags & WRITE_BACK, reg, tmp_r, argw);
1538		return SLJIT_SUCCESS;
1539	}
1540
1541	if (arg & 0xf0) {
1542		SLJIT_ASSERT((argw & 0x3) && !(max_delta & 0xf00));
1543		if (inp_flags & WRITE_BACK)
1544			tmp_r = arg & 0xf;
1545		EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & 0xf, RM((arg >> 4) & 0xf) | ((argw & 0x3) << 7)));
1546		EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, 0, reg, tmp_r, TYPE2_TRANSFER_IMM(0)));
1547		return SLJIT_SUCCESS;
1548	}
1549
1550	if (compiler->cache_arg == arg && ((sljit_uw)argw - (sljit_uw)compiler->cache_argw) <= (sljit_uw)max_delta) {
1551		SLJIT_ASSERT(!(inp_flags & WRITE_BACK));
1552		argw = argw - compiler->cache_argw;
1553		GETPUT_ARG_DATA_TRANSFER(1, 0, reg, TMP_REG3, argw);
1554		return SLJIT_SUCCESS;
1555	}
1556
1557	if (compiler->cache_arg == arg && ((sljit_uw)compiler->cache_argw - (sljit_uw)argw) <= (sljit_uw)max_delta) {
1558		SLJIT_ASSERT(!(inp_flags & WRITE_BACK));
1559		argw = compiler->cache_argw - argw;
1560		GETPUT_ARG_DATA_TRANSFER(0, 0, reg, TMP_REG3, argw);
1561		return SLJIT_SUCCESS;
1562	}
1563
1564	if ((compiler->cache_arg & SLJIT_IMM) && compiler->cache_argw == argw) {
1565		TEST_WRITE_BACK();
1566		EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & 0xf, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0)));
1567		return SLJIT_SUCCESS;
1568	}
1569
1570	if (argw == next_argw && (next_arg & SLJIT_MEM)) {
1571		SLJIT_ASSERT(inp_flags & LOAD_DATA);
1572		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
1573
1574		compiler->cache_arg = SLJIT_IMM;
1575		compiler->cache_argw = argw;
1576
1577		TEST_WRITE_BACK();
1578		EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & 0xf, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0)));
1579		return SLJIT_SUCCESS;
1580	}
1581
1582	if (arg == next_arg && !(inp_flags & WRITE_BACK) && ((sljit_uw)argw - (sljit_uw)next_argw <= (sljit_uw)max_delta || (sljit_uw)next_argw - (sljit_uw)argw <= (sljit_uw)max_delta)) {
1583		SLJIT_ASSERT(inp_flags & LOAD_DATA);
1584		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
1585		EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, TMP_REG3, reg_map[arg & 0xf]));
1586
1587		compiler->cache_arg = arg;
1588		compiler->cache_argw = argw;
1589
1590		GETPUT_ARG_DATA_TRANSFER(1, 0, reg, TMP_REG3, 0);
1591		return SLJIT_SUCCESS;
1592	}
1593
1594	if ((arg & 0xf) == tmp_r) {
1595		compiler->cache_arg = SLJIT_IMM;
1596		compiler->cache_argw = argw;
1597		tmp_r = TMP_REG3;
1598	}
1599
1600	FAIL_IF(load_immediate(compiler, tmp_r, argw));
1601	EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & 0xf, reg_map[tmp_r] | (max_delta & 0xf00 ? SRC2_IMM : 0)));
1602	return SLJIT_SUCCESS;
1603}
1604
1605static int emit_op(struct sljit_compiler *compiler, int op, int inp_flags,
1606	int dst, sljit_w dstw,
1607	int src1, sljit_w src1w,
1608	int src2, sljit_w src2w)
1609{
1610	/* arg1 goes to TMP_REG1 or src reg
1611	   arg2 goes to TMP_REG2, imm or src reg
1612	   TMP_REG3 can be used for caching
1613	   result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */
1614
1615	/* We prefers register and simple consts. */
1616	int dst_r;
1617	int src1_r;
1618	int src2_r = 0;
1619	int sugg_src2_r = TMP_REG2;
1620	int flags = GET_FLAGS(op) ? SET_FLAGS : 0;
1621
1622	compiler->cache_arg = 0;
1623	compiler->cache_argw = 0;
1624
1625	/* Destination check. */
1626	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REG3) {
1627		dst_r = dst;
1628		flags |= REG_DEST;
1629		if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
1630			sugg_src2_r = dst_r;
1631	}
1632	else if (dst == SLJIT_UNUSED) {
1633		if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM))
1634			return SLJIT_SUCCESS;
1635		dst_r = TMP_REG2;
1636	}
1637	else {
1638		SLJIT_ASSERT(dst & SLJIT_MEM);
1639		if (getput_arg_fast(compiler, inp_flags | ARG_TEST, TMP_REG2, dst, dstw)) {
1640			flags |= FAST_DEST;
1641			dst_r = TMP_REG2;
1642		}
1643		else {
1644			flags |= SLOW_DEST;
1645			dst_r = 0;
1646		}
1647	}
1648
1649	/* Source 1. */
1650	if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= TMP_REG3)
1651		src1_r = src1;
1652	else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= TMP_REG3) {
1653		flags |= ARGS_SWAPPED;
1654		src1_r = src2;
1655		src2 = src1;
1656		src2w = src1w;
1657	}
1658	else do { /* do { } while(0) is used because of breaks. */
1659		src1_r = 0;
1660		if ((inp_flags & ALLOW_ANY_IMM) && (src1 & SLJIT_IMM)) {
1661			/* The second check will generate a hit. */
1662			src2_r = get_immediate(src1w);
1663			if (src2_r) {
1664				flags |= ARGS_SWAPPED;
1665				src1 = src2;
1666				src1w = src2w;
1667				break;
1668			}
1669			if (inp_flags & ALLOW_INV_IMM) {
1670				src2_r = get_immediate(~src1w);
1671				if (src2_r) {
1672					flags |= ARGS_SWAPPED | INV_IMM;
1673					src1 = src2;
1674					src1w = src2w;
1675					break;
1676				}
1677			}
1678			if (GET_OPCODE(op) == SLJIT_ADD) {
1679				src2_r = get_immediate(-src1w);
1680				if (src2_r) {
1681					/* Note: ARGS_SWAPPED is intentionally not applied! */
1682					src1 = src2;
1683					src1w = src2w;
1684					op = SLJIT_SUB | GET_ALL_FLAGS(op);
1685					break;
1686				}
1687			}
1688		}
1689
1690		if (getput_arg_fast(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w)) {
1691			FAIL_IF(compiler->error);
1692			src1_r = TMP_REG1;
1693		}
1694	} while (0);
1695
1696	/* Source 2. */
1697	if (src2_r == 0) {
1698		if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= TMP_REG3) {
1699			src2_r = src2;
1700			flags |= REG_SOURCE;
1701			if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
1702				dst_r = src2_r;
1703		}
		else do { /* do { } while(0) is used so that break can exit the block early. */
1705			if ((inp_flags & ALLOW_ANY_IMM) && (src2 & SLJIT_IMM)) {
1706				src2_r = get_immediate(src2w);
1707				if (src2_r)
1708					break;
1709				if (inp_flags & ALLOW_INV_IMM) {
1710					src2_r = get_immediate(~src2w);
1711					if (src2_r) {
1712						flags |= INV_IMM;
1713						break;
1714					}
1715				}
1716				if (GET_OPCODE(op) == SLJIT_ADD) {
1717					src2_r = get_immediate(-src2w);
1718					if (src2_r) {
1719						op = SLJIT_SUB | GET_ALL_FLAGS(op);
1720						flags &= ~ARGS_SWAPPED;
1721						break;
1722					}
1723				}
1724				if (GET_OPCODE(op) == SLJIT_SUB && !(flags & ARGS_SWAPPED)) {
1725					src2_r = get_immediate(-src2w);
1726					if (src2_r) {
1727						op = SLJIT_ADD | GET_ALL_FLAGS(op);
1728						flags &= ~ARGS_SWAPPED;
1729						break;
1730					}
1731				}
1732			}
1733
1734			/* src2_r is 0. */
1735			if (getput_arg_fast(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w)) {
1736				FAIL_IF(compiler->error);
1737				src2_r = sugg_src2_r;
1738			}
1739		} while (0);
1740	}
1741
1742	/* src1_r, src2_r and dst_r can be zero (=unprocessed) or non-zero.
1743	   If they are zero, they must not be registers. */
1744	if (src1_r == 0 && src2_r == 0 && dst_r == 0) {
1745		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
1746			SLJIT_ASSERT(!(flags & ARGS_SWAPPED));
1747			flags |= ARGS_SWAPPED;
1748			FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src2, src2w, src1, src1w));
1749			FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG2, src1, src1w, dst, dstw));
1750		}
1751		else {
1752			FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
1753			FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw));
1754		}
1755		src1_r = TMP_REG1;
1756		src2_r = TMP_REG2;
1757	}
1758	else if (src1_r == 0 && src2_r == 0) {
1759		FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
1760		src1_r = TMP_REG1;
1761	}
1762	else if (src1_r == 0 && dst_r == 0) {
1763		FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
1764		src1_r = TMP_REG1;
1765	}
1766	else if (src2_r == 0 && dst_r == 0) {
1767		FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw));
1768		src2_r = sugg_src2_r;
1769	}
1770
1771	if (dst_r == 0)
1772		dst_r = TMP_REG2;
1773
1774	if (src1_r == 0) {
1775		FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, 0, 0));
1776		src1_r = TMP_REG1;
1777	}
1778
1779	if (src2_r == 0) {
1780		FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w, 0, 0));
1781		src2_r = sugg_src2_r;
1782	}
1783
1784	FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
1785
1786	if (flags & (FAST_DEST | SLOW_DEST)) {
1787		if (flags & FAST_DEST)
1788			FAIL_IF(getput_arg_fast(compiler, inp_flags, dst_r, dst, dstw));
1789		else
1790			FAIL_IF(getput_arg(compiler, inp_flags, dst_r, dst, dstw, 0, 0));
1791	}
1792	return SLJIT_SUCCESS;
1793}
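
/* Worked example of the immediate folding above (illustrative only). An ARM
   data-processing immediate is an 8 bit value rotated right by an even amount,
   so not every 32 bit constant is encodable:

     emit_op(compiler, SLJIT_ADD, ALLOW_IMM, dst, 0, src1, 0, SLJIT_IMM, -4);
         get_immediate(-4) fails (0xfffffffc is not encodable), but
         get_immediate(4) succeeds, so the GET_OPCODE(op) == SLJIT_ADD branches
         above rewrite the operation to SLJIT_SUB with the immediate 4.

   Similarly, when ALLOW_INV_IMM is set and only ~imm is encodable, the INV_IMM
   flag is forwarded to emit_single_op, which is presumably expected to select
   the inverted instruction form (e.g. BIC instead of AND, MVN instead of MOV). */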
1794
1795#ifdef __cplusplus
1796extern "C" {
1797#endif
1798
1799#if defined(__GNUC__)
1800extern unsigned int __aeabi_uidivmod(unsigned numerator, unsigned denominator);
1801extern unsigned int __aeabi_idivmod(unsigned numerator, unsigned denominator);
1802#else
1803#error "Software divmod functions are needed"
1804#endif
1805
1806#ifdef __cplusplus
1807}
1808#endif
1809
1810SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op)
1811{
1812	CHECK_ERROR();
1813	check_sljit_emit_op0(compiler, op);
1814
1815	op = GET_OPCODE(op);
1816	switch (op) {
1817	case SLJIT_BREAKPOINT:
1818		EMIT_INSTRUCTION(BKPT);
1819		break;
1820	case SLJIT_NOP:
1821		EMIT_INSTRUCTION(NOP);
1822		break;
1823	case SLJIT_UMUL:
1824	case SLJIT_SMUL:
1825#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
1826		return push_inst(compiler, (op == SLJIT_UMUL ? UMULL : SMULL)
1827			| (reg_map[SLJIT_TEMPORARY_REG2] << 16)
1828			| (reg_map[SLJIT_TEMPORARY_REG1] << 12)
1829			| (reg_map[SLJIT_TEMPORARY_REG1] << 8)
1830			| reg_map[SLJIT_TEMPORARY_REG2]);
1831#else
1832		EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, RM(SLJIT_TEMPORARY_REG2)));
1833		return push_inst(compiler, (op == SLJIT_UMUL ? UMULL : SMULL)
1834			| (reg_map[SLJIT_TEMPORARY_REG2] << 16)
1835			| (reg_map[SLJIT_TEMPORARY_REG1] << 12)
1836			| (reg_map[SLJIT_TEMPORARY_REG1] << 8)
1837			| reg_map[TMP_REG1]);
1838#endif
1839	case SLJIT_UDIV:
1840	case SLJIT_SDIV:
1841		if (compiler->temporaries >= 3)
1842			EMIT_INSTRUCTION(0xe52d2008 /* str r2, [sp, #-8]! */);
1843#if defined(__GNUC__)
1844		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
1845			(op == SLJIT_UDIV ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
1846#else
1847#error "Software divmod functions are needed"
1848#endif
1849		if (compiler->temporaries >= 3)
1850			return push_inst(compiler, 0xe49d2008 /* ldr r2, [sp], #8 */);
1851		return SLJIT_SUCCESS;
1852	}
1853
1854	return SLJIT_SUCCESS;
1855}
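
/* Note on the SLJIT_UDIV / SLJIT_SDIV path above (illustrative). The AEABI
   helpers take the numerator in r0 and the denominator in r1 and return the
   quotient in r0 with the remainder in r1; r0 and r1 are exactly where
   SLJIT_TEMPORARY_REG1 and SLJIT_TEMPORARY_REG2 live, so no argument moves are
   needed. r2 (SLJIT_TEMPORARY_REG3 in reg_map) is caller-clobbered under the
   AAPCS, which is why it is spilled around the call whenever the function uses
   three or more temporaries. A hypothetical front-end sequence:

     sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 100);
     sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 7);
     sljit_emit_op0(compiler, SLJIT_UDIV);
         -> on this backend the quotient is returned in r0 (SLJIT_TEMPORARY_REG1). */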
1856
1857SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct sljit_compiler *compiler, int op,
1858	int dst, sljit_w dstw,
1859	int src, sljit_w srcw)
1860{
1861	CHECK_ERROR();
1862	check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
1863	ADJUST_LOCAL_OFFSET(dst, dstw);
1864	ADJUST_LOCAL_OFFSET(src, srcw);
1865
1866	switch (GET_OPCODE(op)) {
1867	case SLJIT_MOV:
1868	case SLJIT_MOV_UI:
1869	case SLJIT_MOV_SI:
1870		return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw);
1871
1872	case SLJIT_MOV_UB:
1873		return emit_op(compiler, SLJIT_MOV_UB, ALLOW_ANY_IMM | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw);
1874
1875	case SLJIT_MOV_SB:
1876		return emit_op(compiler, SLJIT_MOV_SB, ALLOW_ANY_IMM | SIGNED_DATA | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw);
1877
1878	case SLJIT_MOV_UH:
1879		return emit_op(compiler, SLJIT_MOV_UH, ALLOW_ANY_IMM | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw);
1880
1881	case SLJIT_MOV_SH:
1882		return emit_op(compiler, SLJIT_MOV_SH, ALLOW_ANY_IMM | SIGNED_DATA | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw);
1883
1884	case SLJIT_MOVU:
1885	case SLJIT_MOVU_UI:
1886	case SLJIT_MOVU_SI:
1887		return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
1888
1889	case SLJIT_MOVU_UB:
1890		return emit_op(compiler, SLJIT_MOV_UB, ALLOW_ANY_IMM | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw);
1891
1892	case SLJIT_MOVU_SB:
1893		return emit_op(compiler, SLJIT_MOV_SB, ALLOW_ANY_IMM | SIGNED_DATA | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw);
1894
1895	case SLJIT_MOVU_UH:
1896		return emit_op(compiler, SLJIT_MOV_UH, ALLOW_ANY_IMM | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw);
1897
1898	case SLJIT_MOVU_SH:
1899		return emit_op(compiler, SLJIT_MOV_SH, ALLOW_ANY_IMM | SIGNED_DATA | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw);
1900
1901	case SLJIT_NOT:
1902		return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw);
1903
1904	case SLJIT_NEG:
1905#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
1906		compiler->skip_checks = 1;
1907#endif
1908		return sljit_emit_op2(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), dst, dstw, SLJIT_IMM, 0, src, srcw);
1909
1910	case SLJIT_CLZ:
1911		return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw);
1912	}
1913
1914	return SLJIT_SUCCESS;
1915}
1916
1917SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int op,
1918	int dst, sljit_w dstw,
1919	int src1, sljit_w src1w,
1920	int src2, sljit_w src2w)
1921{
1922	CHECK_ERROR();
1923	check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
1924	ADJUST_LOCAL_OFFSET(dst, dstw);
1925	ADJUST_LOCAL_OFFSET(src1, src1w);
1926	ADJUST_LOCAL_OFFSET(src2, src2w);
1927
1928	switch (GET_OPCODE(op)) {
1929	case SLJIT_ADD:
1930	case SLJIT_ADDC:
1931	case SLJIT_SUB:
1932	case SLJIT_SUBC:
1933	case SLJIT_OR:
1934	case SLJIT_XOR:
1935		return emit_op(compiler, op, ALLOW_IMM, dst, dstw, src1, src1w, src2, src2w);
1936
1937	case SLJIT_MUL:
1938		return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w);
1939
1940	case SLJIT_AND:
1941		return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, src1, src1w, src2, src2w);
1942
1943	case SLJIT_SHL:
1944	case SLJIT_LSHR:
1945	case SLJIT_ASHR:
1946		if (src2 & SLJIT_IMM) {
1947			compiler->shift_imm = src2w & 0x1f;
1948			return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src1, src1w);
1949		}
1950		else {
1951			compiler->shift_imm = 0x20;
1952			return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w);
1953		}
1954	}
1955
1956	return SLJIT_SUCCESS;
1957}
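
/* Shift handling sketch (illustrative): an immediate shift amount is passed to
   emit_op through compiler->shift_imm, masked to the 0..31 range the ARM
   barrel shifter accepts, while the sentinel value 0x20 means the amount is in
   the src2 register. For example:

     sljit_emit_op2(compiler, SLJIT_SHL, SLJIT_TEMPORARY_REG1, 0,
         SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 3);
         -> compiler->shift_imm = 3 and src1 is passed in emit_op's second
            source slot, with TMP_REG1 acting only as a placeholder operand. */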
1958
1959SLJIT_API_FUNC_ATTRIBUTE int sljit_get_register_index(int reg)
1960{
1961	check_sljit_get_register_index(reg);
1962	return reg_map[reg];
1963}
1964
1965SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op_custom(struct sljit_compiler *compiler,
1966	void *instruction, int size)
1967{
1968	CHECK_ERROR();
1969	check_sljit_emit_op_custom(compiler, instruction, size);
1970	SLJIT_ASSERT(size == 4);
1971
1972	return push_inst(compiler, *(sljit_uw*)instruction);
1973}
1974
1975/* --------------------------------------------------------------------- */
1976/*  Floating point operators                                             */
1977/* --------------------------------------------------------------------- */
1978
1979#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
1980
1981/* 0 - no fpu
1982   1 - vfp */
1983static int arm_fpu_type = -1;
1984
1985static void init_compiler()
1986{
1987	if (arm_fpu_type != -1)
1988		return;
1989
	/* TODO: Only the OS can reliably determine the correct FPU type. */
1991	arm_fpu_type = 1;
1992}
1993
1994SLJIT_API_FUNC_ATTRIBUTE int sljit_is_fpu_available(void)
1995{
1996	if (arm_fpu_type == -1)
1997		init_compiler();
1998	return arm_fpu_type;
1999}
2000
2001#else
2002
2003#define arm_fpu_type 1
2004
2005SLJIT_API_FUNC_ATTRIBUTE int sljit_is_fpu_available(void)
2006{
2007	/* Always available. */
2008	return 1;
2009}
2010
2011#endif
2012
2013#define EMIT_FPU_DATA_TRANSFER(add, load, base, freg, offs) \
2014	(VSTR | ((add) << 23) | ((load) << 20) | (reg_map[base] << 16) | (freg << 12) | (offs))
2015#define EMIT_FPU_OPERATION(opcode, dst, src1, src2) \
2016	((opcode) | ((dst) << 12) | (src1) | ((src2) << 16))
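
/* Encoding note (illustrative): the offs field of VLDR/VSTR is an 8 bit word
   offset, i.e. the byte displacement is offs * 4 and must fit in 0..0x3fc.
   emit_fpu_data_transfer below therefore requires the low two bits of argw to
   be clear and passes argw >> 2, for example:

     argw = 0x100  ->  add = 1, offs = 0x40
     argw = -0x20  ->  add = 0, offs = 0x8

   The add argument sets the U bit (bit 23), selecting whether the displacement
   is added to or subtracted from the base register. */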
2017
2018static int emit_fpu_data_transfer(struct sljit_compiler *compiler, int fpu_reg, int load, int arg, sljit_w argw)
2019{
2020	SLJIT_ASSERT(arg & SLJIT_MEM);
2021
2022	/* Fast loads and stores. */
2023	if ((arg & 0xf) && !(arg & 0xf0) && (argw & 0x3) == 0) {
2024		if (argw >= 0 && argw <= 0x3ff) {
2025			EMIT_INSTRUCTION(EMIT_FPU_DATA_TRANSFER(1, load, arg & 0xf, fpu_reg, argw >> 2));
2026			return SLJIT_SUCCESS;
2027		}
2028		if (argw < 0 && argw >= -0x3ff) {
2029			EMIT_INSTRUCTION(EMIT_FPU_DATA_TRANSFER(0, load, arg & 0xf, fpu_reg, (-argw) >> 2));
2030			return SLJIT_SUCCESS;
2031		}
2032		if (argw >= 0 && argw <= 0x3ffff) {
2033			SLJIT_ASSERT(get_immediate(argw & 0x3fc00));
2034			EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & 0xf, get_immediate(argw & 0x3fc00)));
2035			argw &= 0x3ff;
2036			EMIT_INSTRUCTION(EMIT_FPU_DATA_TRANSFER(1, load, TMP_REG1, fpu_reg, argw >> 2));
2037			return SLJIT_SUCCESS;
2038		}
2039		if (argw < 0 && argw >= -0x3ffff) {
2040			argw = -argw;
2041			SLJIT_ASSERT(get_immediate(argw & 0x3fc00));
2042			EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(SUB_DP, 0, TMP_REG1, arg & 0xf, get_immediate(argw & 0x3fc00)));
2043			argw &= 0x3ff;
2044			EMIT_INSTRUCTION(EMIT_FPU_DATA_TRANSFER(0, load, TMP_REG1, fpu_reg, argw >> 2));
2045			return SLJIT_SUCCESS;
2046		}
2047	}
2048
2049	if (arg & 0xf0) {
2050		EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & 0xf, RM((arg >> 4) & 0xf) | ((argw & 0x3) << 7)));
2051		EMIT_INSTRUCTION(EMIT_FPU_DATA_TRANSFER(1, load, TMP_REG1, fpu_reg, 0));
2052		return SLJIT_SUCCESS;
2053	}
2054
2055	if (compiler->cache_arg == arg && ((argw - compiler->cache_argw) & 0x3) == 0) {
2056		if (((sljit_uw)argw - (sljit_uw)compiler->cache_argw) <= 0x3ff) {
2057			EMIT_INSTRUCTION(EMIT_FPU_DATA_TRANSFER(1, load, TMP_REG3, fpu_reg, (argw - compiler->cache_argw) >> 2));
2058			return SLJIT_SUCCESS;
2059		}
2060		if (((sljit_uw)compiler->cache_argw - (sljit_uw)argw) <= 0x3ff) {
2061			EMIT_INSTRUCTION(EMIT_FPU_DATA_TRANSFER(0, load, TMP_REG3, fpu_reg, (compiler->cache_argw - argw) >> 2));
2062			return SLJIT_SUCCESS;
2063		}
2064	}
2065
2066	compiler->cache_arg = arg;
2067	compiler->cache_argw = argw;
2068	if (arg & 0xf) {
2069		FAIL_IF(load_immediate(compiler, TMP_REG1, argw));
2070		EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, arg & 0xf, reg_map[TMP_REG1]));
2071	}
2072	else
2073		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
2074
2075	EMIT_INSTRUCTION(EMIT_FPU_DATA_TRANSFER(1, load, TMP_REG3, fpu_reg, 0));
2076	return SLJIT_SUCCESS;
2077}
2078
2079SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
2080	int dst, sljit_w dstw,
2081	int src, sljit_w srcw)
2082{
2083	int dst_freg;
2084
2085	CHECK_ERROR();
2086	check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
2087
2088	compiler->cache_arg = 0;
2089	compiler->cache_argw = 0;
2090
2091	if (GET_OPCODE(op) == SLJIT_FCMP) {
2092		if (dst > SLJIT_FLOAT_REG4) {
2093			FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG1, 1, dst, dstw));
2094			dst = TMP_FREG1;
2095		}
2096		if (src > SLJIT_FLOAT_REG4) {
2097			FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG2, 1, src, srcw));
2098			src = TMP_FREG2;
2099		}
2100		EMIT_INSTRUCTION(VCMP_F64 | (dst << 12) | src);
2101		EMIT_INSTRUCTION(VMRS);
2102		return SLJIT_SUCCESS;
2103	}
2104
2105	dst_freg = (dst > SLJIT_FLOAT_REG4) ? TMP_FREG1 : dst;
2106
2107	if (src > SLJIT_FLOAT_REG4) {
2108		FAIL_IF(emit_fpu_data_transfer(compiler, dst_freg, 1, src, srcw));
2109		src = dst_freg;
2110	}
2111
2112	switch (op) {
2113		case SLJIT_FMOV:
2114			if (src != dst_freg && dst_freg != TMP_FREG1)
2115				EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VMOV_F64, dst_freg, src, 0));
2116			break;
2117		case SLJIT_FNEG:
2118			EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VNEG_F64, dst_freg, src, 0));
2119			break;
2120		case SLJIT_FABS:
2121			EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VABS_F64, dst_freg, src, 0));
2122			break;
2123	}
2124
2125	if (dst_freg == TMP_FREG1)
2126		FAIL_IF(emit_fpu_data_transfer(compiler, src, 0, dst, dstw));
2127
2128	return SLJIT_SUCCESS;
2129}
2130
2131SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
2132	int dst, sljit_w dstw,
2133	int src1, sljit_w src1w,
2134	int src2, sljit_w src2w)
2135{
2136	int dst_freg;
2137
2138	CHECK_ERROR();
2139	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2140
2141	compiler->cache_arg = 0;
2142	compiler->cache_argw = 0;
2143
2144	dst_freg = (dst > SLJIT_FLOAT_REG4) ? TMP_FREG1 : dst;
2145
2146	if (src2 > SLJIT_FLOAT_REG4) {
2147		FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG2, 1, src2, src2w));
2148		src2 = TMP_FREG2;
2149	}
2150
2151	if (src1 > SLJIT_FLOAT_REG4) {
2152		FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG1, 1, src1, src1w));
2153		src1 = TMP_FREG1;
2154	}
2155
2156	switch (op) {
2157	case SLJIT_FADD:
2158		EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VADD_F64, dst_freg, src2, src1));
2159		break;
2160
2161	case SLJIT_FSUB:
2162		EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VSUB_F64, dst_freg, src2, src1));
2163		break;
2164
2165	case SLJIT_FMUL:
2166		EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VMUL_F64, dst_freg, src2, src1));
2167		break;
2168
2169	case SLJIT_FDIV:
2170		EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VDIV_F64, dst_freg, src2, src1));
2171		break;
2172	}
2173
2174	if (dst_freg == TMP_FREG1)
2175		FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG1, 0, dst, dstw));
2176
2177	return SLJIT_SUCCESS;
2178}
2179
2180/* --------------------------------------------------------------------- */
2181/*  Other instructions                                                   */
2182/* --------------------------------------------------------------------- */
2183
2184SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw)
2185{
2186	CHECK_ERROR();
2187	check_sljit_emit_fast_enter(compiler, dst, dstw);
2188	ADJUST_LOCAL_OFFSET(dst, dstw);
2189
2190	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS)
2191		return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, RM(TMP_REG3)));
2192	else if (dst & SLJIT_MEM) {
2193		if (getput_arg_fast(compiler, WORD_DATA, TMP_REG3, dst, dstw))
2194			return compiler->error;
2195		EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG2, SLJIT_UNUSED, RM(TMP_REG3)));
2196		compiler->cache_arg = 0;
2197		compiler->cache_argw = 0;
2198		return getput_arg(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0);
2199	}
2200
2201	return SLJIT_SUCCESS;
2202}
2203
2204SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_return(struct sljit_compiler *compiler, int src, sljit_w srcw)
2205{
2206	CHECK_ERROR();
2207	check_sljit_emit_fast_return(compiler, src, srcw);
2208	ADJUST_LOCAL_OFFSET(src, srcw);
2209
2210	if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS)
2211		EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(src)));
2212	else if (src & SLJIT_MEM) {
2213		if (getput_arg_fast(compiler, WORD_DATA | LOAD_DATA, TMP_REG3, src, srcw))
2214			FAIL_IF(compiler->error);
2215		else {
2216			compiler->cache_arg = 0;
2217			compiler->cache_argw = 0;
2218			FAIL_IF(getput_arg(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw, 0, 0));
2219			EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(TMP_REG2)));
2220		}
2221	}
2222	else if (src & SLJIT_IMM)
2223		FAIL_IF(load_immediate(compiler, TMP_REG3, srcw));
2224	return push_inst(compiler, BLX | RM(TMP_REG3));
2225}
2226
2227/* --------------------------------------------------------------------- */
2228/*  Conditional instructions                                             */
2229/* --------------------------------------------------------------------- */
2230
2231static sljit_uw get_cc(int type)
2232{
2233	switch (type) {
2234	case SLJIT_C_EQUAL:
2235	case SLJIT_C_MUL_NOT_OVERFLOW:
2236	case SLJIT_C_FLOAT_EQUAL:
2237		return 0x00000000;
2238
2239	case SLJIT_C_NOT_EQUAL:
2240	case SLJIT_C_MUL_OVERFLOW:
2241	case SLJIT_C_FLOAT_NOT_EQUAL:
2242		return 0x10000000;
2243
2244	case SLJIT_C_LESS:
2245	case SLJIT_C_FLOAT_LESS:
2246		return 0x30000000;
2247
2248	case SLJIT_C_GREATER_EQUAL:
2249	case SLJIT_C_FLOAT_GREATER_EQUAL:
2250		return 0x20000000;
2251
2252	case SLJIT_C_GREATER:
2253	case SLJIT_C_FLOAT_GREATER:
2254		return 0x80000000;
2255
2256	case SLJIT_C_LESS_EQUAL:
2257	case SLJIT_C_FLOAT_LESS_EQUAL:
2258		return 0x90000000;
2259
2260	case SLJIT_C_SIG_LESS:
2261		return 0xb0000000;
2262
2263	case SLJIT_C_SIG_GREATER_EQUAL:
2264		return 0xa0000000;
2265
2266	case SLJIT_C_SIG_GREATER:
2267		return 0xc0000000;
2268
2269	case SLJIT_C_SIG_LESS_EQUAL:
2270		return 0xd0000000;
2271
2272	case SLJIT_C_OVERFLOW:
2273	case SLJIT_C_FLOAT_NAN:
2274		return 0x60000000;
2275
2276	case SLJIT_C_NOT_OVERFLOW:
2277	case SLJIT_C_FLOAT_NOT_NAN:
2278		return 0x70000000;
2279
2280	default: /* SLJIT_JUMP */
2281		return 0xe0000000;
2282	}
2283}
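
/* Usage sketch (taken from the call sites below): get_cc returns the ARM
   condition field in bits 31..28, so a conditional variant of an instruction
   is produced by clearing the default AL condition and OR-ing the new one in,
   as sljit_emit_jump and sljit_emit_cond_value do:

     inst = (inst & ~COND_MASK) | get_cc(SLJIT_C_EQUAL);      0x0... -> EQ
     inst = (inst & ~COND_MASK) | get_cc(SLJIT_C_NOT_EQUAL);  0x1... -> NE */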
2284
2285SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2286{
2287	struct sljit_label *label;
2288
2289	CHECK_ERROR_PTR();
2290	check_sljit_emit_label(compiler);
2291
2292	if (compiler->last_label && compiler->last_label->size == compiler->size)
2293		return compiler->last_label;
2294
2295	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2296	PTR_FAIL_IF(!label);
2297	set_label(label, compiler);
2298	return label;
2299}
2300
2301SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, int type)
2302{
2303	struct sljit_jump *jump;
2304
2305	CHECK_ERROR_PTR();
2306	check_sljit_emit_jump(compiler, type);
2307
2308	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2309	PTR_FAIL_IF(!jump);
2310	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2311	type &= 0xff;
2312
	/* On ARM, call arguments are already in the right registers, so no argument shuffling is needed. */
2314#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
2315	if (type >= SLJIT_FAST_CALL)
2316		PTR_FAIL_IF(prepare_blx(compiler));
2317	PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0,
2318		type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0)) & ~COND_MASK) | get_cc(type), 0));
2319
2320	if (jump->flags & SLJIT_REWRITABLE_JUMP) {
2321		jump->addr = compiler->size;
2322		compiler->patches++;
2323	}
2324
2325	if (type >= SLJIT_FAST_CALL) {
2326		jump->flags |= IS_BL;
2327		PTR_FAIL_IF(emit_blx(compiler));
2328	}
2329
2330	if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
2331		jump->addr = compiler->size;
2332#else
2333	if (type >= SLJIT_FAST_CALL)
2334		jump->flags |= IS_BL;
2335	PTR_FAIL_IF(emit_imm(compiler, TMP_REG1, 0));
2336	PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)) & ~COND_MASK) | get_cc(type)));
2337	jump->addr = compiler->size;
2338#endif
2339	return jump;
2340}
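
/* Note (illustrative): on ARMv5 the target address is kept in the constant
   pool and loaded PC-relatively (push_inst_with_unique_literal), with
   prepare_blx/emit_blx wrapping the sequence for SLJIT_FAST_CALL and call
   types; on ARMv7 emit_imm presumably expands to a movw/movt pair and the
   jump is finished with a conditional BX or BLX through TMP_REG1. Rewritable
   jumps record their address so that sljit_set_jump_addr can patch them later. */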
2341
2342SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_ijump(struct sljit_compiler *compiler, int type, int src, sljit_w srcw)
2343{
2344	struct sljit_jump *jump;
2345
2346	CHECK_ERROR();
2347	check_sljit_emit_ijump(compiler, type, src, srcw);
2348	ADJUST_LOCAL_OFFSET(src, srcw);
2349
	/* On ARM, call arguments are already in the right registers, so no argument shuffling is needed. */
2351	if (src & SLJIT_IMM) {
2352		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2353		FAIL_IF(!jump);
2354		set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
2355		jump->u.target = srcw;
2356
2357#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
2358		if (type >= SLJIT_FAST_CALL)
2359			FAIL_IF(prepare_blx(compiler));
2360		FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0), 0));
2361		if (type >= SLJIT_FAST_CALL)
2362			FAIL_IF(emit_blx(compiler));
2363#else
2364		FAIL_IF(emit_imm(compiler, TMP_REG1, 0));
2365		FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)));
2366#endif
2367		jump->addr = compiler->size;
2368	}
2369	else {
2370		if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS)
2371			return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(src));
2372
2373		SLJIT_ASSERT(src & SLJIT_MEM);
2374		FAIL_IF(emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
2375		return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG2));
2376	}
2377
2378	return SLJIT_SUCCESS;
2379}
2380
2381SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_cond_value(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int type)
2382{
2383	int reg;
2384	sljit_uw cc;
2385
2386	CHECK_ERROR();
2387	check_sljit_emit_cond_value(compiler, op, dst, dstw, type);
2388	ADJUST_LOCAL_OFFSET(dst, dstw);
2389
2390	if (dst == SLJIT_UNUSED)
2391		return SLJIT_SUCCESS;
2392
2393	cc = get_cc(type);
2394	if (GET_OPCODE(op) == SLJIT_OR) {
2395		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
2396			EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(ORR_DP, 0, dst, dst, SRC2_IMM | 1) & ~COND_MASK) | cc);
2397			if (op & SLJIT_SET_E)
2398				return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG1, SLJIT_UNUSED, RM(dst)));
2399			return SLJIT_SUCCESS;
2400		}
2401
2402		EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, SRC2_IMM | 0));
2403		EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, SRC2_IMM | 1) & ~COND_MASK) | cc);
2404#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
2405		compiler->skip_checks = 1;
2406#endif
2407		return emit_op(compiler, op, ALLOW_IMM, dst, dstw, TMP_REG1, 0, dst, dstw);
2408	}
2409
2410	reg = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REG2;
2411
2412	EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, SRC2_IMM | 0));
2413	EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, SRC2_IMM | 1) & ~COND_MASK) | cc);
2414
2415	if (reg == TMP_REG2)
2416		return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
2417	return SLJIT_SUCCESS;
2418}
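
/* Generated code sketch (derived from the cases above): for a register
   destination the SLJIT_OR fast path emits a single conditional
       orrCC  dst, dst, #1
   optionally followed by a flag-setting mov when SLJIT_SET_E is requested.
   All other cases first build the value in a register,
       mov    reg, #0
       movCC  reg, #1
   and, when the destination is in memory, store it through emit_op. */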
2419
2420SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w init_value)
2421{
2422	struct sljit_const *const_;
2423	int reg;
2424
2425	CHECK_ERROR_PTR();
2426	check_sljit_emit_const(compiler, dst, dstw, init_value);
2427	ADJUST_LOCAL_OFFSET(dst, dstw);
2428
2429	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
2430	PTR_FAIL_IF(!const_);
2431
2432	reg = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REG2;
2433
2434#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
2435	PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, reg, TMP_PC, 0), init_value));
2436	compiler->patches++;
2437#else
2438	PTR_FAIL_IF(emit_imm(compiler, reg, init_value));
2439#endif
2440	set_const(const_, compiler);
2441
2442	if (reg == TMP_REG2 && dst != SLJIT_UNUSED)
2443		if (emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, TMP_REG2, 0))
2444			return NULL;
2445	return const_;
2446}
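
/* Usage sketch (illustrative; sljit_get_const_addr is assumed to be the usual
   sljitLir.h accessor for the address recorded by set_const): a constant
   emitted here can be patched after code generation without recompiling:

     struct sljit_const *c = sljit_emit_const(compiler, SLJIT_TEMPORARY_REG1, 0, 0);
     ...
     code = sljit_generate_code(compiler);
     sljit_set_const(sljit_get_const_addr(c), 1234);
*/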
2447
2448SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
2449{
2450	inline_set_jump_addr(addr, new_addr, 1);
2451}
2452
2453SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_w new_constant)
2454{
2455	inline_set_const(addr, new_constant, 1);
2456}
2457