sljitNativePPC_64.c revision 1.1
1/*
2 *    Stack-less Just-In-Time compiler
3 *
4 *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 *   1. Redistributions of source code must retain the above copyright notice, this list of
10 *      conditions and the following disclaimer.
11 *
12 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13 *      of conditions and the following disclaimer in the documentation and/or other materials
14 *      provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
/* PPC 64-bit architecture-dependent functions. */
28
/*
 * ASM_SLJIT_CLZ(src, dst): runs the "cntlzd" instruction on src and stores
 * its result in dst, using GCC-style inline assembly. The build is stopped
 * with an #error when that assembly syntax is not available.
 */
#if !defined(__GNUC__) && !(defined(__IBM_GCC_ASM) && __IBM_GCC_ASM)
#if defined(__xlc__)
#error "Please enable GCC syntax for inline assembly statements"
#else
#error "Must implement count leading zeroes"
#endif
#else
#define ASM_SLJIT_CLZ(src, dst) \
	__asm__ volatile ( "cntlzd %0, %1" : "=r"(dst) : "r"(src) )
#endif
37
/*
 * Builds an instruction word from HI(30), the S()/A() register fields, a
 * 6-bit shift amount `sh`, a 6-bit mask position `mb` and a `type` selector
 * placed at bit 2. Both 6-bit values are split: their low five bits and
 * their bit 5 are stored in different places of the word.
 */
#define RLDI(dst, src, sh, mb, type) \
	(HI(30) | A(dst) | S(src) \
	| (((sh) & 0x1f) << 11) | (((sh) & 0x20) >> 4) \
	| (((mb) & 0x1f) << 6) | ((mb) & 0x20) \
	| ((type) << 2))
40
/*
 * Pushes an RLDI instruction of type 1 that operates on `reg` in place,
 * with a rotate count of (63 - shift) and `shift` as the mask position.
 * A variable named `compiler` must be in scope at the point of use.
 * `shift` is parenthesized so that an expression argument such as `a + b`
 * is subtracted as a whole instead of expanding to `63 - a + b`.
 */
#define PUSH_RLDICR(reg, shift) \
	push_inst(compiler, RLDI(reg, reg, 63 - (shift), (shift), 1))
43
44static int load_immediate(struct sljit_compiler *compiler, int reg, sljit_w imm)
45{
46	sljit_uw tmp;
47	sljit_uw shift;
48	sljit_uw tmp2;
49	sljit_uw shift2;
50
51	if (imm <= SIMM_MAX && imm >= SIMM_MIN)
52		return push_inst(compiler, ADDI | D(reg) | A(0) | IMM(imm));
53
54	if (!(imm & ~0xffff))
55		return push_inst(compiler, ORI | S(ZERO_REG) | A(reg) | IMM(imm));
56
57	if (imm <= SLJIT_W(0x7fffffff) && imm >= SLJIT_W(-0x80000000)) {
58		FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 16)));
59		return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)) : SLJIT_SUCCESS;
60	}
61
62	/* Count leading zeroes. */
63	tmp = (imm >= 0) ? imm : ~imm;
64	ASM_SLJIT_CLZ(tmp, shift);
65	SLJIT_ASSERT(shift > 0);
66	shift--;
67	tmp = (imm << shift);
68
69	if ((tmp & ~0xffff000000000000ul) == 0) {
70		FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48)));
71		shift += 15;
72		return PUSH_RLDICR(reg, shift);
73	}
74
75	if ((tmp & ~0xffffffff00000000ul) == 0) {
76		FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(tmp >> 48)));
77		FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(tmp >> 32)));
78		shift += 31;
79		return PUSH_RLDICR(reg, shift);
80	}
81
82	/* Cut out the 16 bit from immediate. */
83	shift += 15;
84	tmp2 = imm & ((1ul << (63 - shift)) - 1);
85
86	if (tmp2 <= 0xffff) {
87		FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48)));
88		FAIL_IF(PUSH_RLDICR(reg, shift));
89		return push_inst(compiler, ORI | S(reg) | A(reg) | tmp2);
90	}
91
92	if (tmp2 <= 0xffffffff) {
93		FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48)));
94		FAIL_IF(PUSH_RLDICR(reg, shift));
95		FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | (tmp2 >> 16)));
96		return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(tmp2)) : SLJIT_SUCCESS;
97	}
98
99	ASM_SLJIT_CLZ(tmp2, shift2);
100	tmp2 <<= shift2;
101
102	if ((tmp2 & ~0xffff000000000000ul) == 0) {
103		FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48)));
104		shift2 += 15;
105		shift += (63 - shift2);
106		FAIL_IF(PUSH_RLDICR(reg, shift));
107		FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | (tmp2 >> 48)));
108		return PUSH_RLDICR(reg, shift2);
109	}
110
111	/* The general version. */
112	FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 48)));
113	FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm >> 32)));
114	FAIL_IF(PUSH_RLDICR(reg, 31));
115	FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | IMM(imm >> 16)));
116	return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm));
117}
118
/*
 * Simplified mnemonic: clrldi.
 * Combines RLDICL with the register fields, `from` shifted into bits 6 and
 * up, and a constant bit 5. No shift amount is encoded.
 */
#define INS_CLEAR_LEFT(dst, src, from) \
	(RLDICL | A(dst) | S(src) | (1 << 5) | ((from) << 6))
122
/*
 * Sign extension for integer operations.
 *
 * UN_EXTS: when both ALT_SIGN_EXT and REG2_SOURCE are set in `flags`, emits
 * EXTSW from src2 into TMP_REG2 and makes src2 refer to TMP_REG2.
 * Expects `compiler`, `flags` and `src2` to be in scope.
 */
#define UN_EXTS() \
	if (!(~flags & (ALT_SIGN_EXT | REG2_SOURCE))) { \
		FAIL_IF(push_inst(compiler, EXTSW | A(TMP_REG2) | S(src2))); \
		src2 = TMP_REG2; \
	}
129
/*
 * BIN_EXTS: when ALT_SIGN_EXT is set in `flags`, every operand that is
 * marked as a register source (REG1_SOURCE / REG2_SOURCE) is passed through
 * EXTSW into its temporary register (src1 first, then src2), and the operand
 * variable is redirected to that temporary.
 * Expects `compiler`, `flags`, `src1` and `src2` to be in scope.
 */
#define BIN_EXTS() \
	if ((flags & ALT_SIGN_EXT) != 0) { \
		if ((flags & REG1_SOURCE) != 0) { \
			FAIL_IF(push_inst(compiler, EXTSW | A(TMP_REG1) | S(src1))); \
			src1 = TMP_REG1; \
		} \
		if ((flags & REG2_SOURCE) != 0) { \
			FAIL_IF(push_inst(compiler, EXTSW | A(TMP_REG2) | S(src2))); \
			src2 = TMP_REG2; \
		} \
	}
141
/*
 * BIN_IMM_EXTS: when both ALT_SIGN_EXT and REG1_SOURCE are set in `flags`,
 * emits EXTSW from src1 into TMP_REG1 and makes src1 refer to TMP_REG1.
 * Expects `compiler`, `flags` and `src1` to be in scope.
 */
#define BIN_IMM_EXTS() \
	if (!(~flags & (ALT_SIGN_EXT | REG1_SOURCE))) { \
		FAIL_IF(push_inst(compiler, EXTSW | A(TMP_REG1) | S(src1))); \
		src1 = TMP_REG1; \
	}
147
148static SLJIT_INLINE int emit_single_op(struct sljit_compiler *compiler, int op, int flags,
149	int dst, int src1, int src2)
150{
151	switch (op) {
152	case SLJIT_ADD:
153		if (flags & ALT_FORM1) {
154			/* Flags does not set: BIN_IMM_EXTS unnecessary. */
155			SLJIT_ASSERT(src2 == TMP_REG2);
156			return push_inst(compiler, ADDI | D(dst) | A(src1) | compiler->imm);
157		}
158		if (flags & ALT_FORM2) {
159			/* Flags does not set: BIN_IMM_EXTS unnecessary. */
160			SLJIT_ASSERT(src2 == TMP_REG2);
161			return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm);
162		}
163		if (flags & ALT_FORM3) {
164			SLJIT_ASSERT(src2 == TMP_REG2);
165			BIN_IMM_EXTS();
166			return push_inst(compiler, ADDIC | D(dst) | A(src1) | compiler->imm);
167		}
168		if (flags & ALT_FORM4) {
169			/* Flags does not set: BIN_IMM_EXTS unnecessary. */
170			FAIL_IF(push_inst(compiler, ADDI | D(dst) | A(src1) | (compiler->imm & 0xffff)));
171			return push_inst(compiler, ADDIS | D(dst) | A(dst) | (((compiler->imm >> 16) & 0xffff) + ((compiler->imm >> 15) & 0x1)));
172		}
173		if (!(flags & ALT_SET_FLAGS))
174			return push_inst(compiler, ADD | D(dst) | A(src1) | B(src2));
175		BIN_EXTS();
176		return push_inst(compiler, ADDC | OERC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2));
177
178	case SLJIT_ADDC:
179		if (flags & ALT_FORM1) {
180			FAIL_IF(push_inst(compiler, MFXER | S(0)));
181			FAIL_IF(push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2)));
182			return push_inst(compiler, MTXER | S(0));
183		}
184		BIN_EXTS();
185		return push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2));
186
187	case SLJIT_SUB:
188		if (flags & ALT_FORM1) {
189			/* Flags does not set: BIN_IMM_EXTS unnecessary. */
190			SLJIT_ASSERT(src2 == TMP_REG2);
191			return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm);
192		}
193		if (flags & (ALT_FORM2 | ALT_FORM3)) {
194			SLJIT_ASSERT(src2 == TMP_REG2);
195			if (flags & ALT_FORM2)
196				FAIL_IF(push_inst(compiler, CMPI | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm));
197			if (flags & ALT_FORM3)
198				return push_inst(compiler, CMPLI | CRD(4 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm);
199			return SLJIT_SUCCESS;
200		}
201		if (flags & (ALT_FORM4 | ALT_FORM5)) {
202			if (flags & ALT_FORM4)
203				FAIL_IF(push_inst(compiler, CMPL | CRD(4 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2)));
204			if (flags & ALT_FORM5)
205				return push_inst(compiler, CMP | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2));
206			return SLJIT_SUCCESS;
207		}
208		if (!(flags & ALT_SET_FLAGS))
209			return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1));
210		BIN_EXTS();
211		if (flags & ALT_FORM6)
212			FAIL_IF(push_inst(compiler, CMPL | CRD(4 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2)));
213		return push_inst(compiler, SUBFC | OERC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1));
214
215	case SLJIT_SUBC:
216		if (flags & ALT_FORM1) {
217			FAIL_IF(push_inst(compiler, MFXER | S(0)));
218			FAIL_IF(push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1)));
219			return push_inst(compiler, MTXER | S(0));
220		}
221		BIN_EXTS();
222		return push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1));
223
224	case SLJIT_MUL:
225		if (flags & ALT_FORM1) {
226			SLJIT_ASSERT(src2 == TMP_REG2);
227			return push_inst(compiler, MULLI | D(dst) | A(src1) | compiler->imm);
228		}
229		BIN_EXTS();
230		if (flags & ALT_FORM2)
231			return push_inst(compiler, MULLW | OERC(flags) | D(dst) | A(src2) | B(src1));
232		return push_inst(compiler, MULLD | OERC(flags) | D(dst) | A(src2) | B(src1));
233
234	case SLJIT_AND:
235		if (flags & ALT_FORM1) {
236			SLJIT_ASSERT(src2 == TMP_REG2);
237			return push_inst(compiler, ANDI | S(src1) | A(dst) | compiler->imm);
238		}
239		if (flags & ALT_FORM2) {
240			SLJIT_ASSERT(src2 == TMP_REG2);
241			return push_inst(compiler, ANDIS | S(src1) | A(dst) | compiler->imm);
242		}
243		return push_inst(compiler, AND | RC(flags) | S(src1) | A(dst) | B(src2));
244
245	case SLJIT_OR:
246		if (flags & ALT_FORM1) {
247			SLJIT_ASSERT(src2 == TMP_REG2);
248			return push_inst(compiler, ORI | S(src1) | A(dst) | compiler->imm);
249		}
250		if (flags & ALT_FORM2) {
251			SLJIT_ASSERT(src2 == TMP_REG2);
252			return push_inst(compiler, ORIS | S(src1) | A(dst) | compiler->imm);
253		}
254		if (flags & ALT_FORM3) {
255			SLJIT_ASSERT(src2 == TMP_REG2);
256			FAIL_IF(push_inst(compiler, ORI | S(src1) | A(dst) | IMM(compiler->imm)));
257			return push_inst(compiler, ORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16));
258		}
259		return push_inst(compiler, OR | RC(flags) | S(src1) | A(dst) | B(src2));
260
261	case SLJIT_XOR:
262		if (flags & ALT_FORM1) {
263			SLJIT_ASSERT(src2 == TMP_REG2);
264			return push_inst(compiler, XORI | S(src1) | A(dst) | compiler->imm);
265		}
266		if (flags & ALT_FORM2) {
267			SLJIT_ASSERT(src2 == TMP_REG2);
268			return push_inst(compiler, XORIS | S(src1) | A(dst) | compiler->imm);
269		}
270		if (flags & ALT_FORM3) {
271			SLJIT_ASSERT(src2 == TMP_REG2);
272			FAIL_IF(push_inst(compiler, XORI | S(src1) | A(dst) | IMM(compiler->imm)));
273			return push_inst(compiler, XORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16));
274		}
275		return push_inst(compiler, XOR | RC(flags) | S(src1) | A(dst) | B(src2));
276
277	case SLJIT_SHL:
278		if (flags & ALT_FORM1) {
279			SLJIT_ASSERT(src2 == TMP_REG2);
280			if (flags & ALT_FORM2) {
281				compiler->imm &= 0x1f;
282				return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11) | ((31 - compiler->imm) << 1));
283			}
284			else {
285				compiler->imm &= 0x3f;
286				return push_inst(compiler, RLDI(dst, src1, compiler->imm, 63 - compiler->imm, 1) | RC(flags));
287			}
288		}
289		if (flags & ALT_FORM2)
290			return push_inst(compiler, SLW | RC(flags) | S(src1) | A(dst) | B(src2));
291		return push_inst(compiler, SLD | RC(flags) | S(src1) | A(dst) | B(src2));
292
293	case SLJIT_LSHR:
294		if (flags & ALT_FORM1) {
295			SLJIT_ASSERT(src2 == TMP_REG2);
296			if (flags & ALT_FORM2) {
297				compiler->imm &= 0x1f;
298				return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (((32 - compiler->imm) & 0x1f) << 11) | (compiler->imm << 6) | (31 << 1));
299			}
300			else {
301				compiler->imm &= 0x3f;
302				return push_inst(compiler, RLDI(dst, src1, 64 - compiler->imm, compiler->imm, 0) | RC(flags));
303			}
304		}
305		if (flags & ALT_FORM2)
306			return push_inst(compiler, SRW | RC(flags) | S(src1) | A(dst) | B(src2));
307		return push_inst(compiler, SRD | RC(flags) | S(src1) | A(dst) | B(src2));
308
309	case SLJIT_ASHR:
310		if (flags & ALT_FORM1) {
311			SLJIT_ASSERT(src2 == TMP_REG2);
312			if (flags & ALT_FORM2) {
313				compiler->imm &= 0x1f;
314				return push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11));
315			}
316			else {
317				compiler->imm &= 0x3f;
318				return push_inst(compiler, SRADI | RC(flags) | S(src1) | A(dst) | ((compiler->imm & 0x1f) << 11) | ((compiler->imm & 0x20) >> 4));
319			}
320		}
321		if (flags & ALT_FORM2)
322			return push_inst(compiler, SRAW | RC(flags) | S(src1) | A(dst) | B(src2));
323		return push_inst(compiler, SRAD | RC(flags) | S(src1) | A(dst) | B(src2));
324
325	case SLJIT_MOV:
326		SLJIT_ASSERT(src1 == TMP_REG1);
327		if (dst != src2)
328			return push_inst(compiler, OR | S(src2) | A(dst) | B(src2));
329		return SLJIT_SUCCESS;
330
331	case SLJIT_MOV_UI:
332	case SLJIT_MOV_SI:
333		SLJIT_ASSERT(src1 == TMP_REG1);
334		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
335			if (op == SLJIT_MOV_SI)
336				return push_inst(compiler, EXTSW | S(src2) | A(dst));
337			return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 0));
338		}
339		else if (dst != src2)
340			SLJIT_ASSERT_STOP();
341		return SLJIT_SUCCESS;
342
343	case SLJIT_MOV_UB:
344	case SLJIT_MOV_SB:
345		SLJIT_ASSERT(src1 == TMP_REG1);
346		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
347			if (op == SLJIT_MOV_SB)
348				return push_inst(compiler, EXTSB | S(src2) | A(dst));
349			return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 24));
350		}
351		else if ((flags & REG_DEST) && op == SLJIT_MOV_SB)
352			return push_inst(compiler, EXTSB | S(src2) | A(dst));
353		else if (dst != src2)
354			SLJIT_ASSERT_STOP();
355		return SLJIT_SUCCESS;
356
357	case SLJIT_MOV_UH:
358	case SLJIT_MOV_SH:
359		SLJIT_ASSERT(src1 == TMP_REG1);
360		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
361			if (op == SLJIT_MOV_SH)
362				return push_inst(compiler, EXTSH | S(src2) | A(dst));
363			return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 16));
364		}
365		else if (dst != src2)
366			SLJIT_ASSERT_STOP();
367		return SLJIT_SUCCESS;
368
369	case SLJIT_NOT:
370		SLJIT_ASSERT(src1 == TMP_REG1);
371		UN_EXTS();
372		return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2));
373
374	case SLJIT_NEG:
375		SLJIT_ASSERT(src1 == TMP_REG1);
376		UN_EXTS();
377		return push_inst(compiler, NEG | OERC(flags) | D(dst) | A(src2));
378
379	case SLJIT_CLZ:
380		SLJIT_ASSERT(src1 == TMP_REG1);
381		if (flags & ALT_FORM1)
382			return push_inst(compiler, CNTLZW | RC(flags) | S(src2) | A(dst));
383		return push_inst(compiler, CNTLZD | RC(flags) | S(src2) | A(dst));
384	}
385
386	SLJIT_ASSERT_STOP();
387	return SLJIT_SUCCESS;
388}
389
390static SLJIT_INLINE int emit_const(struct sljit_compiler *compiler, int reg, sljit_w init_value)
391{
392	FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(init_value >> 48)));
393	FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value >> 32)));
394	FAIL_IF(PUSH_RLDICR(reg, 31));
395	FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | IMM(init_value >> 16)));
396	return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value));
397}
398
399SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
400{
401	sljit_ins *inst = (sljit_ins*)addr;
402
403	inst[0] = (inst[0] & 0xffff0000) | ((new_addr >> 48) & 0xffff);
404	inst[1] = (inst[1] & 0xffff0000) | ((new_addr >> 32) & 0xffff);
405	inst[3] = (inst[3] & 0xffff0000) | ((new_addr >> 16) & 0xffff);
406	inst[4] = (inst[4] & 0xffff0000) | (new_addr & 0xffff);
407	SLJIT_CACHE_FLUSH(inst, inst + 5);
408}
409
410SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_w new_constant)
411{
412	sljit_ins *inst = (sljit_ins*)addr;
413
414	inst[0] = (inst[0] & 0xffff0000) | ((new_constant >> 48) & 0xffff);
415	inst[1] = (inst[1] & 0xffff0000) | ((new_constant >> 32) & 0xffff);
416	inst[3] = (inst[3] & 0xffff0000) | ((new_constant >> 16) & 0xffff);
417	inst[4] = (inst[4] & 0xffff0000) | (new_constant & 0xffff);
418	SLJIT_CACHE_FLUSH(inst, inst + 5);
419}
420