floatundixf.S revision 1.1.1.2
1// This file is dual licensed under the MIT and the University of Illinois Open
2// Source Licenses. See LICENSE.TXT for details.
3
4#include "../assembly.h"
5
6// long double __floatundixf(du_int a);
7
8#ifdef __x86_64__
9
// Read-only constant used by __floatundixf.
// Select a read-only data section on every object format: ELF has no .const
// directive, so it gets .rodata; other formats keep using .const. Without an
// explicit section on ELF the constant would be emitted into whichever
// section happens to be current.
#ifdef __ELF__
.section .rodata
#else
.const
#endif
// Natural alignment for the 8-byte operand that faddl reads.
.balign 8
// 2^64 as an IEEE-754 double: biased exponent 0x43f = 1023 + 64, fraction 0.
twop64: .quad 0x43f0000000000000
15
// Form a RIP-relative memory operand for symbol _a.
#define REL_ADDR(_a)	(_a)(%rip)

.text
.balign 4
// long double __floatundixf(du_int a)
//   In:    %rdi  = a, treated as an unsigned 64-bit integer
//   Out:   st(0) = a converted to x87 extended precision
//   Uses:  8 bytes at -8(%rsp) as scratch without moving %rsp, so this
//          relies on the System V AMD64 red zone; modifies EFLAGS.
//
// fildq only understands signed operands, so an input with bit 63 set is
// loaded as a - 2^64 and then corrected by adding 2^64 back.
DEFINE_COMPILERRT_FUNCTION(__floatundixf)
	testq	%rdi,	%rdi			// SF = bit 63 of a (mov and fild leave EFLAGS alone)
	movq	%rdi,	-8(%rsp)		// x87 loads come from memory: spill a
	fildq	-8(%rsp)			// st(0) = a read as a signed 64-bit integer
	js	1f				// bit 63 set: st(0) currently holds a - 2^64
	ret					// common case: the signed load was already right
1:
	faddl	REL_ADDR(twop64)		// st(0) += 2^64, giving the unsigned value
	ret
END_COMPILERRT_FUNCTION(__floatundixf)
29
30#endif // __x86_64__
31
32
33/* Branch-free implementation is ever so slightly slower, but more beautiful.
34   It is likely superior for inlining, so I kept it around for future reference.
35
36#ifdef __x86_64__
37
38.const
39.balign 4
40twop52: .quad 0x4330000000000000
41twop84_plus_twop52_neg:
42		.quad 0xc530000000100000
43twop84: .quad 0x4530000000000000
44
45#define REL_ADDR(_a)	(_a)(%rip)
46
47.text
48.balign 4
49DEFINE_COMPILERRT_FUNCTION(__floatundixf)
50	movl	%edi,				%esi			// low 32 bits of input
51	shrq	$32,				%rdi			// hi 32 bits of input
52	orq		REL_ADDR(twop84),	%rdi			// 2^84 + hi (as a double)
53	orq		REL_ADDR(twop52),	%rsi			// 2^52 + lo (as a double)
54	movq	%rdi,			 -8(%rsp)
55	movq	%rsi,			-16(%rsp)
56	fldl	REL_ADDR(twop84_plus_twop52_neg)
57	faddl	-8(%rsp)	// hi - 2^52 (as double extended, no rounding occurs)
58	faddl	-16(%rsp)	// hi + lo (as double extended)
59	ret
60END_COMPILERRT_FUNCTION(__floatundixf)
61
62#endif // __x86_64__
63
64*/
65