1301301Sdelphij// This file is dual licensed under the MIT and the University of Illinois Open
2275970Scy// Source Licenses. See LICENSE.TXT for details.
3275970Scy
4275970Scy#include "../assembly.h"
5275970Scy
6301301Sdelphij// long double __floatundixf(du_int a);
7275970Scy
8275970Scy#ifdef __x86_64__
9275970Scy
// twop64 is 2^64 encoded as an IEEE-754 double: biased exponent 0x43f
// (1023 + 64) with an all-zero mantissa.  It is only ever read, so it is
// placed in read-only data on every object format: .rodata on ELF, .const
// elsewhere.
#ifdef __ELF__
.section .rodata
#else
.const
#endif
// .p2align takes a power of two on every assembler, so this is 16-byte
// alignment everywhere.  A plain ".align 4" is a byte count on some targets
// and a power of two on others, which could leave this 8-byte constant only
// 4-byte aligned.
.p2align 4
twop64: .quad 0x43f0000000000000
15275970Scy
// Form a %rip-relative memory operand for the symbol _a.
#define REL_ADDR(_a)	(_a)(%rip)

// long double __floatundixf(du_int a)
//
// In:     %rdi = a, treated as an unsigned 64-bit integer.
// Out:    the converted value on top of the x87 register stack.
// Uses:   EFLAGS and the 8 bytes at -8(%rsp).  The stack pointer is never
//         adjusted, so this relies on being a leaf routine that may use the
//         area just below %rsp (the System V red zone; assumes that ABI).
//
// fildq interprets its 64-bit operand as signed.  When bit 63 of a is clear
// the loaded value is already a.  When bit 63 is set the loaded value is
// a - 2^64, and adding the twop64 constant restores a.
.text
.align 4
DEFINE_COMPILERRT_FUNCTION(__floatundixf)
	testq	%rdi,		%rdi		// SF = bit 63 of a
	movq	%rdi,		-8(%rsp)	// spill a; a plain store leaves EFLAGS alone
	fildq	-8(%rsp)				// st(0) = a read as signed; EFLAGS untouched
	jns		1f						// bit 63 clear: st(0) is already the answer
	faddl	REL_ADDR(twop64)		// bit 63 set: st(0) += 2^64
1:	ret
28275970Scy
29275970Scy#endif // __x86_64__
30275970Scy
31275970Scy
32275970Scy/* Branch-free implementation is ever so slightly slower, but more beautiful.
33275970Scy   It is likely superior for inlining, so I kept it around for future reference.
34275970Scy
35275970Scy#ifdef __x86_64__
36275970Scy
37275970Scy.const
38275970Scy.align 4
39275970Scytwop52: .quad 0x4330000000000000
40275970Scytwop84_plus_twop52_neg:
41275970Scy		.quad 0xc530000000100000
42275970Scytwop84: .quad 0x4530000000000000
43275970Scy
44275970Scy#define REL_ADDR(_a)	(_a)(%rip)
45275970Scy
46275970Scy.text
47275970Scy.align 4
48275970ScyDEFINE_COMPILERRT_FUNCTION(__floatundixf)
49275970Scy	movl	%edi,				%esi			// low 32 bits of input
50275970Scy	shrq	$32,				%rdi			// hi 32 bits of input
51275970Scy	orq		REL_ADDR(twop84),	%rdi			// 2^84 + hi (as a double)
52275970Scy	orq		REL_ADDR(twop52),	%rsi			// 2^52 + lo (as a double)
53275970Scy	movq	%rdi,			 -8(%rsp)
54275970Scy	movq	%rsi,			-16(%rsp)
55275970Scy	fldl	REL_ADDR(twop84_plus_twop52_neg)
56275970Scy	faddl	-8(%rsp)	// hi - 2^52 (as double extended, no rounding occurs)
57275970Scy	faddl	-16(%rsp)	// hi + lo (as double extended)
58301301Sdelphij	ret
59275970Scy
60275970Scy#endif // __x86_64__
61275970Scy
62275970Scy*/
63275970Scy