// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.

#include "../assembly.h"

// long double __floatundixf(du_int a);
//
// Converts an unsigned 64-bit integer to long double using the x87 unit.
//
// Syntax: AT&T (GAS), run through the C preprocessor.
// In:     %rdi  = a
// Out:    st(0) = a converted to long double
// Stack:  the 8 bytes at -8(%rsp) are used as scratch without moving %rsp
//         (this relies on the System V AMD64 red zone; leaf function only).
// Clobb:  flags
//
// fildq only understands signed 64-bit integers, so an input with bit 63 set
// is loaded as (a - 2^64).  In that case 2^64 is added back afterwards.
// NOTE(review): the correction is exact only if the x87 precision control is
// set to double extended (64-bit significand) - confirm for the target OS.

#ifdef __x86_64__

// Read-only constant.  The Mach-O style .const directive is only emitted for
// non-ELF targets; on ELF the constant is placed in .rodata so that it does
// not end up in the same section as the code.
#ifndef __ELF__
.const
#else
.section .rodata
#endif
.p2align 4				// 16-byte boundary on every target
twop64:	.quad 0x43f0000000000000	// 2^64 encoded as an IEEE-754 double

#define REL_ADDR(_a)	(_a)(%rip)

.text
.p2align 4
DEFINE_COMPILERRT_FUNCTION(__floatundixf)
	movq	%rdi,	-8(%rsp)	// spill a; fildq needs a memory operand
	fildq	-8(%rsp)		// st(0) = a interpreted as signed 64-bit
	test	%rdi,	%rdi
	js	1f			// bit 63 set: value was loaded as a - 2^64
	ret				// common case: signed load already correct
1:	faddl	REL_ADDR(twop64)	// st(0) += 2^64, undoing the sign wrap
	ret

#endif // __x86_64__


/* Branch-free implementation is ever so slightly slower, but more beautiful.
   It is likely superior for inlining, so I kept it around for future reference.

#ifdef __x86_64__

.const
.align 4
twop52: .quad 0x4330000000000000
twop84_plus_twop52_neg:
	.quad 0xc530000000100000
twop84: .quad 0x4530000000000000

#define REL_ADDR(_a)	(_a)(%rip)

.text
.align 4
DEFINE_COMPILERRT_FUNCTION(__floatundixf)
	movl	%edi,			%esi		// low 32 bits of input
	shrq	$32,			%rdi		// hi 32 bits of input
	orq	REL_ADDR(twop84),	%rdi		// 2^84 + hi (as a double)
	orq	REL_ADDR(twop52),	%rsi		// 2^52 + lo (as a double)
	movq	%rdi,			-8(%rsp)
	movq	%rsi,			-16(%rsp)
	fldl	REL_ADDR(twop84_plus_twop52_neg)
	faddl	-8(%rsp)	// hi - 2^52 (as double extended, no rounding occurs)
	faddl	-16(%rsp)	// hi + lo (as double extended)
	ret

#endif // __x86_64__

*/