1214152Sed//===-- lib/extendsfdf2.c - single -> double conversion -----------*- C -*-===// 2214152Sed// 3214152Sed// The LLVM Compiler Infrastructure 4214152Sed// 5222656Sed// This file is dual licensed under the MIT and the University of Illinois Open 6222656Sed// Source Licenses. See LICENSE.TXT for details. 7214152Sed// 8214152Sed//===----------------------------------------------------------------------===// 9214152Sed// 10214152Sed// This file implements a fairly generic conversion from a narrower to a wider 11214152Sed// IEEE-754 floating-point type. The constants and types defined following the 12214152Sed// includes below parameterize the conversion. 13214152Sed// 14214152Sed// This routine can be trivially adapted to support conversions from 15214152Sed// half-precision or to quad-precision. It does not support types that don't 16214152Sed// use the usual IEEE-754 interchange formats; specifically, some work would be 17214152Sed// needed to adapt it to (for example) the Intel 80-bit format or PowerPC 18214152Sed// double-double format. 19214152Sed// 20214152Sed// Note please, however, that this implementation is only intended to support 21214152Sed// *widening* operations; if you need to convert to a *narrower* floating-point 22214152Sed// type (e.g. double -> float), then this routine will not do what you want it 23214152Sed// to. 24214152Sed// 25214152Sed// It also requires that integer types at least as large as both formats 26214152Sed// are available on the target platform; this may pose a problem when trying 27214152Sed// to add support for quad on some 32-bit systems, for example. You also may 28214152Sed// run into trouble finding an appropriate CLZ function for wide source types; 29214152Sed// you will likely need to roll your own on some platforms. 30214152Sed// 31214152Sed// Finally, the following assumptions are made: 32214152Sed// 33214152Sed// 1. floating-point types and integer types have the same endianness on the 34214152Sed// target platform 35214152Sed// 36214152Sed// 2. quiet NaNs, if supported, are indicated by the leading bit of the 37214152Sed// significand field being set 38214152Sed// 39214152Sed//===----------------------------------------------------------------------===// 40214152Sed 41236011Smarius#include "int_lib.h" 42214152Sed 43214152Sedtypedef float src_t; 44214152Sedtypedef uint32_t src_rep_t; 45214152Sed#define SRC_REP_C UINT32_C 46214152Sedstatic const int srcSigBits = 23; 47214152Sed#define src_rep_t_clz __builtin_clz 48214152Sed 49214152Sedtypedef double dst_t; 50214152Sedtypedef uint64_t dst_rep_t; 51214152Sed#define DST_REP_C UINT64_C 52214152Sedstatic const int dstSigBits = 52; 53214152Sed 54214152Sed// End of specialization parameters. Two helper routines for conversion to and 55214152Sed// from the representation of floating-point data as integer values follow. 56214152Sed 57214152Sedstatic inline src_rep_t srcToRep(src_t x) { 58214152Sed const union { src_t f; src_rep_t i; } rep = {.f = x}; 59214152Sed return rep.i; 60214152Sed} 61214152Sed 62214152Sedstatic inline dst_t dstFromRep(dst_rep_t x) { 63214152Sed const union { dst_t f; dst_rep_t i; } rep = {.i = x}; 64214152Sed return rep.f; 65214152Sed} 66214152Sed 67214152Sed// End helper routines. Conversion implementation follows. 68214152Sed 69263560SdimARM_EABI_FNALIAS(f2d, extendsfdf2) 70222656Sed 71214152Seddst_t __extendsfdf2(src_t a) { 72214152Sed 73214152Sed // Various constants whose values follow from the type parameters. 74214152Sed // Any reasonable optimizer will fold and propagate all of these. 75214152Sed const int srcBits = sizeof(src_t)*CHAR_BIT; 76214152Sed const int srcExpBits = srcBits - srcSigBits - 1; 77214152Sed const int srcInfExp = (1 << srcExpBits) - 1; 78214152Sed const int srcExpBias = srcInfExp >> 1; 79214152Sed 80214152Sed const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits; 81214152Sed const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits; 82214152Sed const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits); 83214152Sed const src_rep_t srcAbsMask = srcSignMask - 1; 84214152Sed const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1); 85214152Sed const src_rep_t srcNaNCode = srcQNaN - 1; 86214152Sed 87214152Sed const int dstBits = sizeof(dst_t)*CHAR_BIT; 88214152Sed const int dstExpBits = dstBits - dstSigBits - 1; 89214152Sed const int dstInfExp = (1 << dstExpBits) - 1; 90214152Sed const int dstExpBias = dstInfExp >> 1; 91214152Sed 92214152Sed const dst_rep_t dstMinNormal = DST_REP_C(1) << dstSigBits; 93214152Sed 94214152Sed // Break a into a sign and representation of the absolute value 95214152Sed const src_rep_t aRep = srcToRep(a); 96214152Sed const src_rep_t aAbs = aRep & srcAbsMask; 97214152Sed const src_rep_t sign = aRep & srcSignMask; 98214152Sed dst_rep_t absResult; 99214152Sed 100214152Sed if (aAbs - srcMinNormal < srcInfinity - srcMinNormal) { 101214152Sed // a is a normal number. 102214152Sed // Extend to the destination type by shifting the significand and 103214152Sed // exponent into the proper position and rebiasing the exponent. 104214152Sed absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits); 105214152Sed absResult += (dst_rep_t)(dstExpBias - srcExpBias) << dstSigBits; 106214152Sed } 107214152Sed 108214152Sed else if (aAbs >= srcInfinity) { 109214152Sed // a is NaN or infinity. 110214152Sed // Conjure the result by beginning with infinity, then setting the qNaN 111214152Sed // bit (if needed) and right-aligning the rest of the trailing NaN 112214152Sed // payload field. 113214152Sed absResult = (dst_rep_t)dstInfExp << dstSigBits; 114214152Sed absResult |= (dst_rep_t)(aAbs & srcQNaN) << (dstSigBits - srcSigBits); 115214152Sed absResult |= aAbs & srcNaNCode; 116214152Sed } 117214152Sed 118214152Sed else if (aAbs) { 119214152Sed // a is denormal. 120214152Sed // renormalize the significand and clear the leading bit, then insert 121214152Sed // the correct adjusted exponent in the destination type. 122214152Sed const int scale = src_rep_t_clz(aAbs) - src_rep_t_clz(srcMinNormal); 123214152Sed absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits + scale); 124214152Sed absResult ^= dstMinNormal; 125214152Sed const int resultExponent = dstExpBias - srcExpBias - scale + 1; 126214152Sed absResult |= (dst_rep_t)resultExponent << dstSigBits; 127214152Sed } 128214152Sed 129214152Sed else { 130214152Sed // a is zero. 131214152Sed absResult = 0; 132214152Sed } 133214152Sed 134214152Sed // Apply the signbit to (dst_t)abs(a). 135214152Sed const dst_rep_t result = absResult | (dst_rep_t)sign << (dstBits - srcBits); 136214152Sed return dstFromRep(result); 137214152Sed} 138