1/*===-- __clang_cuda_complex_builtins - CUDA impls of runtime complex fns ---=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 10#ifndef __CLANG_CUDA_COMPLEX_BUILTINS 11#define __CLANG_CUDA_COMPLEX_BUILTINS 12 13// This header defines __muldc3, __mulsc3, __divdc3, and __divsc3. These are 14// libgcc functions that clang assumes are available when compiling c99 complex 15// operations. (These implementations come from libc++, and have been modified 16// to work with CUDA and OpenMP target offloading [in C and C++ mode].) 17 18#pragma push_macro("__DEVICE__") 19#if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__) 20#pragma omp declare target 21#define __DEVICE__ __attribute__((noinline, nothrow, cold, weak)) 22#else 23#define __DEVICE__ __device__ inline 24#endif 25 26// To make the algorithms available for C and C++ in CUDA and OpenMP we select 27// different but equivalent function versions. TODO: For OpenMP we currently 28// select the native builtins as the overload support for templates is lacking. 29#if !defined(__OPENMP_NVPTX__) && !defined(__OPENMP_AMDGCN__) 30#define _ISNANd std::isnan 31#define _ISNANf std::isnan 32#define _ISINFd std::isinf 33#define _ISINFf std::isinf 34#define _ISFINITEd std::isfinite 35#define _ISFINITEf std::isfinite 36#define _COPYSIGNd std::copysign 37#define _COPYSIGNf std::copysign 38#define _SCALBNd std::scalbn 39#define _SCALBNf std::scalbn 40#define _ABSd std::abs 41#define _ABSf std::abs 42#define _LOGBd std::logb 43#define _LOGBf std::logb 44// Rather than pulling in std::max from algorithm everytime, use available ::max. 45#define _fmaxd max 46#define _fmaxf max 47#else 48#ifdef __AMDGCN__ 49#define _ISNANd __ocml_isnan_f64 50#define _ISNANf __ocml_isnan_f32 51#define _ISINFd __ocml_isinf_f64 52#define _ISINFf __ocml_isinf_f32 53#define _ISFINITEd __ocml_isfinite_f64 54#define _ISFINITEf __ocml_isfinite_f32 55#define _COPYSIGNd __ocml_copysign_f64 56#define _COPYSIGNf __ocml_copysign_f32 57#define _SCALBNd __ocml_scalbn_f64 58#define _SCALBNf __ocml_scalbn_f32 59#define _ABSd __ocml_fabs_f64 60#define _ABSf __ocml_fabs_f32 61#define _LOGBd __ocml_logb_f64 62#define _LOGBf __ocml_logb_f32 63#define _fmaxd __ocml_fmax_f64 64#define _fmaxf __ocml_fmax_f32 65#else 66#define _ISNANd __nv_isnand 67#define _ISNANf __nv_isnanf 68#define _ISINFd __nv_isinfd 69#define _ISINFf __nv_isinff 70#define _ISFINITEd __nv_isfinited 71#define _ISFINITEf __nv_finitef 72#define _COPYSIGNd __nv_copysign 73#define _COPYSIGNf __nv_copysignf 74#define _SCALBNd __nv_scalbn 75#define _SCALBNf __nv_scalbnf 76#define _ABSd __nv_fabs 77#define _ABSf __nv_fabsf 78#define _LOGBd __nv_logb 79#define _LOGBf __nv_logbf 80#define _fmaxd __nv_fmax 81#define _fmaxf __nv_fmaxf 82#endif 83#endif 84 85#if defined(__cplusplus) 86extern "C" { 87#endif 88 89__DEVICE__ double _Complex __muldc3(double __a, double __b, double __c, 90 double __d) { 91 double __ac = __a * __c; 92 double __bd = __b * __d; 93 double __ad = __a * __d; 94 double __bc = __b * __c; 95 double _Complex z; 96 __real__(z) = __ac - __bd; 97 __imag__(z) = __ad + __bc; 98 if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) { 99 int __recalc = 0; 100 if (_ISINFd(__a) || _ISINFd(__b)) { 101 __a = _COPYSIGNd(_ISINFd(__a) ? 1 : 0, __a); 102 __b = _COPYSIGNd(_ISINFd(__b) ? 1 : 0, __b); 103 if (_ISNANd(__c)) 104 __c = _COPYSIGNd(0, __c); 105 if (_ISNANd(__d)) 106 __d = _COPYSIGNd(0, __d); 107 __recalc = 1; 108 } 109 if (_ISINFd(__c) || _ISINFd(__d)) { 110 __c = _COPYSIGNd(_ISINFd(__c) ? 1 : 0, __c); 111 __d = _COPYSIGNd(_ISINFd(__d) ? 1 : 0, __d); 112 if (_ISNANd(__a)) 113 __a = _COPYSIGNd(0, __a); 114 if (_ISNANd(__b)) 115 __b = _COPYSIGNd(0, __b); 116 __recalc = 1; 117 } 118 if (!__recalc && 119 (_ISINFd(__ac) || _ISINFd(__bd) || _ISINFd(__ad) || _ISINFd(__bc))) { 120 if (_ISNANd(__a)) 121 __a = _COPYSIGNd(0, __a); 122 if (_ISNANd(__b)) 123 __b = _COPYSIGNd(0, __b); 124 if (_ISNANd(__c)) 125 __c = _COPYSIGNd(0, __c); 126 if (_ISNANd(__d)) 127 __d = _COPYSIGNd(0, __d); 128 __recalc = 1; 129 } 130 if (__recalc) { 131 // Can't use std::numeric_limits<double>::infinity() -- that doesn't have 132 // a device overload (and isn't constexpr before C++11, naturally). 133 __real__(z) = __builtin_huge_val() * (__a * __c - __b * __d); 134 __imag__(z) = __builtin_huge_val() * (__a * __d + __b * __c); 135 } 136 } 137 return z; 138} 139 140__DEVICE__ float _Complex __mulsc3(float __a, float __b, float __c, float __d) { 141 float __ac = __a * __c; 142 float __bd = __b * __d; 143 float __ad = __a * __d; 144 float __bc = __b * __c; 145 float _Complex z; 146 __real__(z) = __ac - __bd; 147 __imag__(z) = __ad + __bc; 148 if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) { 149 int __recalc = 0; 150 if (_ISINFf(__a) || _ISINFf(__b)) { 151 __a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a); 152 __b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b); 153 if (_ISNANf(__c)) 154 __c = _COPYSIGNf(0, __c); 155 if (_ISNANf(__d)) 156 __d = _COPYSIGNf(0, __d); 157 __recalc = 1; 158 } 159 if (_ISINFf(__c) || _ISINFf(__d)) { 160 __c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c); 161 __d = _COPYSIGNf(_ISINFf(__d) ? 1 : 0, __d); 162 if (_ISNANf(__a)) 163 __a = _COPYSIGNf(0, __a); 164 if (_ISNANf(__b)) 165 __b = _COPYSIGNf(0, __b); 166 __recalc = 1; 167 } 168 if (!__recalc && 169 (_ISINFf(__ac) || _ISINFf(__bd) || _ISINFf(__ad) || _ISINFf(__bc))) { 170 if (_ISNANf(__a)) 171 __a = _COPYSIGNf(0, __a); 172 if (_ISNANf(__b)) 173 __b = _COPYSIGNf(0, __b); 174 if (_ISNANf(__c)) 175 __c = _COPYSIGNf(0, __c); 176 if (_ISNANf(__d)) 177 __d = _COPYSIGNf(0, __d); 178 __recalc = 1; 179 } 180 if (__recalc) { 181 __real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d); 182 __imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c); 183 } 184 } 185 return z; 186} 187 188__DEVICE__ double _Complex __divdc3(double __a, double __b, double __c, 189 double __d) { 190 int __ilogbw = 0; 191 // Can't use std::max, because that's defined in <algorithm>, and we don't 192 // want to pull that in for every compile. The CUDA headers define 193 // ::max(float, float) and ::max(double, double), which is sufficient for us. 194 double __logbw = _LOGBd(_fmaxd(_ABSd(__c), _ABSd(__d))); 195 if (_ISFINITEd(__logbw)) { 196 __ilogbw = (int)__logbw; 197 __c = _SCALBNd(__c, -__ilogbw); 198 __d = _SCALBNd(__d, -__ilogbw); 199 } 200 double __denom = __c * __c + __d * __d; 201 double _Complex z; 202 __real__(z) = _SCALBNd((__a * __c + __b * __d) / __denom, -__ilogbw); 203 __imag__(z) = _SCALBNd((__b * __c - __a * __d) / __denom, -__ilogbw); 204 if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) { 205 if ((__denom == 0.0) && (!_ISNANd(__a) || !_ISNANd(__b))) { 206 __real__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __a; 207 __imag__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __b; 208 } else if ((_ISINFd(__a) || _ISINFd(__b)) && _ISFINITEd(__c) && 209 _ISFINITEd(__d)) { 210 __a = _COPYSIGNd(_ISINFd(__a) ? 1.0 : 0.0, __a); 211 __b = _COPYSIGNd(_ISINFd(__b) ? 1.0 : 0.0, __b); 212 __real__(z) = __builtin_huge_val() * (__a * __c + __b * __d); 213 __imag__(z) = __builtin_huge_val() * (__b * __c - __a * __d); 214 } else if (_ISINFd(__logbw) && __logbw > 0.0 && _ISFINITEd(__a) && 215 _ISFINITEd(__b)) { 216 __c = _COPYSIGNd(_ISINFd(__c) ? 1.0 : 0.0, __c); 217 __d = _COPYSIGNd(_ISINFd(__d) ? 1.0 : 0.0, __d); 218 __real__(z) = 0.0 * (__a * __c + __b * __d); 219 __imag__(z) = 0.0 * (__b * __c - __a * __d); 220 } 221 } 222 return z; 223} 224 225__DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) { 226 int __ilogbw = 0; 227 float __logbw = _LOGBf(_fmaxf(_ABSf(__c), _ABSf(__d))); 228 if (_ISFINITEf(__logbw)) { 229 __ilogbw = (int)__logbw; 230 __c = _SCALBNf(__c, -__ilogbw); 231 __d = _SCALBNf(__d, -__ilogbw); 232 } 233 float __denom = __c * __c + __d * __d; 234 float _Complex z; 235 __real__(z) = _SCALBNf((__a * __c + __b * __d) / __denom, -__ilogbw); 236 __imag__(z) = _SCALBNf((__b * __c - __a * __d) / __denom, -__ilogbw); 237 if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) { 238 if ((__denom == 0) && (!_ISNANf(__a) || !_ISNANf(__b))) { 239 __real__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __a; 240 __imag__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __b; 241 } else if ((_ISINFf(__a) || _ISINFf(__b)) && _ISFINITEf(__c) && 242 _ISFINITEf(__d)) { 243 __a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a); 244 __b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b); 245 __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d); 246 __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d); 247 } else if (_ISINFf(__logbw) && __logbw > 0 && _ISFINITEf(__a) && 248 _ISFINITEf(__b)) { 249 __c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c); 250 __d = _COPYSIGNf(_ISINFf(__d) ? 1 : 0, __d); 251 __real__(z) = 0 * (__a * __c + __b * __d); 252 __imag__(z) = 0 * (__b * __c - __a * __d); 253 } 254 } 255 return z; 256} 257 258#if defined(__cplusplus) 259} // extern "C" 260#endif 261 262#undef _ISNANd 263#undef _ISNANf 264#undef _ISINFd 265#undef _ISINFf 266#undef _COPYSIGNd 267#undef _COPYSIGNf 268#undef _ISFINITEd 269#undef _ISFINITEf 270#undef _SCALBNd 271#undef _SCALBNf 272#undef _ABSd 273#undef _ABSf 274#undef _LOGBd 275#undef _LOGBf 276#undef _fmaxd 277#undef _fmaxf 278 279#if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__) 280#pragma omp end declare target 281#endif 282 283#pragma pop_macro("__DEVICE__") 284 285#endif // __CLANG_CUDA_COMPLEX_BUILTINS 286