/*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __CLANG_CUDA_CMATH_H__
#define __CLANG_CUDA_CMATH_H__
#ifndef __CUDA__
#error "This file is for CUDA compilation only."
#endif

#include <limits>

// CUDA lets us use various std math functions on the device side. This file
// works in concert with __clang_cuda_math_forward_declares.h to make this work.
//
// Specifically, the forward-declares header declares __device__ overloads for
// these functions in the global namespace, then pulls them into namespace std
// with 'using' statements. Then this file implements those functions, after
// their implementations have been pulled in.
//
// It's important that we declare the functions in the global namespace and pull
// them into namespace std with using statements, as opposed to simply declaring
// these functions in namespace std, because our device functions need to
// overload the standard library functions, which may be declared in the global
// namespace or in std, depending on the degree of conformance of the stdlib
// implementation. Declaring in the global namespace and pulling into namespace
// std covers all of the known knowns.

// Every wrapper below is declared with __DEVICE__.  When this header is used
// for OpenMP offloading (_OPENMP defined), the __device__ and __inline__
// qualifiers are omitted; otherwise the wrappers are static inline __device__
// functions that are always inlined.
#ifdef _OPENMP
#define __DEVICE__ static __attribute__((always_inline))
#else
#define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))
#endif

// For C++ 17 we need to include noexcept attribute to be compatible
// with the header-defined version. This may be removed once
// variant is supported.
// __NOEXCEPT expands to 'noexcept' only for OpenMP offloading under C++17,
// where it must match the noexcept-specified declarations in the host headers.
#if defined(_OPENMP) && defined(__cplusplus) && __cplusplus >= 201703L
#define __NOEXCEPT noexcept
#else
#define __NOEXCEPT
#endif

// Integer and floating-point overloads of abs(), implemented in terms of the
// global-namespace CUDA math functions (::llabs, ::labs, ::fabsf, ::fabs).
#if !(defined(_OPENMP) && defined(__cplusplus))
__DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
__DEVICE__ long abs(long __n) { return ::labs(__n); }
__DEVICE__ float abs(float __x) { return ::fabsf(__x); }
__DEVICE__ double abs(double __x) { return ::fabs(__x); }
#endif
// TODO: remove once variant is supported.
#if defined(_OPENMP) && defined(__cplusplus)
__DEVICE__ const float abs(const float __x) { return ::fabsf((float)__x); }
__DEVICE__ const double abs(const double __x) { return ::fabs((double)__x); }
#endif

// float overloads of the standard math functions, each forwarding to the
// corresponding 'f'-suffixed function in the global namespace.
__DEVICE__ float acos(float __x) { return ::acosf(__x); }
__DEVICE__ float asin(float __x) { return ::asinf(__x); }
__DEVICE__ float atan(float __x) { return ::atanf(__x); }
__DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }
__DEVICE__ float ceil(float __x) { return ::ceilf(__x); }
__DEVICE__ float cos(float __x) { return ::cosf(__x); }
__DEVICE__ float cosh(float __x) { return ::coshf(__x); }
__DEVICE__ float exp(float __x) { return ::expf(__x); }
__DEVICE__ float fabs(float __x) __NOEXCEPT { return ::fabsf(__x); }
__DEVICE__ float floor(float __x) { return ::floorf(__x); }
__DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }

// fpclassify is implemented via the compiler builtin rather than a CUDA
// library call.
// TODO: remove when variant is supported
#ifndef _OPENMP
__DEVICE__ int fpclassify(float __x) {
  return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
                              FP_ZERO, __x);
}
__DEVICE__ int fpclassify(double __x) {
  return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
                              FP_ZERO, __x);
}
#endif
__DEVICE__ float frexp(float __arg, int *__exp) {
  return ::frexpf(__arg, __exp);
}

// For inscrutable reasons, the CUDA headers define these functions for us on
// Windows.
#ifndef _MSC_VER
__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
// For inscrutable reasons, __finite(), the double-precision version of
// __finitef, does not exist when compiling for MacOS.  __isfinited is available
// everywhere and is just as good.
__DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); }
__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
#endif

// Floating-point comparison helpers, implemented with the corresponding
// compiler builtins rather than CUDA library calls.
__DEVICE__ bool isgreater(float __x, float __y) {
  return __builtin_isgreater(__x, __y);
}
__DEVICE__ bool isgreater(double __x, double __y) {
  return __builtin_isgreater(__x, __y);
}
__DEVICE__ bool isgreaterequal(float __x, float __y) {
  return __builtin_isgreaterequal(__x, __y);
}
__DEVICE__ bool isgreaterequal(double __x, double __y) {
  return __builtin_isgreaterequal(__x, __y);
}
__DEVICE__ bool isless(float __x, float __y) {
  return __builtin_isless(__x, __y);
}
__DEVICE__ bool isless(double __x, double __y) {
  return __builtin_isless(__x, __y);
}
__DEVICE__ bool islessequal(float __x, float __y) {
  return __builtin_islessequal(__x, __y);
}
__DEVICE__ bool islessequal(double __x, double __y) {
  return __builtin_islessequal(__x, __y);
}
__DEVICE__ bool islessgreater(float __x, float __y) {
  return __builtin_islessgreater(__x, __y);
}
__DEVICE__ bool islessgreater(double __x, double __y) {
  return __builtin_islessgreater(__x, __y);
}
__DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }
__DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }
__DEVICE__ bool isunordered(float __x, float __y) {
  return __builtin_isunordered(__x, __y);
}
__DEVICE__ bool isunordered(double __x, double __y) {
  return __builtin_isunordered(__x, __y);
}

// Remaining float overloads, again forwarding to the 'f'-suffixed
// global-namespace functions.  pow additionally gets integer-exponent
// overloads via ::powif / ::powi.
__DEVICE__ float ldexp(float __arg, int __exp) {
  return ::ldexpf(__arg, __exp);
}
__DEVICE__ float log(float __x) { return ::logf(__x); }
__DEVICE__ float log10(float __x) { return ::log10f(__x); }
__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
__DEVICE__ float pow(float __base, float __exp) {
  return ::powf(__base, __exp);
}
__DEVICE__ float pow(float __base, int __iexp) {
  return ::powif(__base, __iexp);
}
__DEVICE__ double pow(double __base, int __iexp) {
  return ::powi(__base, __iexp);
}
__DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }
__DEVICE__ bool signbit(double __x) { return ::__signbitd(__x); }
__DEVICE__ float sin(float __x) { return ::sinf(__x); }
__DEVICE__ float sinh(float __x) { return ::sinhf(__x); }
__DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); }
__DEVICE__ float tan(float __x) { return ::tanf(__x); }
__DEVICE__ float tanh(float __x) { return ::tanhf(__x); }

// Notably missing above is nexttoward. We omit it because
// libdevice doesn't provide an implementation, and we don't want to be in the
// business of implementing tricky libm functions in this header.

// Now we've defined everything we promised we'd define in
// __clang_cuda_math_forward_declares.h. We need to do two additional things to
// fix up our math functions.
//
// 1) Define __device__ overloads for e.g. sin(int). The CUDA headers define
//    only sin(float) and sin(double), which means that e.g. sin(0) is
//    ambiguous.
//
// 2) Pull the __device__ overloads of "foobarf" math functions into namespace
//    std.  These are defined in the CUDA headers in the global namespace,
//    independent of everything else we've done here.

// We can't use std::enable_if, because we want to be pre-C++11 compatible.
But 177// we go ahead and unconditionally define functions that are only available when 178// compiling for C++11 to match the behavior of the CUDA headers. 179template<bool __B, class __T = void> 180struct __clang_cuda_enable_if {}; 181 182template <class __T> struct __clang_cuda_enable_if<true, __T> { 183 typedef __T type; 184}; 185 186// Defines an overload of __fn that accepts one integral argument, calls 187// __fn((double)x), and returns __retty. 188#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn) \ 189 template <typename __T> \ 190 __DEVICE__ \ 191 typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, \ 192 __retty>::type \ 193 __fn(__T __x) { \ 194 return ::__fn((double)__x); \ 195 } 196 197// Defines an overload of __fn that accepts one two arithmetic arguments, calls 198// __fn((double)x, (double)y), and returns a double. 199// 200// Note this is different from OVERLOAD_1, which generates an overload that 201// accepts only *integral* arguments. 202#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn) \ 203 template <typename __T1, typename __T2> \ 204 __DEVICE__ typename __clang_cuda_enable_if< \ 205 std::numeric_limits<__T1>::is_specialized && \ 206 std::numeric_limits<__T2>::is_specialized, \ 207 __retty>::type \ 208 __fn(__T1 __x, __T2 __y) { \ 209 return __fn((double)__x, (double)__y); \ 210 } 211 212__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos) 213__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh) 214__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin) 215__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh) 216__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan) 217__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2); 218__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh) 219__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt) 220__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil) 221__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign); 222__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos) 223__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh) 
224__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf) 225__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc) 226__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp) 227__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2) 228__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1) 229__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs) 230__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim); 231__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor) 232__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax); 233__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin); 234__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod); 235__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify) 236__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot); 237__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb) 238__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite) 239__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater); 240__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal); 241__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf); 242__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless); 243__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal); 244__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater); 245__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan); 246__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal) 247__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered); 248__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma) 249__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log) 250__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10) 251__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p) 252__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2) 253__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb) 254__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint) 255__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround) 256__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint) 257__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround) 258__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint); 259__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter); 260__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow); 
261__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder); 262__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint); 263__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round); 264__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit) 265__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin) 266__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh) 267__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt) 268__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan) 269__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh) 270__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma) 271__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc); 272 273#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1 274#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2 275 276// Overloads for functions that don't match the patterns expected by 277// __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}. 278template <typename __T1, typename __T2, typename __T3> 279__DEVICE__ typename __clang_cuda_enable_if< 280 std::numeric_limits<__T1>::is_specialized && 281 std::numeric_limits<__T2>::is_specialized && 282 std::numeric_limits<__T3>::is_specialized, 283 double>::type 284fma(__T1 __x, __T2 __y, __T3 __z) { 285 return std::fma((double)__x, (double)__y, (double)__z); 286} 287 288template <typename __T> 289__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, 290 double>::type 291frexp(__T __x, int *__exp) { 292 return std::frexp((double)__x, __exp); 293} 294 295template <typename __T> 296__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, 297 double>::type 298ldexp(__T __x, int __exp) { 299 return std::ldexp((double)__x, __exp); 300} 301 302template <typename __T1, typename __T2> 303__DEVICE__ typename __clang_cuda_enable_if< 304 std::numeric_limits<__T1>::is_specialized && 305 std::numeric_limits<__T2>::is_specialized, 306 double>::type 307remquo(__T1 __x, __T2 __y, int *__quo) { 308 return std::remquo((double)__x, (double)__y, __quo); 309} 310 311template <typename __T> 312__DEVICE__ typename 
__clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, 313 double>::type 314scalbln(__T __x, long __exp) { 315 return std::scalbln((double)__x, __exp); 316} 317 318template <typename __T> 319__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, 320 double>::type 321scalbn(__T __x, int __exp) { 322 return std::scalbn((double)__x, __exp); 323} 324 325// We need to define these overloads in exactly the namespace our standard 326// library uses (including the right inline namespace), otherwise they won't be 327// picked up by other functions in the standard library (e.g. functions in 328// <complex>). Thus the ugliness below. 329#ifdef _LIBCPP_BEGIN_NAMESPACE_STD 330_LIBCPP_BEGIN_NAMESPACE_STD 331#else 332namespace std { 333#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION 334_GLIBCXX_BEGIN_NAMESPACE_VERSION 335#endif 336#endif 337 338// Pull the new overloads we defined above into namespace std. 339using ::acos; 340using ::acosh; 341using ::asin; 342using ::asinh; 343using ::atan; 344using ::atan2; 345using ::atanh; 346using ::cbrt; 347using ::ceil; 348using ::copysign; 349using ::cos; 350using ::cosh; 351using ::erf; 352using ::erfc; 353using ::exp; 354using ::exp2; 355using ::expm1; 356using ::fabs; 357using ::fdim; 358using ::floor; 359using ::fma; 360using ::fmax; 361using ::fmin; 362using ::fmod; 363using ::fpclassify; 364using ::frexp; 365using ::hypot; 366using ::ilogb; 367using ::isfinite; 368using ::isgreater; 369using ::isgreaterequal; 370using ::isless; 371using ::islessequal; 372using ::islessgreater; 373using ::isnormal; 374using ::isunordered; 375using ::ldexp; 376using ::lgamma; 377using ::llrint; 378using ::llround; 379using ::log; 380using ::log10; 381using ::log1p; 382using ::log2; 383using ::logb; 384using ::lrint; 385using ::lround; 386using ::nearbyint; 387using ::nextafter; 388using ::pow; 389using ::remainder; 390using ::remquo; 391using ::rint; 392using ::round; 393using ::scalbln; 394using ::scalbn; 395using ::signbit; 
using ::sin;
using ::sinh;
using ::sqrt;
using ::tan;
using ::tanh;
using ::tgamma;
using ::trunc;

// Well this is fun: We need to pull these symbols in for libc++, but we can't
// pull them in with libstdc++, because its ::isinf and ::isnan are different
// than its std::isinf and std::isnan.
#ifndef __GLIBCXX__
using ::isinf;
using ::isnan;
#endif

// Finally, pull the "foobarf" functions that CUDA defines in its headers into
// namespace std.
using ::acosf;
using ::acoshf;
using ::asinf;
using ::asinhf;
using ::atan2f;
using ::atanf;
using ::atanhf;
using ::cbrtf;
using ::ceilf;
using ::copysignf;
using ::cosf;
using ::coshf;
using ::erfcf;
using ::erff;
using ::exp2f;
using ::expf;
using ::expm1f;
using ::fabsf;
using ::fdimf;
using ::floorf;
using ::fmaf;
using ::fmaxf;
using ::fminf;
using ::fmodf;
using ::frexpf;
using ::hypotf;
using ::ilogbf;
using ::ldexpf;
using ::lgammaf;
using ::llrintf;
using ::llroundf;
using ::log10f;
using ::log1pf;
using ::log2f;
using ::logbf;
using ::logf;
using ::lrintf;
using ::lroundf;
using ::modff;
using ::nearbyintf;
using ::nextafterf;
using ::powf;
using ::remainderf;
using ::remquof;
using ::rintf;
using ::roundf;
// scalblnf is not pulled in for OpenMP offloading.
// TODO: remove once variant is supported
#ifndef _OPENMP
using ::scalblnf;
#endif
using ::scalbnf;
using ::sinf;
using ::sinhf;
using ::sqrtf;
using ::tanf;
using ::tanhf;
using ::tgammaf;
using ::truncf;

// Close whichever namespace-std form was opened above (libc++ macro, versioned
// libstdc++ inline namespace, or a plain 'namespace std').
#ifdef _LIBCPP_END_NAMESPACE_STD
_LIBCPP_END_NAMESPACE_STD
#else
#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
_GLIBCXX_END_NAMESPACE_VERSION
#endif
} // namespace std
#endif

// Helper macros are local to this header; don't let them leak out.
#undef __NOEXCEPT
#undef __DEVICE__

#endif