1/*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9#ifndef __CLANG_CUDA_CMATH_H__
10#define __CLANG_CUDA_CMATH_H__
11#ifndef __CUDA__
12#error "This file is for CUDA compilation only."
13#endif
14
15#include <limits>
16
17// CUDA lets us use various std math functions on the device side.  This file
18// works in concert with __clang_cuda_math_forward_declares.h to make this work.
19//
20// Specifically, the forward-declares header declares __device__ overloads for
21// these functions in the global namespace, then pulls them into namespace std
22// with 'using' statements.  Then this file implements those functions, after
23// their implementations have been pulled in.
24//
25// It's important that we declare the functions in the global namespace and pull
26// them into namespace std with using statements, as opposed to simply declaring
27// these functions in namespace std, because our device functions need to
28// overload the standard library functions, which may be declared in the global
29// namespace or in std, depending on the degree of conformance of the stdlib
30// implementation.  Declaring in the global namespace and pulling into namespace
31// std covers all of the known knowns.
32
33#ifdef _OPENMP
34#define __DEVICE__ static __attribute__((always_inline))
35#else
36#define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))
37#endif
38
39// For C++ 17 we need to include noexcept attribute to be compatible
40// with the header-defined version. This may be removed once
41// variant is supported.
42#if defined(_OPENMP) && defined(__cplusplus) && __cplusplus >= 201703L
43#define __NOEXCEPT noexcept
44#else
45#define __NOEXCEPT
46#endif
47
#if !(defined(_OPENMP) && defined(__cplusplus))
// Overloads of abs() for the wider integer types and for float/double,
// forwarding to the corresponding C library functions.  Skipped for OpenMP
// C++ compilation, which gets the const-qualified variants below instead.
__DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
__DEVICE__ long abs(long __n) { return ::labs(__n); }
__DEVICE__ float abs(float __x) { return ::fabsf(__x); }
__DEVICE__ double abs(double __x) { return ::fabs(__x); }
#endif
// TODO: remove once variant is supported.
#if defined(_OPENMP) && defined(__cplusplus)
// OpenMP-only abs() overloads for float/double.
// NOTE(review): the top-level const on the return and parameter types has no
// effect on overload resolution or on the value returned — presumably kept to
// match a host header's declarations; confirm before changing.
__DEVICE__ const float abs(const float __x) { return ::fabsf((float)__x); }
__DEVICE__ const double abs(const double __x) { return ::fabs((double)__x); }
#endif
// Single-precision overloads of the <cmath> functions, each forwarding to the
// "f"-suffixed C implementation.
__DEVICE__ float acos(float __x) { return ::acosf(__x); }
__DEVICE__ float asin(float __x) { return ::asinf(__x); }
__DEVICE__ float atan(float __x) { return ::atanf(__x); }
__DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }
__DEVICE__ float ceil(float __x) { return ::ceilf(__x); }
__DEVICE__ float cos(float __x) { return ::cosf(__x); }
__DEVICE__ float cosh(float __x) { return ::coshf(__x); }
__DEVICE__ float exp(float __x) { return ::expf(__x); }
// __NOEXCEPT expands to 'noexcept' only for OpenMP C++17 (see the #define
// near the top of this file); otherwise it is empty.
__DEVICE__ float fabs(float __x) __NOEXCEPT { return ::fabsf(__x); }
__DEVICE__ float floor(float __x) { return ::floorf(__x); }
__DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }
// TODO: remove when variant is supported
#ifndef _OPENMP
// Classify __x into one of the floating-point categories, returning the
// matching FP_* constant (NAN, INFINITE, NORMAL, SUBNORMAL, or ZERO) via the
// clang builtin.
__DEVICE__ int fpclassify(float __x) {
  return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
                              FP_ZERO, __x);
}
__DEVICE__ int fpclassify(double __x) {
  return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
                              FP_ZERO, __x);
}
#endif
// Single-precision frexp: splits __arg into a normalized fraction (returned)
// and an integral exponent of two (stored through *__exp).
__DEVICE__ float frexp(float __arg, int *__exp) {
  return ::frexpf(__arg, __exp);
}
84
// For inscrutable reasons, the CUDA headers define these functions for us on
// Windows.
#ifndef _MSC_VER
// bool-returning classification predicates, forwarding to the
// double-underscore implementations the CUDA headers provide.
__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
// For inscrutable reasons, __finite(), the double-precision version of
// __finitef, does not exist when compiling for MacOS.  __isfinited is available
// everywhere and is just as good.
__DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); }
__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
#endif
98
99__DEVICE__ bool isgreater(float __x, float __y) {
100  return __builtin_isgreater(__x, __y);
101}
102__DEVICE__ bool isgreater(double __x, double __y) {
103  return __builtin_isgreater(__x, __y);
104}
105__DEVICE__ bool isgreaterequal(float __x, float __y) {
106  return __builtin_isgreaterequal(__x, __y);
107}
108__DEVICE__ bool isgreaterequal(double __x, double __y) {
109  return __builtin_isgreaterequal(__x, __y);
110}
111__DEVICE__ bool isless(float __x, float __y) {
112  return __builtin_isless(__x, __y);
113}
114__DEVICE__ bool isless(double __x, double __y) {
115  return __builtin_isless(__x, __y);
116}
117__DEVICE__ bool islessequal(float __x, float __y) {
118  return __builtin_islessequal(__x, __y);
119}
120__DEVICE__ bool islessequal(double __x, double __y) {
121  return __builtin_islessequal(__x, __y);
122}
123__DEVICE__ bool islessgreater(float __x, float __y) {
124  return __builtin_islessgreater(__x, __y);
125}
126__DEVICE__ bool islessgreater(double __x, double __y) {
127  return __builtin_islessgreater(__x, __y);
128}
129__DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }
130__DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }
131__DEVICE__ bool isunordered(float __x, float __y) {
132  return __builtin_isunordered(__x, __y);
133}
134__DEVICE__ bool isunordered(double __x, double __y) {
135  return __builtin_isunordered(__x, __y);
136}
// Remaining single-precision overloads, plus pow() overloads taking an
// integer exponent, which forward to the CUDA powif/powi functions.
__DEVICE__ float ldexp(float __arg, int __exp) {
  return ::ldexpf(__arg, __exp);
}
__DEVICE__ float log(float __x) { return ::logf(__x); }
__DEVICE__ float log10(float __x) { return ::log10f(__x); }
__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
__DEVICE__ float pow(float __base, float __exp) {
  return ::powf(__base, __exp);
}
__DEVICE__ float pow(float __base, int __iexp) {
  return ::powif(__base, __iexp);
}
__DEVICE__ double pow(double __base, int __iexp) {
  return ::powi(__base, __iexp);
}
__DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }
__DEVICE__ bool signbit(double __x) { return ::__signbitd(__x); }
__DEVICE__ float sin(float __x) { return ::sinf(__x); }
__DEVICE__ float sinh(float __x) { return ::sinhf(__x); }
__DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); }
__DEVICE__ float tan(float __x) { return ::tanf(__x); }
__DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
159
160// Notably missing above is nexttoward.  We omit it because
161// libdevice doesn't provide an implementation, and we don't want to be in the
162// business of implementing tricky libm functions in this header.
163
164// Now we've defined everything we promised we'd define in
165// __clang_cuda_math_forward_declares.h.  We need to do two additional things to
166// fix up our math functions.
167//
168// 1) Define __device__ overloads for e.g. sin(int).  The CUDA headers define
169//    only sin(float) and sin(double), which means that e.g. sin(0) is
170//    ambiguous.
171//
172// 2) Pull the __device__ overloads of "foobarf" math functions into namespace
173//    std.  These are defined in the CUDA headers in the global namespace,
174//    independent of everything else we've done here.
175
176// We can't use std::enable_if, because we want to be pre-C++11 compatible.  But
177// we go ahead and unconditionally define functions that are only available when
178// compiling for C++11 to match the behavior of the CUDA headers.
// Minimal SFINAE helper equivalent to std::enable_if (hand-rolled so the
// header stays usable pre-C++11): the nested 'type' typedef exists only in
// the __B == true specialization.
template<bool __B, class __T = void>
struct __clang_cuda_enable_if {};

template <class __T> struct __clang_cuda_enable_if<true, __T> {
  typedef __T type;
};
185
// Defines an overload of __fn that accepts one integral argument (selected
// via SFINAE on std::numeric_limits<__T>::is_integer), calls __fn((double)x),
// and returns __retty.
#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn)                      \
  template <typename __T>                                                      \
  __DEVICE__                                                                   \
      typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,    \
                                      __retty>::type                           \
      __fn(__T __x) {                                                          \
    return ::__fn((double)__x);                                                \
  }
196
// Defines an overload of __fn that accepts two arithmetic arguments, calls
// __fn((double)x, (double)y), and returns a double.
//
// Note this is different from OVERLOAD_1, which generates an overload that
// accepts only *integral* arguments.
#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn)                      \
  template <typename __T1, typename __T2>                                      \
  __DEVICE__ typename __clang_cuda_enable_if<                                  \
      std::numeric_limits<__T1>::is_specialized &&                             \
          std::numeric_limits<__T2>::is_specialized,                           \
      __retty>::type                                                           \
  __fn(__T1 __x, __T2 __y) {                                                   \
    return __fn((double)__x, (double)__y);                                     \
  }
211
// Instantiate the integer/mixed-argument overloads for the rest of the math
// library.  OVERLOAD_1 entries take one integral argument promoted to double;
// OVERLOAD_2 entries take any two arithmetic arguments.
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc);

#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1
#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2
275
// Overloads for functions that don't match the patterns expected by
// __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}.

// fma() for any mix of three arithmetic argument types, computed in double.
template <typename __T1, typename __T2, typename __T3>
__DEVICE__ typename __clang_cuda_enable_if<
    std::numeric_limits<__T1>::is_specialized &&
        std::numeric_limits<__T2>::is_specialized &&
        std::numeric_limits<__T3>::is_specialized,
    double>::type
fma(__T1 __x, __T2 __y, __T3 __z) {
  return std::fma((double)__x, (double)__y, (double)__z);
}
287
// frexp() for an integral argument, promoted to double.
template <typename __T>
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
                                           double>::type
frexp(__T __x, int *__exp) {
  return std::frexp((double)__x, __exp);
}
294
// ldexp() for an integral argument, promoted to double.
template <typename __T>
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
                                           double>::type
ldexp(__T __x, int __exp) {
  return std::ldexp((double)__x, __exp);
}
301
// remquo() for any two arithmetic argument types, computed in double; the
// quotient bits are stored through __quo as usual.
template <typename __T1, typename __T2>
__DEVICE__ typename __clang_cuda_enable_if<
    std::numeric_limits<__T1>::is_specialized &&
        std::numeric_limits<__T2>::is_specialized,
    double>::type
remquo(__T1 __x, __T2 __y, int *__quo) {
  return std::remquo((double)__x, (double)__y, __quo);
}
310
// scalbln() for an integral argument, promoted to double.
template <typename __T>
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
                                           double>::type
scalbln(__T __x, long __exp) {
  return std::scalbln((double)__x, __exp);
}
317
// scalbn() for an integral argument, promoted to double.
template <typename __T>
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
                                           double>::type
scalbn(__T __x, int __exp) {
  return std::scalbn((double)__x, __exp);
}
324
325// We need to define these overloads in exactly the namespace our standard
326// library uses (including the right inline namespace), otherwise they won't be
327// picked up by other functions in the standard library (e.g. functions in
328// <complex>).  Thus the ugliness below.
329#ifdef _LIBCPP_BEGIN_NAMESPACE_STD
330_LIBCPP_BEGIN_NAMESPACE_STD
331#else
332namespace std {
333#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
334_GLIBCXX_BEGIN_NAMESPACE_VERSION
335#endif
336#endif
337
338// Pull the new overloads we defined above into namespace std.
339using ::acos;
340using ::acosh;
341using ::asin;
342using ::asinh;
343using ::atan;
344using ::atan2;
345using ::atanh;
346using ::cbrt;
347using ::ceil;
348using ::copysign;
349using ::cos;
350using ::cosh;
351using ::erf;
352using ::erfc;
353using ::exp;
354using ::exp2;
355using ::expm1;
356using ::fabs;
357using ::fdim;
358using ::floor;
359using ::fma;
360using ::fmax;
361using ::fmin;
362using ::fmod;
363using ::fpclassify;
364using ::frexp;
365using ::hypot;
366using ::ilogb;
367using ::isfinite;
368using ::isgreater;
369using ::isgreaterequal;
370using ::isless;
371using ::islessequal;
372using ::islessgreater;
373using ::isnormal;
374using ::isunordered;
375using ::ldexp;
376using ::lgamma;
377using ::llrint;
378using ::llround;
379using ::log;
380using ::log10;
381using ::log1p;
382using ::log2;
383using ::logb;
384using ::lrint;
385using ::lround;
386using ::nearbyint;
387using ::nextafter;
388using ::pow;
389using ::remainder;
390using ::remquo;
391using ::rint;
392using ::round;
393using ::scalbln;
394using ::scalbn;
395using ::signbit;
396using ::sin;
397using ::sinh;
398using ::sqrt;
399using ::tan;
400using ::tanh;
401using ::tgamma;
402using ::trunc;
403
404// Well this is fun: We need to pull these symbols in for libc++, but we can't
405// pull them in with libstdc++, because its ::isinf and ::isnan are different
406// than its std::isinf and std::isnan.
407#ifndef __GLIBCXX__
408using ::isinf;
409using ::isnan;
410#endif
411
412// Finally, pull the "foobarf" functions that CUDA defines in its headers into
413// namespace std.
414using ::acosf;
415using ::acoshf;
416using ::asinf;
417using ::asinhf;
418using ::atan2f;
419using ::atanf;
420using ::atanhf;
421using ::cbrtf;
422using ::ceilf;
423using ::copysignf;
424using ::cosf;
425using ::coshf;
426using ::erfcf;
427using ::erff;
428using ::exp2f;
429using ::expf;
430using ::expm1f;
431using ::fabsf;
432using ::fdimf;
433using ::floorf;
434using ::fmaf;
435using ::fmaxf;
436using ::fminf;
437using ::fmodf;
438using ::frexpf;
439using ::hypotf;
440using ::ilogbf;
441using ::ldexpf;
442using ::lgammaf;
443using ::llrintf;
444using ::llroundf;
445using ::log10f;
446using ::log1pf;
447using ::log2f;
448using ::logbf;
449using ::logf;
450using ::lrintf;
451using ::lroundf;
452using ::modff;
453using ::nearbyintf;
454using ::nextafterf;
455using ::powf;
456using ::remainderf;
457using ::remquof;
458using ::rintf;
459using ::roundf;
460// TODO: remove once variant is supported
461#ifndef _OPENMP
462using ::scalblnf;
463#endif
464using ::scalbnf;
465using ::sinf;
466using ::sinhf;
467using ::sqrtf;
468using ::tanf;
469using ::tanhf;
470using ::tgammaf;
471using ::truncf;
472
473#ifdef _LIBCPP_END_NAMESPACE_STD
474_LIBCPP_END_NAMESPACE_STD
475#else
476#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
477_GLIBCXX_END_NAMESPACE_VERSION
478#endif
479} // namespace std
480#endif
481
482#undef __NOEXCEPT
483#undef __DEVICE__
484
485#endif
486