1/*===---- __clang_hip_math.h - Device-side HIP math support ----------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9#ifndef __CLANG_HIP_MATH_H__
10#define __CLANG_HIP_MATH_H__
11
12#if !defined(__HIP__)
13#error "This file is for HIP and OpenMP AMDGCN device compilation only."
14#endif
15
16#if !defined(__HIPCC_RTC__)
17#if defined(__cplusplus)
18#include <algorithm>
19#endif
20#include <limits.h>
21#include <stdint.h>
22#endif // __HIPCC_RTC__
23
24#pragma push_macro("__DEVICE__")
25#define __DEVICE__ static __device__ inline __attribute__((always_inline))
26
27// A few functions return bool type starting only in C++11.
28#pragma push_macro("__RETURN_TYPE")
29#if defined(__cplusplus)
30#define __RETURN_TYPE bool
31#else
32#define __RETURN_TYPE int
33#endif
34
35#if defined (__cplusplus) && __cplusplus < 201103L
36// emulate static_assert on type sizes
37template<bool>
38struct __compare_result{};
39template<>
40struct __compare_result<true> {
41  static const __device__ bool valid;
42};
43
44__DEVICE__
45void __suppress_unused_warning(bool b){};
46template <unsigned int S, unsigned int T>
47__DEVICE__ void __static_assert_equal_size() {
48  __suppress_unused_warning(__compare_result<S == T>::valid);
49}
50
51#define __static_assert_type_size_equal(A, B) \
52  __static_assert_equal_size<A,B>()
53
54#else
55#define __static_assert_type_size_equal(A,B) \
56  static_assert((A) == (B), "")
57
58#endif
59
60__DEVICE__
61uint64_t __make_mantissa_base8(const char *__tagp) {
62  uint64_t __r = 0;
63  while (__tagp) {
64    char __tmp = *__tagp;
65
66    if (__tmp >= '0' && __tmp <= '7')
67      __r = (__r * 8u) + __tmp - '0';
68    else
69      return 0;
70
71    ++__tagp;
72  }
73
74  return __r;
75}
76
77__DEVICE__
78uint64_t __make_mantissa_base10(const char *__tagp) {
79  uint64_t __r = 0;
80  while (__tagp) {
81    char __tmp = *__tagp;
82
83    if (__tmp >= '0' && __tmp <= '9')
84      __r = (__r * 10u) + __tmp - '0';
85    else
86      return 0;
87
88    ++__tagp;
89  }
90
91  return __r;
92}
93
94__DEVICE__
95uint64_t __make_mantissa_base16(const char *__tagp) {
96  uint64_t __r = 0;
97  while (__tagp) {
98    char __tmp = *__tagp;
99
100    if (__tmp >= '0' && __tmp <= '9')
101      __r = (__r * 16u) + __tmp - '0';
102    else if (__tmp >= 'a' && __tmp <= 'f')
103      __r = (__r * 16u) + __tmp - 'a' + 10;
104    else if (__tmp >= 'A' && __tmp <= 'F')
105      __r = (__r * 16u) + __tmp - 'A' + 10;
106    else
107      return 0;
108
109    ++__tagp;
110  }
111
112  return __r;
113}
114
115__DEVICE__
116uint64_t __make_mantissa(const char *__tagp) {
117  if (!__tagp)
118    return 0u;
119
120  if (*__tagp == '0') {
121    ++__tagp;
122
123    if (*__tagp == 'x' || *__tagp == 'X')
124      return __make_mantissa_base16(__tagp);
125    else
126      return __make_mantissa_base8(__tagp);
127  }
128
129  return __make_mantissa_base10(__tagp);
130}
131
132// BEGIN FLOAT
133#if defined(__cplusplus)
134__DEVICE__
135int abs(int __x) {
136  int __sgn = __x >> (sizeof(int) * CHAR_BIT - 1);
137  return (__x ^ __sgn) - __sgn;
138}
139__DEVICE__
140long labs(long __x) {
141  long __sgn = __x >> (sizeof(long) * CHAR_BIT - 1);
142  return (__x ^ __sgn) - __sgn;
143}
144__DEVICE__
145long long llabs(long long __x) {
146  long long __sgn = __x >> (sizeof(long long) * CHAR_BIT - 1);
147  return (__x ^ __sgn) - __sgn;
148}
149#endif
150
151__DEVICE__
152float acosf(float __x) { return __ocml_acos_f32(__x); }
153
154__DEVICE__
155float acoshf(float __x) { return __ocml_acosh_f32(__x); }
156
157__DEVICE__
158float asinf(float __x) { return __ocml_asin_f32(__x); }
159
160__DEVICE__
161float asinhf(float __x) { return __ocml_asinh_f32(__x); }
162
163__DEVICE__
164float atan2f(float __x, float __y) { return __ocml_atan2_f32(__x, __y); }
165
166__DEVICE__
167float atanf(float __x) { return __ocml_atan_f32(__x); }
168
169__DEVICE__
170float atanhf(float __x) { return __ocml_atanh_f32(__x); }
171
172__DEVICE__
173float cbrtf(float __x) { return __ocml_cbrt_f32(__x); }
174
175__DEVICE__
176float ceilf(float __x) { return __ocml_ceil_f32(__x); }
177
178__DEVICE__
179float copysignf(float __x, float __y) { return __ocml_copysign_f32(__x, __y); }
180
181__DEVICE__
182float cosf(float __x) { return __ocml_cos_f32(__x); }
183
184__DEVICE__
185float coshf(float __x) { return __ocml_cosh_f32(__x); }
186
187__DEVICE__
188float cospif(float __x) { return __ocml_cospi_f32(__x); }
189
190__DEVICE__
191float cyl_bessel_i0f(float __x) { return __ocml_i0_f32(__x); }
192
193__DEVICE__
194float cyl_bessel_i1f(float __x) { return __ocml_i1_f32(__x); }
195
196__DEVICE__
197float erfcf(float __x) { return __ocml_erfc_f32(__x); }
198
199__DEVICE__
200float erfcinvf(float __x) { return __ocml_erfcinv_f32(__x); }
201
202__DEVICE__
203float erfcxf(float __x) { return __ocml_erfcx_f32(__x); }
204
205__DEVICE__
206float erff(float __x) { return __ocml_erf_f32(__x); }
207
208__DEVICE__
209float erfinvf(float __x) { return __ocml_erfinv_f32(__x); }
210
211__DEVICE__
212float exp10f(float __x) { return __ocml_exp10_f32(__x); }
213
214__DEVICE__
215float exp2f(float __x) { return __ocml_exp2_f32(__x); }
216
217__DEVICE__
218float expf(float __x) { return __ocml_exp_f32(__x); }
219
220__DEVICE__
221float expm1f(float __x) { return __ocml_expm1_f32(__x); }
222
223__DEVICE__
224float fabsf(float __x) { return __ocml_fabs_f32(__x); }
225
226__DEVICE__
227float fdimf(float __x, float __y) { return __ocml_fdim_f32(__x, __y); }
228
229__DEVICE__
230float fdividef(float __x, float __y) { return __x / __y; }
231
232__DEVICE__
233float floorf(float __x) { return __ocml_floor_f32(__x); }
234
235__DEVICE__
236float fmaf(float __x, float __y, float __z) {
237  return __ocml_fma_f32(__x, __y, __z);
238}
239
240__DEVICE__
241float fmaxf(float __x, float __y) { return __ocml_fmax_f32(__x, __y); }
242
243__DEVICE__
244float fminf(float __x, float __y) { return __ocml_fmin_f32(__x, __y); }
245
246__DEVICE__
247float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); }
248
249__DEVICE__
250float frexpf(float __x, int *__nptr) {
251  int __tmp;
252  float __r =
253      __ocml_frexp_f32(__x, (__attribute__((address_space(5))) int *)&__tmp);
254  *__nptr = __tmp;
255
256  return __r;
257}
258
259__DEVICE__
260float hypotf(float __x, float __y) { return __ocml_hypot_f32(__x, __y); }
261
262__DEVICE__
263int ilogbf(float __x) { return __ocml_ilogb_f32(__x); }
264
265__DEVICE__
266__RETURN_TYPE __finitef(float __x) { return __ocml_isfinite_f32(__x); }
267
268__DEVICE__
269__RETURN_TYPE __isinff(float __x) { return __ocml_isinf_f32(__x); }
270
271__DEVICE__
272__RETURN_TYPE __isnanf(float __x) { return __ocml_isnan_f32(__x); }
273
274__DEVICE__
275float j0f(float __x) { return __ocml_j0_f32(__x); }
276
277__DEVICE__
278float j1f(float __x) { return __ocml_j1_f32(__x); }
279
280__DEVICE__
281float jnf(int __n, float __x) { // TODO: we could use Ahmes multiplication
282                                // and the Miller & Brown algorithm
283  //       for linear recurrences to get O(log n) steps, but it's unclear if
284  //       it'd be beneficial in this case.
285  if (__n == 0)
286    return j0f(__x);
287  if (__n == 1)
288    return j1f(__x);
289
290  float __x0 = j0f(__x);
291  float __x1 = j1f(__x);
292  for (int __i = 1; __i < __n; ++__i) {
293    float __x2 = (2 * __i) / __x * __x1 - __x0;
294    __x0 = __x1;
295    __x1 = __x2;
296  }
297
298  return __x1;
299}
300
301__DEVICE__
302float ldexpf(float __x, int __e) { return __ocml_ldexp_f32(__x, __e); }
303
304__DEVICE__
305float lgammaf(float __x) { return __ocml_lgamma_f32(__x); }
306
307__DEVICE__
308long long int llrintf(float __x) { return __ocml_rint_f32(__x); }
309
310__DEVICE__
311long long int llroundf(float __x) { return __ocml_round_f32(__x); }
312
313__DEVICE__
314float log10f(float __x) { return __ocml_log10_f32(__x); }
315
316__DEVICE__
317float log1pf(float __x) { return __ocml_log1p_f32(__x); }
318
319__DEVICE__
320float log2f(float __x) { return __ocml_log2_f32(__x); }
321
322__DEVICE__
323float logbf(float __x) { return __ocml_logb_f32(__x); }
324
325__DEVICE__
326float logf(float __x) { return __ocml_log_f32(__x); }
327
328__DEVICE__
329long int lrintf(float __x) { return __ocml_rint_f32(__x); }
330
331__DEVICE__
332long int lroundf(float __x) { return __ocml_round_f32(__x); }
333
334__DEVICE__
335float modff(float __x, float *__iptr) {
336  float __tmp;
337  float __r =
338      __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
339  *__iptr = __tmp;
340  return __r;
341}
342
343__DEVICE__
344float nanf(const char *__tagp) {
345  union {
346    float val;
347    struct ieee_float {
348      unsigned int mantissa : 22;
349      unsigned int quiet : 1;
350      unsigned int exponent : 8;
351      unsigned int sign : 1;
352    } bits;
353  } __tmp;
354  __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits));
355
356  __tmp.bits.sign = 0u;
357  __tmp.bits.exponent = ~0u;
358  __tmp.bits.quiet = 1u;
359  __tmp.bits.mantissa = __make_mantissa(__tagp);
360
361  return __tmp.val;
362}
363
364__DEVICE__
365float nearbyintf(float __x) { return __ocml_nearbyint_f32(__x); }
366
367__DEVICE__
368float nextafterf(float __x, float __y) {
369  return __ocml_nextafter_f32(__x, __y);
370}
371
372__DEVICE__
373float norm3df(float __x, float __y, float __z) {
374  return __ocml_len3_f32(__x, __y, __z);
375}
376
377__DEVICE__
378float norm4df(float __x, float __y, float __z, float __w) {
379  return __ocml_len4_f32(__x, __y, __z, __w);
380}
381
382__DEVICE__
383float normcdff(float __x) { return __ocml_ncdf_f32(__x); }
384
385__DEVICE__
386float normcdfinvf(float __x) { return __ocml_ncdfinv_f32(__x); }
387
388__DEVICE__
389float normf(int __dim,
390            const float *__a) { // TODO: placeholder until OCML adds support.
391  float __r = 0;
392  while (__dim--) {
393    __r += __a[0] * __a[0];
394    ++__a;
395  }
396
397  return __ocml_sqrt_f32(__r);
398}
399
400__DEVICE__
401float powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
402
403__DEVICE__
404float powif(float __x, int __y) { return __ocml_pown_f32(__x, __y); }
405
406__DEVICE__
407float rcbrtf(float __x) { return __ocml_rcbrt_f32(__x); }
408
409__DEVICE__
410float remainderf(float __x, float __y) {
411  return __ocml_remainder_f32(__x, __y);
412}
413
414__DEVICE__
415float remquof(float __x, float __y, int *__quo) {
416  int __tmp;
417  float __r = __ocml_remquo_f32(
418      __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
419  *__quo = __tmp;
420
421  return __r;
422}
423
424__DEVICE__
425float rhypotf(float __x, float __y) { return __ocml_rhypot_f32(__x, __y); }
426
427__DEVICE__
428float rintf(float __x) { return __ocml_rint_f32(__x); }
429
430__DEVICE__
431float rnorm3df(float __x, float __y, float __z) {
432  return __ocml_rlen3_f32(__x, __y, __z);
433}
434
435__DEVICE__
436float rnorm4df(float __x, float __y, float __z, float __w) {
437  return __ocml_rlen4_f32(__x, __y, __z, __w);
438}
439
440__DEVICE__
441float rnormf(int __dim,
442             const float *__a) { // TODO: placeholder until OCML adds support.
443  float __r = 0;
444  while (__dim--) {
445    __r += __a[0] * __a[0];
446    ++__a;
447  }
448
449  return __ocml_rsqrt_f32(__r);
450}
451
452__DEVICE__
453float roundf(float __x) { return __ocml_round_f32(__x); }
454
455__DEVICE__
456float rsqrtf(float __x) { return __ocml_rsqrt_f32(__x); }
457
458__DEVICE__
459float scalblnf(float __x, long int __n) {
460  return (__n < INT_MAX) ? __ocml_scalbn_f32(__x, __n)
461                         : __ocml_scalb_f32(__x, __n);
462}
463
464__DEVICE__
465float scalbnf(float __x, int __n) { return __ocml_scalbn_f32(__x, __n); }
466
467__DEVICE__
468__RETURN_TYPE __signbitf(float __x) { return __ocml_signbit_f32(__x); }
469
470__DEVICE__
471void sincosf(float __x, float *__sinptr, float *__cosptr) {
472  float __tmp;
473  *__sinptr =
474      __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
475  *__cosptr = __tmp;
476}
477
478__DEVICE__
479void sincospif(float __x, float *__sinptr, float *__cosptr) {
480  float __tmp;
481  *__sinptr = __ocml_sincospi_f32(
482      __x, (__attribute__((address_space(5))) float *)&__tmp);
483  *__cosptr = __tmp;
484}
485
486__DEVICE__
487float sinf(float __x) { return __ocml_sin_f32(__x); }
488
489__DEVICE__
490float sinhf(float __x) { return __ocml_sinh_f32(__x); }
491
492__DEVICE__
493float sinpif(float __x) { return __ocml_sinpi_f32(__x); }
494
495__DEVICE__
496float sqrtf(float __x) { return __ocml_sqrt_f32(__x); }
497
498__DEVICE__
499float tanf(float __x) { return __ocml_tan_f32(__x); }
500
501__DEVICE__
502float tanhf(float __x) { return __ocml_tanh_f32(__x); }
503
504__DEVICE__
505float tgammaf(float __x) { return __ocml_tgamma_f32(__x); }
506
507__DEVICE__
508float truncf(float __x) { return __ocml_trunc_f32(__x); }
509
510__DEVICE__
511float y0f(float __x) { return __ocml_y0_f32(__x); }
512
513__DEVICE__
514float y1f(float __x) { return __ocml_y1_f32(__x); }
515
516__DEVICE__
517float ynf(int __n, float __x) { // TODO: we could use Ahmes multiplication
518                                // and the Miller & Brown algorithm
519  //       for linear recurrences to get O(log n) steps, but it's unclear if
520  //       it'd be beneficial in this case. Placeholder until OCML adds
521  //       support.
522  if (__n == 0)
523    return y0f(__x);
524  if (__n == 1)
525    return y1f(__x);
526
527  float __x0 = y0f(__x);
528  float __x1 = y1f(__x);
529  for (int __i = 1; __i < __n; ++__i) {
530    float __x2 = (2 * __i) / __x * __x1 - __x0;
531    __x0 = __x1;
532    __x1 = __x2;
533  }
534
535  return __x1;
536}
537
538// BEGIN INTRINSICS
539
540__DEVICE__
541float __cosf(float __x) { return __ocml_native_cos_f32(__x); }
542
543__DEVICE__
544float __exp10f(float __x) { return __ocml_native_exp10_f32(__x); }
545
546__DEVICE__
547float __expf(float __x) { return __ocml_native_exp_f32(__x); }
548
549#if defined OCML_BASIC_ROUNDED_OPERATIONS
550__DEVICE__
551float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); }
552__DEVICE__
553float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); }
554__DEVICE__
555float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); }
556__DEVICE__
557float __fadd_rz(float __x, float __y) { return __ocml_add_rtz_f32(__x, __y); }
558#else
559__DEVICE__
560float __fadd_rn(float __x, float __y) { return __x + __y; }
561#endif
562
563#if defined OCML_BASIC_ROUNDED_OPERATIONS
564__DEVICE__
565float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); }
566__DEVICE__
567float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); }
568__DEVICE__
569float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); }
570__DEVICE__
571float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); }
572#else
573__DEVICE__
574float __fdiv_rn(float __x, float __y) { return __x / __y; }
575#endif
576
577__DEVICE__
578float __fdividef(float __x, float __y) { return __x / __y; }
579
580#if defined OCML_BASIC_ROUNDED_OPERATIONS
581__DEVICE__
582float __fmaf_rd(float __x, float __y, float __z) {
583  return __ocml_fma_rtn_f32(__x, __y, __z);
584}
585__DEVICE__
586float __fmaf_rn(float __x, float __y, float __z) {
587  return __ocml_fma_rte_f32(__x, __y, __z);
588}
589__DEVICE__
590float __fmaf_ru(float __x, float __y, float __z) {
591  return __ocml_fma_rtp_f32(__x, __y, __z);
592}
593__DEVICE__
594float __fmaf_rz(float __x, float __y, float __z) {
595  return __ocml_fma_rtz_f32(__x, __y, __z);
596}
597#else
598__DEVICE__
599float __fmaf_rn(float __x, float __y, float __z) {
600  return __ocml_fma_f32(__x, __y, __z);
601}
602#endif
603
604#if defined OCML_BASIC_ROUNDED_OPERATIONS
605__DEVICE__
606float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); }
607__DEVICE__
608float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); }
609__DEVICE__
610float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); }
611__DEVICE__
612float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); }
613#else
614__DEVICE__
615float __fmul_rn(float __x, float __y) { return __x * __y; }
616#endif
617
618#if defined OCML_BASIC_ROUNDED_OPERATIONS
619__DEVICE__
620float __frcp_rd(float __x) { return __ocml_div_rtn_f32(1.0f, __x); }
621__DEVICE__
622float __frcp_rn(float __x) { return __ocml_div_rte_f32(1.0f, __x); }
623__DEVICE__
624float __frcp_ru(float __x) { return __ocml_div_rtp_f32(1.0f, __x); }
625__DEVICE__
626float __frcp_rz(float __x) { return __ocml_div_rtz_f32(1.0f, __x); }
627#else
628__DEVICE__
629float __frcp_rn(float __x) { return 1.0f / __x; }
630#endif
631
632__DEVICE__
633float __frsqrt_rn(float __x) { return __llvm_amdgcn_rsq_f32(__x); }
634
635#if defined OCML_BASIC_ROUNDED_OPERATIONS
636__DEVICE__
637float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); }
638__DEVICE__
639float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); }
640__DEVICE__
641float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); }
642__DEVICE__
643float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); }
644#else
645__DEVICE__
646float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); }
647#endif
648
649#if defined OCML_BASIC_ROUNDED_OPERATIONS
650__DEVICE__
651float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); }
652__DEVICE__
653float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); }
654__DEVICE__
655float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); }
656__DEVICE__
657float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); }
658#else
659__DEVICE__
660float __fsub_rn(float __x, float __y) { return __x - __y; }
661#endif
662
663__DEVICE__
664float __log10f(float __x) { return __ocml_native_log10_f32(__x); }
665
666__DEVICE__
667float __log2f(float __x) { return __ocml_native_log2_f32(__x); }
668
669__DEVICE__
670float __logf(float __x) { return __ocml_native_log_f32(__x); }
671
672__DEVICE__
673float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
674
675__DEVICE__
676float __saturatef(float __x) { return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); }
677
678__DEVICE__
679void __sincosf(float __x, float *__sinptr, float *__cosptr) {
680  *__sinptr = __ocml_native_sin_f32(__x);
681  *__cosptr = __ocml_native_cos_f32(__x);
682}
683
684__DEVICE__
685float __sinf(float __x) { return __ocml_native_sin_f32(__x); }
686
687__DEVICE__
688float __tanf(float __x) { return __ocml_tan_f32(__x); }
689// END INTRINSICS
690// END FLOAT
691
692// BEGIN DOUBLE
693__DEVICE__
694double acos(double __x) { return __ocml_acos_f64(__x); }
695
696__DEVICE__
697double acosh(double __x) { return __ocml_acosh_f64(__x); }
698
699__DEVICE__
700double asin(double __x) { return __ocml_asin_f64(__x); }
701
702__DEVICE__
703double asinh(double __x) { return __ocml_asinh_f64(__x); }
704
705__DEVICE__
706double atan(double __x) { return __ocml_atan_f64(__x); }
707
708__DEVICE__
709double atan2(double __x, double __y) { return __ocml_atan2_f64(__x, __y); }
710
711__DEVICE__
712double atanh(double __x) { return __ocml_atanh_f64(__x); }
713
714__DEVICE__
715double cbrt(double __x) { return __ocml_cbrt_f64(__x); }
716
717__DEVICE__
718double ceil(double __x) { return __ocml_ceil_f64(__x); }
719
720__DEVICE__
721double copysign(double __x, double __y) {
722  return __ocml_copysign_f64(__x, __y);
723}
724
725__DEVICE__
726double cos(double __x) { return __ocml_cos_f64(__x); }
727
728__DEVICE__
729double cosh(double __x) { return __ocml_cosh_f64(__x); }
730
731__DEVICE__
732double cospi(double __x) { return __ocml_cospi_f64(__x); }
733
734__DEVICE__
735double cyl_bessel_i0(double __x) { return __ocml_i0_f64(__x); }
736
737__DEVICE__
738double cyl_bessel_i1(double __x) { return __ocml_i1_f64(__x); }
739
740__DEVICE__
741double erf(double __x) { return __ocml_erf_f64(__x); }
742
743__DEVICE__
744double erfc(double __x) { return __ocml_erfc_f64(__x); }
745
746__DEVICE__
747double erfcinv(double __x) { return __ocml_erfcinv_f64(__x); }
748
749__DEVICE__
750double erfcx(double __x) { return __ocml_erfcx_f64(__x); }
751
752__DEVICE__
753double erfinv(double __x) { return __ocml_erfinv_f64(__x); }
754
755__DEVICE__
756double exp(double __x) { return __ocml_exp_f64(__x); }
757
758__DEVICE__
759double exp10(double __x) { return __ocml_exp10_f64(__x); }
760
761__DEVICE__
762double exp2(double __x) { return __ocml_exp2_f64(__x); }
763
764__DEVICE__
765double expm1(double __x) { return __ocml_expm1_f64(__x); }
766
767__DEVICE__
768double fabs(double __x) { return __ocml_fabs_f64(__x); }
769
770__DEVICE__
771double fdim(double __x, double __y) { return __ocml_fdim_f64(__x, __y); }
772
773__DEVICE__
774double floor(double __x) { return __ocml_floor_f64(__x); }
775
776__DEVICE__
777double fma(double __x, double __y, double __z) {
778  return __ocml_fma_f64(__x, __y, __z);
779}
780
781__DEVICE__
782double fmax(double __x, double __y) { return __ocml_fmax_f64(__x, __y); }
783
784__DEVICE__
785double fmin(double __x, double __y) { return __ocml_fmin_f64(__x, __y); }
786
787__DEVICE__
788double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); }
789
790__DEVICE__
791double frexp(double __x, int *__nptr) {
792  int __tmp;
793  double __r =
794      __ocml_frexp_f64(__x, (__attribute__((address_space(5))) int *)&__tmp);
795  *__nptr = __tmp;
796  return __r;
797}
798
799__DEVICE__
800double hypot(double __x, double __y) { return __ocml_hypot_f64(__x, __y); }
801
802__DEVICE__
803int ilogb(double __x) { return __ocml_ilogb_f64(__x); }
804
805__DEVICE__
806__RETURN_TYPE __finite(double __x) { return __ocml_isfinite_f64(__x); }
807
808__DEVICE__
809__RETURN_TYPE __isinf(double __x) { return __ocml_isinf_f64(__x); }
810
811__DEVICE__
812__RETURN_TYPE __isnan(double __x) { return __ocml_isnan_f64(__x); }
813
814__DEVICE__
815double j0(double __x) { return __ocml_j0_f64(__x); }
816
817__DEVICE__
818double j1(double __x) { return __ocml_j1_f64(__x); }
819
820__DEVICE__
821double jn(int __n, double __x) { // TODO: we could use Ahmes multiplication
822                                 // and the Miller & Brown algorithm
823  //       for linear recurrences to get O(log n) steps, but it's unclear if
824  //       it'd be beneficial in this case. Placeholder until OCML adds
825  //       support.
826  if (__n == 0)
827    return j0(__x);
828  if (__n == 1)
829    return j1(__x);
830
831  double __x0 = j0(__x);
832  double __x1 = j1(__x);
833  for (int __i = 1; __i < __n; ++__i) {
834    double __x2 = (2 * __i) / __x * __x1 - __x0;
835    __x0 = __x1;
836    __x1 = __x2;
837  }
838  return __x1;
839}
840
841__DEVICE__
842double ldexp(double __x, int __e) { return __ocml_ldexp_f64(__x, __e); }
843
844__DEVICE__
845double lgamma(double __x) { return __ocml_lgamma_f64(__x); }
846
847__DEVICE__
848long long int llrint(double __x) { return __ocml_rint_f64(__x); }
849
850__DEVICE__
851long long int llround(double __x) { return __ocml_round_f64(__x); }
852
853__DEVICE__
854double log(double __x) { return __ocml_log_f64(__x); }
855
856__DEVICE__
857double log10(double __x) { return __ocml_log10_f64(__x); }
858
859__DEVICE__
860double log1p(double __x) { return __ocml_log1p_f64(__x); }
861
862__DEVICE__
863double log2(double __x) { return __ocml_log2_f64(__x); }
864
865__DEVICE__
866double logb(double __x) { return __ocml_logb_f64(__x); }
867
868__DEVICE__
869long int lrint(double __x) { return __ocml_rint_f64(__x); }
870
871__DEVICE__
872long int lround(double __x) { return __ocml_round_f64(__x); }
873
874__DEVICE__
875double modf(double __x, double *__iptr) {
876  double __tmp;
877  double __r =
878      __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp);
879  *__iptr = __tmp;
880
881  return __r;
882}
883
884__DEVICE__
885double nan(const char *__tagp) {
886#if !_WIN32
887  union {
888    double val;
889    struct ieee_double {
890      uint64_t mantissa : 51;
891      uint32_t quiet : 1;
892      uint32_t exponent : 11;
893      uint32_t sign : 1;
894    } bits;
895  } __tmp;
896  __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits));
897
898  __tmp.bits.sign = 0u;
899  __tmp.bits.exponent = ~0u;
900  __tmp.bits.quiet = 1u;
901  __tmp.bits.mantissa = __make_mantissa(__tagp);
902
903  return __tmp.val;
904#else
905  __static_assert_type_size_equal(sizeof(uint64_t), sizeof(double));
906  uint64_t __val = __make_mantissa(__tagp);
907  __val |= 0xFFF << 51;
908  return *reinterpret_cast<double *>(&__val);
909#endif
910}
911
912__DEVICE__
913double nearbyint(double __x) { return __ocml_nearbyint_f64(__x); }
914
915__DEVICE__
916double nextafter(double __x, double __y) {
917  return __ocml_nextafter_f64(__x, __y);
918}
919
920__DEVICE__
921double norm(int __dim,
922            const double *__a) { // TODO: placeholder until OCML adds support.
923  double __r = 0;
924  while (__dim--) {
925    __r += __a[0] * __a[0];
926    ++__a;
927  }
928
929  return __ocml_sqrt_f64(__r);
930}
931
932__DEVICE__
933double norm3d(double __x, double __y, double __z) {
934  return __ocml_len3_f64(__x, __y, __z);
935}
936
937__DEVICE__
938double norm4d(double __x, double __y, double __z, double __w) {
939  return __ocml_len4_f64(__x, __y, __z, __w);
940}
941
942__DEVICE__
943double normcdf(double __x) { return __ocml_ncdf_f64(__x); }
944
945__DEVICE__
946double normcdfinv(double __x) { return __ocml_ncdfinv_f64(__x); }
947
948__DEVICE__
949double pow(double __x, double __y) { return __ocml_pow_f64(__x, __y); }
950
951__DEVICE__
952double powi(double __x, int __y) { return __ocml_pown_f64(__x, __y); }
953
954__DEVICE__
955double rcbrt(double __x) { return __ocml_rcbrt_f64(__x); }
956
957__DEVICE__
958double remainder(double __x, double __y) {
959  return __ocml_remainder_f64(__x, __y);
960}
961
962__DEVICE__
963double remquo(double __x, double __y, int *__quo) {
964  int __tmp;
965  double __r = __ocml_remquo_f64(
966      __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
967  *__quo = __tmp;
968
969  return __r;
970}
971
972__DEVICE__
973double rhypot(double __x, double __y) { return __ocml_rhypot_f64(__x, __y); }
974
975__DEVICE__
976double rint(double __x) { return __ocml_rint_f64(__x); }
977
978__DEVICE__
979double rnorm(int __dim,
980             const double *__a) { // TODO: placeholder until OCML adds support.
981  double __r = 0;
982  while (__dim--) {
983    __r += __a[0] * __a[0];
984    ++__a;
985  }
986
987  return __ocml_rsqrt_f64(__r);
988}
989
990__DEVICE__
991double rnorm3d(double __x, double __y, double __z) {
992  return __ocml_rlen3_f64(__x, __y, __z);
993}
994
995__DEVICE__
996double rnorm4d(double __x, double __y, double __z, double __w) {
997  return __ocml_rlen4_f64(__x, __y, __z, __w);
998}
999
1000__DEVICE__
1001double round(double __x) { return __ocml_round_f64(__x); }
1002
1003__DEVICE__
1004double rsqrt(double __x) { return __ocml_rsqrt_f64(__x); }
1005
1006__DEVICE__
1007double scalbln(double __x, long int __n) {
1008  return (__n < INT_MAX) ? __ocml_scalbn_f64(__x, __n)
1009                         : __ocml_scalb_f64(__x, __n);
1010}
1011__DEVICE__
1012double scalbn(double __x, int __n) { return __ocml_scalbn_f64(__x, __n); }
1013
1014__DEVICE__
1015__RETURN_TYPE __signbit(double __x) { return __ocml_signbit_f64(__x); }
1016
1017__DEVICE__
1018double sin(double __x) { return __ocml_sin_f64(__x); }
1019
1020__DEVICE__
1021void sincos(double __x, double *__sinptr, double *__cosptr) {
1022  double __tmp;
1023  *__sinptr = __ocml_sincos_f64(
1024      __x, (__attribute__((address_space(5))) double *)&__tmp);
1025  *__cosptr = __tmp;
1026}
1027
1028__DEVICE__
1029void sincospi(double __x, double *__sinptr, double *__cosptr) {
1030  double __tmp;
1031  *__sinptr = __ocml_sincospi_f64(
1032      __x, (__attribute__((address_space(5))) double *)&__tmp);
1033  *__cosptr = __tmp;
1034}
1035
1036__DEVICE__
1037double sinh(double __x) { return __ocml_sinh_f64(__x); }
1038
1039__DEVICE__
1040double sinpi(double __x) { return __ocml_sinpi_f64(__x); }
1041
1042__DEVICE__
1043double sqrt(double __x) { return __ocml_sqrt_f64(__x); }
1044
1045__DEVICE__
1046double tan(double __x) { return __ocml_tan_f64(__x); }
1047
1048__DEVICE__
1049double tanh(double __x) { return __ocml_tanh_f64(__x); }
1050
1051__DEVICE__
1052double tgamma(double __x) { return __ocml_tgamma_f64(__x); }
1053
1054__DEVICE__
1055double trunc(double __x) { return __ocml_trunc_f64(__x); }
1056
1057__DEVICE__
1058double y0(double __x) { return __ocml_y0_f64(__x); }
1059
1060__DEVICE__
1061double y1(double __x) { return __ocml_y1_f64(__x); }
1062
1063__DEVICE__
1064double yn(int __n, double __x) { // TODO: we could use Ahmes multiplication
1065                                 // and the Miller & Brown algorithm
1066  //       for linear recurrences to get O(log n) steps, but it's unclear if
1067  //       it'd be beneficial in this case. Placeholder until OCML adds
1068  //       support.
1069  if (__n == 0)
1070    return y0(__x);
1071  if (__n == 1)
1072    return y1(__x);
1073
1074  double __x0 = y0(__x);
1075  double __x1 = y1(__x);
1076  for (int __i = 1; __i < __n; ++__i) {
1077    double __x2 = (2 * __i) / __x * __x1 - __x0;
1078    __x0 = __x1;
1079    __x1 = __x2;
1080  }
1081
1082  return __x1;
1083}
1084
1085// BEGIN INTRINSICS
1086#if defined OCML_BASIC_ROUNDED_OPERATIONS
1087__DEVICE__
1088double __dadd_rd(double __x, double __y) {
1089  return __ocml_add_rtn_f64(__x, __y);
1090}
1091__DEVICE__
1092double __dadd_rn(double __x, double __y) {
1093  return __ocml_add_rte_f64(__x, __y);
1094}
1095__DEVICE__
1096double __dadd_ru(double __x, double __y) {
1097  return __ocml_add_rtp_f64(__x, __y);
1098}
1099__DEVICE__
1100double __dadd_rz(double __x, double __y) {
1101  return __ocml_add_rtz_f64(__x, __y);
1102}
1103#else
1104__DEVICE__
1105double __dadd_rn(double __x, double __y) { return __x + __y; }
1106#endif
1107
1108#if defined OCML_BASIC_ROUNDED_OPERATIONS
1109__DEVICE__
1110double __ddiv_rd(double __x, double __y) {
1111  return __ocml_div_rtn_f64(__x, __y);
1112}
1113__DEVICE__
1114double __ddiv_rn(double __x, double __y) {
1115  return __ocml_div_rte_f64(__x, __y);
1116}
1117__DEVICE__
1118double __ddiv_ru(double __x, double __y) {
1119  return __ocml_div_rtp_f64(__x, __y);
1120}
1121__DEVICE__
1122double __ddiv_rz(double __x, double __y) {
1123  return __ocml_div_rtz_f64(__x, __y);
1124}
1125#else
1126__DEVICE__
1127double __ddiv_rn(double __x, double __y) { return __x / __y; }
1128#endif
1129
1130#if defined OCML_BASIC_ROUNDED_OPERATIONS
1131__DEVICE__
1132double __dmul_rd(double __x, double __y) {
1133  return __ocml_mul_rtn_f64(__x, __y);
1134}
1135__DEVICE__
1136double __dmul_rn(double __x, double __y) {
1137  return __ocml_mul_rte_f64(__x, __y);
1138}
1139__DEVICE__
1140double __dmul_ru(double __x, double __y) {
1141  return __ocml_mul_rtp_f64(__x, __y);
1142}
1143__DEVICE__
1144double __dmul_rz(double __x, double __y) {
1145  return __ocml_mul_rtz_f64(__x, __y);
1146}
1147#else
1148__DEVICE__
1149double __dmul_rn(double __x, double __y) { return __x * __y; }
1150#endif
1151
1152#if defined OCML_BASIC_ROUNDED_OPERATIONS
1153__DEVICE__
1154double __drcp_rd(double __x) { return __ocml_div_rtn_f64(1.0, __x); }
1155__DEVICE__
1156double __drcp_rn(double __x) { return __ocml_div_rte_f64(1.0, __x); }
1157__DEVICE__
1158double __drcp_ru(double __x) { return __ocml_div_rtp_f64(1.0, __x); }
1159__DEVICE__
1160double __drcp_rz(double __x) { return __ocml_div_rtz_f64(1.0, __x); }
1161#else
1162__DEVICE__
1163double __drcp_rn(double __x) { return 1.0 / __x; }
1164#endif
1165
1166#if defined OCML_BASIC_ROUNDED_OPERATIONS
1167__DEVICE__
1168double __dsqrt_rd(double __x) { return __ocml_sqrt_rtn_f64(__x); }
1169__DEVICE__
1170double __dsqrt_rn(double __x) { return __ocml_sqrt_rte_f64(__x); }
1171__DEVICE__
1172double __dsqrt_ru(double __x) { return __ocml_sqrt_rtp_f64(__x); }
1173__DEVICE__
1174double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); }
1175#else
1176__DEVICE__
1177double __dsqrt_rn(double __x) { return __ocml_sqrt_f64(__x); }
1178#endif
1179
1180#if defined OCML_BASIC_ROUNDED_OPERATIONS
1181__DEVICE__
1182double __dsub_rd(double __x, double __y) {
1183  return __ocml_sub_rtn_f64(__x, __y);
1184}
1185__DEVICE__
1186double __dsub_rn(double __x, double __y) {
1187  return __ocml_sub_rte_f64(__x, __y);
1188}
1189__DEVICE__
1190double __dsub_ru(double __x, double __y) {
1191  return __ocml_sub_rtp_f64(__x, __y);
1192}
1193__DEVICE__
1194double __dsub_rz(double __x, double __y) {
1195  return __ocml_sub_rtz_f64(__x, __y);
1196}
1197#else
1198__DEVICE__
1199double __dsub_rn(double __x, double __y) { return __x - __y; }
1200#endif
1201
1202#if defined OCML_BASIC_ROUNDED_OPERATIONS
1203__DEVICE__
1204double __fma_rd(double __x, double __y, double __z) {
1205  return __ocml_fma_rtn_f64(__x, __y, __z);
1206}
1207__DEVICE__
1208double __fma_rn(double __x, double __y, double __z) {
1209  return __ocml_fma_rte_f64(__x, __y, __z);
1210}
1211__DEVICE__
1212double __fma_ru(double __x, double __y, double __z) {
1213  return __ocml_fma_rtp_f64(__x, __y, __z);
1214}
1215__DEVICE__
1216double __fma_rz(double __x, double __y, double __z) {
1217  return __ocml_fma_rtz_f64(__x, __y, __z);
1218}
1219#else
1220__DEVICE__
1221double __fma_rn(double __x, double __y, double __z) {
1222  return __ocml_fma_f64(__x, __y, __z);
1223}
1224#endif
1225// END INTRINSICS
1226// END DOUBLE
1227
1228// C only macros
1229#if !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
1230#define isfinite(__x) _Generic((__x), float : __finitef, double : __finite)(__x)
1231#define isinf(__x) _Generic((__x), float : __isinff, double : __isinf)(__x)
1232#define isnan(__x) _Generic((__x), float : __isnanf, double : __isnan)(__x)
1233#define signbit(__x)                                                           \
1234  _Generic((__x), float : __signbitf, double : __signbit)(__x)
1235#endif // !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
1236
1237#if defined(__cplusplus)
1238template <class T> __DEVICE__ T min(T __arg1, T __arg2) {
1239  return (__arg1 < __arg2) ? __arg1 : __arg2;
1240}
1241
1242template <class T> __DEVICE__ T max(T __arg1, T __arg2) {
1243  return (__arg1 > __arg2) ? __arg1 : __arg2;
1244}
1245
1246__DEVICE__ int min(int __arg1, int __arg2) {
1247  return (__arg1 < __arg2) ? __arg1 : __arg2;
1248}
1249__DEVICE__ int max(int __arg1, int __arg2) {
1250  return (__arg1 > __arg2) ? __arg1 : __arg2;
1251}
1252
1253__DEVICE__
1254float max(float __x, float __y) { return fmaxf(__x, __y); }
1255
1256__DEVICE__
1257double max(double __x, double __y) { return fmax(__x, __y); }
1258
1259__DEVICE__
1260float min(float __x, float __y) { return fminf(__x, __y); }
1261
1262__DEVICE__
1263double min(double __x, double __y) { return fmin(__x, __y); }
1264
1265#if !defined(__HIPCC_RTC__)
1266__host__ inline static int min(int __arg1, int __arg2) {
1267  return std::min(__arg1, __arg2);
1268}
1269
1270__host__ inline static int max(int __arg1, int __arg2) {
1271  return std::max(__arg1, __arg2);
1272}
1273#endif // __HIPCC_RTC__
1274#endif
1275
1276#pragma pop_macro("__DEVICE__")
1277#pragma pop_macro("__RETURN_TYPE")
1278
1279#endif // __CLANG_HIP_MATH_H__
1280