1/*===---- __clang_hip_math.h - HIP math decls -------------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __CLANG_HIP_MATH_H__
11#define __CLANG_HIP_MATH_H__
12
13#include <algorithm>
14#include <limits.h>
15#include <limits>
16#include <stdint.h>
17
18#pragma push_macro("__DEVICE__")
19#pragma push_macro("__RETURN_TYPE")
20
21// to be consistent with __clang_cuda_math_forward_declares
22#define __DEVICE__ static __device__
23#define __RETURN_TYPE bool
24
25__DEVICE__
26inline uint64_t __make_mantissa_base8(const char *__tagp) {
27  uint64_t __r = 0;
28  while (__tagp) {
29    char __tmp = *__tagp;
30
31    if (__tmp >= '0' && __tmp <= '7')
32      __r = (__r * 8u) + __tmp - '0';
33    else
34      return 0;
35
36    ++__tagp;
37  }
38
39  return __r;
40}
41
42__DEVICE__
43inline uint64_t __make_mantissa_base10(const char *__tagp) {
44  uint64_t __r = 0;
45  while (__tagp) {
46    char __tmp = *__tagp;
47
48    if (__tmp >= '0' && __tmp <= '9')
49      __r = (__r * 10u) + __tmp - '0';
50    else
51      return 0;
52
53    ++__tagp;
54  }
55
56  return __r;
57}
58
59__DEVICE__
60inline uint64_t __make_mantissa_base16(const char *__tagp) {
61  uint64_t __r = 0;
62  while (__tagp) {
63    char __tmp = *__tagp;
64
65    if (__tmp >= '0' && __tmp <= '9')
66      __r = (__r * 16u) + __tmp - '0';
67    else if (__tmp >= 'a' && __tmp <= 'f')
68      __r = (__r * 16u) + __tmp - 'a' + 10;
69    else if (__tmp >= 'A' && __tmp <= 'F')
70      __r = (__r * 16u) + __tmp - 'A' + 10;
71    else
72      return 0;
73
74    ++__tagp;
75  }
76
77  return __r;
78}
79
80__DEVICE__
81inline uint64_t __make_mantissa(const char *__tagp) {
82  if (!__tagp)
83    return 0u;
84
85  if (*__tagp == '0') {
86    ++__tagp;
87
88    if (*__tagp == 'x' || *__tagp == 'X')
89      return __make_mantissa_base16(__tagp);
90    else
91      return __make_mantissa_base8(__tagp);
92  }
93
94  return __make_mantissa_base10(__tagp);
95}
96
97// BEGIN FLOAT
98__DEVICE__
99inline float abs(float __x) { return __ocml_fabs_f32(__x); }
100__DEVICE__
101inline float acosf(float __x) { return __ocml_acos_f32(__x); }
102__DEVICE__
103inline float acoshf(float __x) { return __ocml_acosh_f32(__x); }
104__DEVICE__
105inline float asinf(float __x) { return __ocml_asin_f32(__x); }
106__DEVICE__
107inline float asinhf(float __x) { return __ocml_asinh_f32(__x); }
108__DEVICE__
109inline float atan2f(float __x, float __y) { return __ocml_atan2_f32(__x, __y); }
110__DEVICE__
111inline float atanf(float __x) { return __ocml_atan_f32(__x); }
112__DEVICE__
113inline float atanhf(float __x) { return __ocml_atanh_f32(__x); }
114__DEVICE__
115inline float cbrtf(float __x) { return __ocml_cbrt_f32(__x); }
116__DEVICE__
117inline float ceilf(float __x) { return __ocml_ceil_f32(__x); }
118__DEVICE__
119inline float copysignf(float __x, float __y) {
120  return __ocml_copysign_f32(__x, __y);
121}
122__DEVICE__
123inline float cosf(float __x) { return __ocml_cos_f32(__x); }
124__DEVICE__
125inline float coshf(float __x) { return __ocml_cosh_f32(__x); }
126__DEVICE__
127inline float cospif(float __x) { return __ocml_cospi_f32(__x); }
128__DEVICE__
129inline float cyl_bessel_i0f(float __x) { return __ocml_i0_f32(__x); }
130__DEVICE__
131inline float cyl_bessel_i1f(float __x) { return __ocml_i1_f32(__x); }
132__DEVICE__
133inline float erfcf(float __x) { return __ocml_erfc_f32(__x); }
134__DEVICE__
135inline float erfcinvf(float __x) { return __ocml_erfcinv_f32(__x); }
136__DEVICE__
137inline float erfcxf(float __x) { return __ocml_erfcx_f32(__x); }
138__DEVICE__
139inline float erff(float __x) { return __ocml_erf_f32(__x); }
140__DEVICE__
141inline float erfinvf(float __x) { return __ocml_erfinv_f32(__x); }
142__DEVICE__
143inline float exp10f(float __x) { return __ocml_exp10_f32(__x); }
144__DEVICE__
145inline float exp2f(float __x) { return __ocml_exp2_f32(__x); }
146__DEVICE__
147inline float expf(float __x) { return __ocml_exp_f32(__x); }
148__DEVICE__
149inline float expm1f(float __x) { return __ocml_expm1_f32(__x); }
150__DEVICE__
151inline float fabsf(float __x) { return __ocml_fabs_f32(__x); }
152__DEVICE__
153inline float fdimf(float __x, float __y) { return __ocml_fdim_f32(__x, __y); }
154__DEVICE__
155inline float fdividef(float __x, float __y) { return __x / __y; }
156__DEVICE__
157inline float floorf(float __x) { return __ocml_floor_f32(__x); }
158__DEVICE__
159inline float fmaf(float __x, float __y, float __z) {
160  return __ocml_fma_f32(__x, __y, __z);
161}
162__DEVICE__
163inline float fmaxf(float __x, float __y) { return __ocml_fmax_f32(__x, __y); }
164__DEVICE__
165inline float fminf(float __x, float __y) { return __ocml_fmin_f32(__x, __y); }
166__DEVICE__
167inline float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); }
168__DEVICE__
169inline float frexpf(float __x, int *__nptr) {
170  int __tmp;
171  float __r =
172      __ocml_frexp_f32(__x, (__attribute__((address_space(5))) int *)&__tmp);
173  *__nptr = __tmp;
174
175  return __r;
176}
177__DEVICE__
178inline float hypotf(float __x, float __y) { return __ocml_hypot_f32(__x, __y); }
179__DEVICE__
180inline int ilogbf(float __x) { return __ocml_ilogb_f32(__x); }
181__DEVICE__
182inline __RETURN_TYPE isfinite(float __x) { return __ocml_isfinite_f32(__x); }
183__DEVICE__
184inline __RETURN_TYPE isinf(float __x) { return __ocml_isinf_f32(__x); }
185__DEVICE__
186inline __RETURN_TYPE isnan(float __x) { return __ocml_isnan_f32(__x); }
187__DEVICE__
188inline float j0f(float __x) { return __ocml_j0_f32(__x); }
189__DEVICE__
190inline float j1f(float __x) { return __ocml_j1_f32(__x); }
191__DEVICE__
192inline float jnf(int __n,
193                 float __x) { // TODO: we could use Ahmes multiplication
194                              // and the Miller & Brown algorithm
195  //       for linear recurrences to get O(log n) steps, but it's unclear if
196  //       it'd be beneficial in this case.
197  if (__n == 0)
198    return j0f(__x);
199  if (__n == 1)
200    return j1f(__x);
201
202  float __x0 = j0f(__x);
203  float __x1 = j1f(__x);
204  for (int __i = 1; __i < __n; ++__i) {
205    float __x2 = (2 * __i) / __x * __x1 - __x0;
206    __x0 = __x1;
207    __x1 = __x2;
208  }
209
210  return __x1;
211}
212__DEVICE__
213inline float ldexpf(float __x, int __e) { return __ocml_ldexp_f32(__x, __e); }
214__DEVICE__
215inline float lgammaf(float __x) { return __ocml_lgamma_f32(__x); }
216__DEVICE__
217inline long long int llrintf(float __x) { return __ocml_rint_f32(__x); }
218__DEVICE__
219inline long long int llroundf(float __x) { return __ocml_round_f32(__x); }
220__DEVICE__
221inline float log10f(float __x) { return __ocml_log10_f32(__x); }
222__DEVICE__
223inline float log1pf(float __x) { return __ocml_log1p_f32(__x); }
224__DEVICE__
225inline float log2f(float __x) { return __ocml_log2_f32(__x); }
226__DEVICE__
227inline float logbf(float __x) { return __ocml_logb_f32(__x); }
228__DEVICE__
229inline float logf(float __x) { return __ocml_log_f32(__x); }
230__DEVICE__
231inline long int lrintf(float __x) { return __ocml_rint_f32(__x); }
232__DEVICE__
233inline long int lroundf(float __x) { return __ocml_round_f32(__x); }
234__DEVICE__
235inline float modff(float __x, float *__iptr) {
236  float __tmp;
237  float __r =
238      __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
239  *__iptr = __tmp;
240
241  return __r;
242}
243__DEVICE__
244inline float nanf(const char *__tagp) {
245  union {
246    float val;
247    struct ieee_float {
248      uint32_t mantissa : 22;
249      uint32_t quiet : 1;
250      uint32_t exponent : 8;
251      uint32_t sign : 1;
252    } bits;
253
254    static_assert(sizeof(float) == sizeof(ieee_float), "");
255  } __tmp;
256
257  __tmp.bits.sign = 0u;
258  __tmp.bits.exponent = ~0u;
259  __tmp.bits.quiet = 1u;
260  __tmp.bits.mantissa = __make_mantissa(__tagp);
261
262  return __tmp.val;
263}
264__DEVICE__
265inline float nearbyintf(float __x) { return __ocml_nearbyint_f32(__x); }
266__DEVICE__
267inline float nextafterf(float __x, float __y) {
268  return __ocml_nextafter_f32(__x, __y);
269}
270__DEVICE__
271inline float norm3df(float __x, float __y, float __z) {
272  return __ocml_len3_f32(__x, __y, __z);
273}
274__DEVICE__
275inline float norm4df(float __x, float __y, float __z, float __w) {
276  return __ocml_len4_f32(__x, __y, __z, __w);
277}
278__DEVICE__
279inline float normcdff(float __x) { return __ocml_ncdf_f32(__x); }
280__DEVICE__
281inline float normcdfinvf(float __x) { return __ocml_ncdfinv_f32(__x); }
282__DEVICE__
283inline float
284normf(int __dim,
285      const float *__a) { // TODO: placeholder until OCML adds support.
286  float __r = 0;
287  while (__dim--) {
288    __r += __a[0] * __a[0];
289    ++__a;
290  }
291
292  return __ocml_sqrt_f32(__r);
293}
294__DEVICE__
295inline float powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
296__DEVICE__
297inline float rcbrtf(float __x) { return __ocml_rcbrt_f32(__x); }
298__DEVICE__
299inline float remainderf(float __x, float __y) {
300  return __ocml_remainder_f32(__x, __y);
301}
302__DEVICE__
303inline float remquof(float __x, float __y, int *__quo) {
304  int __tmp;
305  float __r = __ocml_remquo_f32(
306      __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
307  *__quo = __tmp;
308
309  return __r;
310}
311__DEVICE__
312inline float rhypotf(float __x, float __y) {
313  return __ocml_rhypot_f32(__x, __y);
314}
315__DEVICE__
316inline float rintf(float __x) { return __ocml_rint_f32(__x); }
317__DEVICE__
318inline float rnorm3df(float __x, float __y, float __z) {
319  return __ocml_rlen3_f32(__x, __y, __z);
320}
321
322__DEVICE__
323inline float rnorm4df(float __x, float __y, float __z, float __w) {
324  return __ocml_rlen4_f32(__x, __y, __z, __w);
325}
326__DEVICE__
327inline float
328rnormf(int __dim,
329       const float *__a) { // TODO: placeholder until OCML adds support.
330  float __r = 0;
331  while (__dim--) {
332    __r += __a[0] * __a[0];
333    ++__a;
334  }
335
336  return __ocml_rsqrt_f32(__r);
337}
338__DEVICE__
339inline float roundf(float __x) { return __ocml_round_f32(__x); }
340__DEVICE__
341inline float rsqrtf(float __x) { return __ocml_rsqrt_f32(__x); }
342__DEVICE__
343inline float scalblnf(float __x, long int __n) {
344  return (__n < INT_MAX) ? __ocml_scalbn_f32(__x, __n)
345                         : __ocml_scalb_f32(__x, __n);
346}
347__DEVICE__
348inline float scalbnf(float __x, int __n) { return __ocml_scalbn_f32(__x, __n); }
349__DEVICE__
350inline __RETURN_TYPE signbit(float __x) { return __ocml_signbit_f32(__x); }
351__DEVICE__
352inline void sincosf(float __x, float *__sinptr, float *__cosptr) {
353  float __tmp;
354
355  *__sinptr =
356      __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
357  *__cosptr = __tmp;
358}
359__DEVICE__
360inline void sincospif(float __x, float *__sinptr, float *__cosptr) {
361  float __tmp;
362
363  *__sinptr = __ocml_sincospi_f32(
364      __x, (__attribute__((address_space(5))) float *)&__tmp);
365  *__cosptr = __tmp;
366}
367__DEVICE__
368inline float sinf(float __x) { return __ocml_sin_f32(__x); }
369__DEVICE__
370inline float sinhf(float __x) { return __ocml_sinh_f32(__x); }
371__DEVICE__
372inline float sinpif(float __x) { return __ocml_sinpi_f32(__x); }
373__DEVICE__
374inline float sqrtf(float __x) { return __ocml_sqrt_f32(__x); }
375__DEVICE__
376inline float tanf(float __x) { return __ocml_tan_f32(__x); }
377__DEVICE__
378inline float tanhf(float __x) { return __ocml_tanh_f32(__x); }
379__DEVICE__
380inline float tgammaf(float __x) { return __ocml_tgamma_f32(__x); }
381__DEVICE__
382inline float truncf(float __x) { return __ocml_trunc_f32(__x); }
383__DEVICE__
384inline float y0f(float __x) { return __ocml_y0_f32(__x); }
385__DEVICE__
386inline float y1f(float __x) { return __ocml_y1_f32(__x); }
387__DEVICE__
388inline float ynf(int __n,
389                 float __x) { // TODO: we could use Ahmes multiplication
390                              // and the Miller & Brown algorithm
391  //       for linear recurrences to get O(log n) steps, but it's unclear if
392  //       it'd be beneficial in this case. Placeholder until OCML adds
393  //       support.
394  if (__n == 0)
395    return y0f(__x);
396  if (__n == 1)
397    return y1f(__x);
398
399  float __x0 = y0f(__x);
400  float __x1 = y1f(__x);
401  for (int __i = 1; __i < __n; ++__i) {
402    float __x2 = (2 * __i) / __x * __x1 - __x0;
403    __x0 = __x1;
404    __x1 = __x2;
405  }
406
407  return __x1;
408}
409
410// BEGIN INTRINSICS
411__DEVICE__
412inline float __cosf(float __x) { return __ocml_native_cos_f32(__x); }
413__DEVICE__
414inline float __exp10f(float __x) { return __ocml_native_exp10_f32(__x); }
415__DEVICE__
416inline float __expf(float __x) { return __ocml_native_exp_f32(__x); }
417#if defined OCML_BASIC_ROUNDED_OPERATIONS
418__DEVICE__
419inline float __fadd_rd(float __x, float __y) {
420  return __ocml_add_rtn_f32(__x, __y);
421}
422#endif
423__DEVICE__
424inline float __fadd_rn(float __x, float __y) { return __x + __y; }
425#if defined OCML_BASIC_ROUNDED_OPERATIONS
426__DEVICE__
427inline float __fadd_ru(float __x, float __y) {
428  return __ocml_add_rtp_f32(__x, __y);
429}
430__DEVICE__
431inline float __fadd_rz(float __x, float __y) {
432  return __ocml_add_rtz_f32(__x, __y);
433}
434__DEVICE__
435inline float __fdiv_rd(float __x, float __y) {
436  return __ocml_div_rtn_f32(__x, __y);
437}
438#endif
439__DEVICE__
440inline float __fdiv_rn(float __x, float __y) { return __x / __y; }
441#if defined OCML_BASIC_ROUNDED_OPERATIONS
442__DEVICE__
443inline float __fdiv_ru(float __x, float __y) {
444  return __ocml_div_rtp_f32(__x, __y);
445}
446__DEVICE__
447inline float __fdiv_rz(float __x, float __y) {
448  return __ocml_div_rtz_f32(__x, __y);
449}
450#endif
451__DEVICE__
452inline float __fdividef(float __x, float __y) { return __x / __y; }
453#if defined OCML_BASIC_ROUNDED_OPERATIONS
454__DEVICE__
455inline float __fmaf_rd(float __x, float __y, float __z) {
456  return __ocml_fma_rtn_f32(__x, __y, __z);
457}
458#endif
459__DEVICE__
460inline float __fmaf_rn(float __x, float __y, float __z) {
461  return __ocml_fma_f32(__x, __y, __z);
462}
463#if defined OCML_BASIC_ROUNDED_OPERATIONS
464__DEVICE__
465inline float __fmaf_ru(float __x, float __y, float __z) {
466  return __ocml_fma_rtp_f32(__x, __y, __z);
467}
468__DEVICE__
469inline float __fmaf_rz(float __x, float __y, float __z) {
470  return __ocml_fma_rtz_f32(__x, __y, __z);
471}
472__DEVICE__
473inline float __fmul_rd(float __x, float __y) {
474  return __ocml_mul_rtn_f32(__x, __y);
475}
476#endif
477__DEVICE__
478inline float __fmul_rn(float __x, float __y) { return __x * __y; }
479#if defined OCML_BASIC_ROUNDED_OPERATIONS
480__DEVICE__
481inline float __fmul_ru(float __x, float __y) {
482  return __ocml_mul_rtp_f32(__x, __y);
483}
484__DEVICE__
485inline float __fmul_rz(float __x, float __y) {
486  return __ocml_mul_rtz_f32(__x, __y);
487}
488__DEVICE__
489inline float __frcp_rd(float __x) { return __llvm_amdgcn_rcp_f32(__x); }
490#endif
491__DEVICE__
492inline float __frcp_rn(float __x) { return __llvm_amdgcn_rcp_f32(__x); }
493#if defined OCML_BASIC_ROUNDED_OPERATIONS
494__DEVICE__
495inline float __frcp_ru(float __x) { return __llvm_amdgcn_rcp_f32(__x); }
496__DEVICE__
497inline float __frcp_rz(float __x) { return __llvm_amdgcn_rcp_f32(__x); }
498#endif
499__DEVICE__
500inline float __frsqrt_rn(float __x) { return __llvm_amdgcn_rsq_f32(__x); }
501#if defined OCML_BASIC_ROUNDED_OPERATIONS
502__DEVICE__
503inline float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); }
504#endif
505__DEVICE__
506inline float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); }
507#if defined OCML_BASIC_ROUNDED_OPERATIONS
508__DEVICE__
509inline float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); }
510__DEVICE__
511inline float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); }
512__DEVICE__
513inline float __fsub_rd(float __x, float __y) {
514  return __ocml_sub_rtn_f32(__x, __y);
515}
516#endif
517__DEVICE__
518inline float __fsub_rn(float __x, float __y) { return __x - __y; }
519#if defined OCML_BASIC_ROUNDED_OPERATIONS
520__DEVICE__
521inline float __fsub_ru(float __x, float __y) {
522  return __ocml_sub_rtp_f32(__x, __y);
523}
524__DEVICE__
525inline float __fsub_rz(float __x, float __y) {
526  return __ocml_sub_rtz_f32(__x, __y);
527}
528#endif
529__DEVICE__
530inline float __log10f(float __x) { return __ocml_native_log10_f32(__x); }
531__DEVICE__
532inline float __log2f(float __x) { return __ocml_native_log2_f32(__x); }
533__DEVICE__
534inline float __logf(float __x) { return __ocml_native_log_f32(__x); }
535__DEVICE__
536inline float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
537__DEVICE__
538inline float __saturatef(float __x) {
539  return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x);
540}
541__DEVICE__
542inline void __sincosf(float __x, float *__sinptr, float *__cosptr) {
543  *__sinptr = __ocml_native_sin_f32(__x);
544  *__cosptr = __ocml_native_cos_f32(__x);
545}
546__DEVICE__
547inline float __sinf(float __x) { return __ocml_native_sin_f32(__x); }
548__DEVICE__
549inline float __tanf(float __x) { return __ocml_tan_f32(__x); }
550// END INTRINSICS
551// END FLOAT
552
553// BEGIN DOUBLE
554__DEVICE__
555inline double abs(double __x) { return __ocml_fabs_f64(__x); }
556__DEVICE__
557inline double acos(double __x) { return __ocml_acos_f64(__x); }
558__DEVICE__
559inline double acosh(double __x) { return __ocml_acosh_f64(__x); }
560__DEVICE__
561inline double asin(double __x) { return __ocml_asin_f64(__x); }
562__DEVICE__
563inline double asinh(double __x) { return __ocml_asinh_f64(__x); }
564__DEVICE__
565inline double atan(double __x) { return __ocml_atan_f64(__x); }
566__DEVICE__
567inline double atan2(double __x, double __y) {
568  return __ocml_atan2_f64(__x, __y);
569}
570__DEVICE__
571inline double atanh(double __x) { return __ocml_atanh_f64(__x); }
572__DEVICE__
573inline double cbrt(double __x) { return __ocml_cbrt_f64(__x); }
574__DEVICE__
575inline double ceil(double __x) { return __ocml_ceil_f64(__x); }
576__DEVICE__
577inline double copysign(double __x, double __y) {
578  return __ocml_copysign_f64(__x, __y);
579}
580__DEVICE__
581inline double cos(double __x) { return __ocml_cos_f64(__x); }
582__DEVICE__
583inline double cosh(double __x) { return __ocml_cosh_f64(__x); }
584__DEVICE__
585inline double cospi(double __x) { return __ocml_cospi_f64(__x); }
586__DEVICE__
587inline double cyl_bessel_i0(double __x) { return __ocml_i0_f64(__x); }
588__DEVICE__
589inline double cyl_bessel_i1(double __x) { return __ocml_i1_f64(__x); }
590__DEVICE__
591inline double erf(double __x) { return __ocml_erf_f64(__x); }
592__DEVICE__
593inline double erfc(double __x) { return __ocml_erfc_f64(__x); }
594__DEVICE__
595inline double erfcinv(double __x) { return __ocml_erfcinv_f64(__x); }
596__DEVICE__
597inline double erfcx(double __x) { return __ocml_erfcx_f64(__x); }
598__DEVICE__
599inline double erfinv(double __x) { return __ocml_erfinv_f64(__x); }
600__DEVICE__
601inline double exp(double __x) { return __ocml_exp_f64(__x); }
602__DEVICE__
603inline double exp10(double __x) { return __ocml_exp10_f64(__x); }
604__DEVICE__
605inline double exp2(double __x) { return __ocml_exp2_f64(__x); }
606__DEVICE__
607inline double expm1(double __x) { return __ocml_expm1_f64(__x); }
608__DEVICE__
609inline double fabs(double __x) { return __ocml_fabs_f64(__x); }
610__DEVICE__
611inline double fdim(double __x, double __y) { return __ocml_fdim_f64(__x, __y); }
612__DEVICE__
613inline double floor(double __x) { return __ocml_floor_f64(__x); }
614__DEVICE__
615inline double fma(double __x, double __y, double __z) {
616  return __ocml_fma_f64(__x, __y, __z);
617}
618__DEVICE__
619inline double fmax(double __x, double __y) { return __ocml_fmax_f64(__x, __y); }
620__DEVICE__
621inline double fmin(double __x, double __y) { return __ocml_fmin_f64(__x, __y); }
622__DEVICE__
623inline double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); }
624__DEVICE__
625inline double frexp(double __x, int *__nptr) {
626  int __tmp;
627  double __r =
628      __ocml_frexp_f64(__x, (__attribute__((address_space(5))) int *)&__tmp);
629  *__nptr = __tmp;
630
631  return __r;
632}
633__DEVICE__
634inline double hypot(double __x, double __y) {
635  return __ocml_hypot_f64(__x, __y);
636}
637__DEVICE__
638inline int ilogb(double __x) { return __ocml_ilogb_f64(__x); }
639__DEVICE__
640inline __RETURN_TYPE isfinite(double __x) { return __ocml_isfinite_f64(__x); }
641__DEVICE__
642inline __RETURN_TYPE isinf(double __x) { return __ocml_isinf_f64(__x); }
643__DEVICE__
644inline __RETURN_TYPE isnan(double __x) { return __ocml_isnan_f64(__x); }
645__DEVICE__
646inline double j0(double __x) { return __ocml_j0_f64(__x); }
647__DEVICE__
648inline double j1(double __x) { return __ocml_j1_f64(__x); }
649__DEVICE__
650inline double jn(int __n,
651                 double __x) { // TODO: we could use Ahmes multiplication
652                               // and the Miller & Brown algorithm
653  //       for linear recurrences to get O(log n) steps, but it's unclear if
654  //       it'd be beneficial in this case. Placeholder until OCML adds
655  //       support.
656  if (__n == 0)
657    return j0f(__x);
658  if (__n == 1)
659    return j1f(__x);
660
661  double __x0 = j0f(__x);
662  double __x1 = j1f(__x);
663  for (int __i = 1; __i < __n; ++__i) {
664    double __x2 = (2 * __i) / __x * __x1 - __x0;
665    __x0 = __x1;
666    __x1 = __x2;
667  }
668
669  return __x1;
670}
671__DEVICE__
672inline double ldexp(double __x, int __e) { return __ocml_ldexp_f64(__x, __e); }
673__DEVICE__
674inline double lgamma(double __x) { return __ocml_lgamma_f64(__x); }
675__DEVICE__
676inline long long int llrint(double __x) { return __ocml_rint_f64(__x); }
677__DEVICE__
678inline long long int llround(double __x) { return __ocml_round_f64(__x); }
679__DEVICE__
680inline double log(double __x) { return __ocml_log_f64(__x); }
681__DEVICE__
682inline double log10(double __x) { return __ocml_log10_f64(__x); }
683__DEVICE__
684inline double log1p(double __x) { return __ocml_log1p_f64(__x); }
685__DEVICE__
686inline double log2(double __x) { return __ocml_log2_f64(__x); }
687__DEVICE__
688inline double logb(double __x) { return __ocml_logb_f64(__x); }
689__DEVICE__
690inline long int lrint(double __x) { return __ocml_rint_f64(__x); }
691__DEVICE__
692inline long int lround(double __x) { return __ocml_round_f64(__x); }
693__DEVICE__
694inline double modf(double __x, double *__iptr) {
695  double __tmp;
696  double __r =
697      __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp);
698  *__iptr = __tmp;
699
700  return __r;
701}
702__DEVICE__
703inline double nan(const char *__tagp) {
704#if !_WIN32
705  union {
706    double val;
707    struct ieee_double {
708      uint64_t mantissa : 51;
709      uint32_t quiet : 1;
710      uint32_t exponent : 11;
711      uint32_t sign : 1;
712    } bits;
713    static_assert(sizeof(double) == sizeof(ieee_double), "");
714  } __tmp;
715
716  __tmp.bits.sign = 0u;
717  __tmp.bits.exponent = ~0u;
718  __tmp.bits.quiet = 1u;
719  __tmp.bits.mantissa = __make_mantissa(__tagp);
720
721  return __tmp.val;
722#else
723  static_assert(sizeof(uint64_t) == sizeof(double));
724  uint64_t val = __make_mantissa(__tagp);
725  val |= 0xFFF << 51;
726  return *reinterpret_cast<double *>(&val);
727#endif
728}
729__DEVICE__
730inline double nearbyint(double __x) { return __ocml_nearbyint_f64(__x); }
731__DEVICE__
732inline double nextafter(double __x, double __y) {
733  return __ocml_nextafter_f64(__x, __y);
734}
735__DEVICE__
736inline double
737norm(int __dim,
738     const double *__a) { // TODO: placeholder until OCML adds support.
739  double __r = 0;
740  while (__dim--) {
741    __r += __a[0] * __a[0];
742    ++__a;
743  }
744
745  return __ocml_sqrt_f64(__r);
746}
747__DEVICE__
748inline double norm3d(double __x, double __y, double __z) {
749  return __ocml_len3_f64(__x, __y, __z);
750}
751__DEVICE__
752inline double norm4d(double __x, double __y, double __z, double __w) {
753  return __ocml_len4_f64(__x, __y, __z, __w);
754}
755__DEVICE__
756inline double normcdf(double __x) { return __ocml_ncdf_f64(__x); }
757__DEVICE__
758inline double normcdfinv(double __x) { return __ocml_ncdfinv_f64(__x); }
759__DEVICE__
760inline double pow(double __x, double __y) { return __ocml_pow_f64(__x, __y); }
761__DEVICE__
762inline double rcbrt(double __x) { return __ocml_rcbrt_f64(__x); }
763__DEVICE__
764inline double remainder(double __x, double __y) {
765  return __ocml_remainder_f64(__x, __y);
766}
767__DEVICE__
768inline double remquo(double __x, double __y, int *__quo) {
769  int __tmp;
770  double __r = __ocml_remquo_f64(
771      __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
772  *__quo = __tmp;
773
774  return __r;
775}
776__DEVICE__
777inline double rhypot(double __x, double __y) {
778  return __ocml_rhypot_f64(__x, __y);
779}
780__DEVICE__
781inline double rint(double __x) { return __ocml_rint_f64(__x); }
782__DEVICE__
783inline double
784rnorm(int __dim,
785      const double *__a) { // TODO: placeholder until OCML adds support.
786  double __r = 0;
787  while (__dim--) {
788    __r += __a[0] * __a[0];
789    ++__a;
790  }
791
792  return __ocml_rsqrt_f64(__r);
793}
794__DEVICE__
795inline double rnorm3d(double __x, double __y, double __z) {
796  return __ocml_rlen3_f64(__x, __y, __z);
797}
798__DEVICE__
799inline double rnorm4d(double __x, double __y, double __z, double __w) {
800  return __ocml_rlen4_f64(__x, __y, __z, __w);
801}
802__DEVICE__
803inline double round(double __x) { return __ocml_round_f64(__x); }
804__DEVICE__
805inline double rsqrt(double __x) { return __ocml_rsqrt_f64(__x); }
806__DEVICE__
807inline double scalbln(double __x, long int __n) {
808  return (__n < INT_MAX) ? __ocml_scalbn_f64(__x, __n)
809                         : __ocml_scalb_f64(__x, __n);
810}
811__DEVICE__
812inline double scalbn(double __x, int __n) {
813  return __ocml_scalbn_f64(__x, __n);
814}
815__DEVICE__
816inline __RETURN_TYPE signbit(double __x) { return __ocml_signbit_f64(__x); }
817__DEVICE__
818inline double sin(double __x) { return __ocml_sin_f64(__x); }
819__DEVICE__
820inline void sincos(double __x, double *__sinptr, double *__cosptr) {
821  double __tmp;
822  *__sinptr = __ocml_sincos_f64(
823      __x, (__attribute__((address_space(5))) double *)&__tmp);
824  *__cosptr = __tmp;
825}
826__DEVICE__
827inline void sincospi(double __x, double *__sinptr, double *__cosptr) {
828  double __tmp;
829  *__sinptr = __ocml_sincospi_f64(
830      __x, (__attribute__((address_space(5))) double *)&__tmp);
831  *__cosptr = __tmp;
832}
833__DEVICE__
834inline double sinh(double __x) { return __ocml_sinh_f64(__x); }
835__DEVICE__
836inline double sinpi(double __x) { return __ocml_sinpi_f64(__x); }
837__DEVICE__
838inline double sqrt(double __x) { return __ocml_sqrt_f64(__x); }
839__DEVICE__
840inline double tan(double __x) { return __ocml_tan_f64(__x); }
841__DEVICE__
842inline double tanh(double __x) { return __ocml_tanh_f64(__x); }
843__DEVICE__
844inline double tgamma(double __x) { return __ocml_tgamma_f64(__x); }
845__DEVICE__
846inline double trunc(double __x) { return __ocml_trunc_f64(__x); }
847__DEVICE__
848inline double y0(double __x) { return __ocml_y0_f64(__x); }
849__DEVICE__
850inline double y1(double __x) { return __ocml_y1_f64(__x); }
851__DEVICE__
852inline double yn(int __n,
853                 double __x) { // TODO: we could use Ahmes multiplication
854                               // and the Miller & Brown algorithm
855  //       for linear recurrences to get O(log n) steps, but it's unclear if
856  //       it'd be beneficial in this case. Placeholder until OCML adds
857  //       support.
858  if (__n == 0)
859    return j0f(__x);
860  if (__n == 1)
861    return j1f(__x);
862
863  double __x0 = j0f(__x);
864  double __x1 = j1f(__x);
865  for (int __i = 1; __i < __n; ++__i) {
866    double __x2 = (2 * __i) / __x * __x1 - __x0;
867    __x0 = __x1;
868    __x1 = __x2;
869  }
870
871  return __x1;
872}
873
874// BEGIN INTRINSICS
875#if defined OCML_BASIC_ROUNDED_OPERATIONS
876__DEVICE__
877inline double __dadd_rd(double __x, double __y) {
878  return __ocml_add_rtn_f64(__x, __y);
879}
880#endif
881__DEVICE__
882inline double __dadd_rn(double __x, double __y) { return __x + __y; }
883#if defined OCML_BASIC_ROUNDED_OPERATIONS
884__DEVICE__
885inline double __dadd_ru(double __x, double __y) {
886  return __ocml_add_rtp_f64(__x, __y);
887}
888__DEVICE__
889inline double __dadd_rz(double __x, double __y) {
890  return __ocml_add_rtz_f64(__x, __y);
891}
892__DEVICE__
893inline double __ddiv_rd(double __x, double __y) {
894  return __ocml_div_rtn_f64(__x, __y);
895}
896#endif
897__DEVICE__
898inline double __ddiv_rn(double __x, double __y) { return __x / __y; }
899#if defined OCML_BASIC_ROUNDED_OPERATIONS
900__DEVICE__
901inline double __ddiv_ru(double __x, double __y) {
902  return __ocml_div_rtp_f64(__x, __y);
903}
904__DEVICE__
905inline double __ddiv_rz(double __x, double __y) {
906  return __ocml_div_rtz_f64(__x, __y);
907}
908__DEVICE__
909inline double __dmul_rd(double __x, double __y) {
910  return __ocml_mul_rtn_f64(__x, __y);
911}
912#endif
913__DEVICE__
914inline double __dmul_rn(double __x, double __y) { return __x * __y; }
915#if defined OCML_BASIC_ROUNDED_OPERATIONS
916__DEVICE__
917inline double __dmul_ru(double __x, double __y) {
918  return __ocml_mul_rtp_f64(__x, __y);
919}
920__DEVICE__
921inline double __dmul_rz(double __x, double __y) {
922  return __ocml_mul_rtz_f64(__x, __y);
923}
924__DEVICE__
925inline double __drcp_rd(double __x) { return __llvm_amdgcn_rcp_f64(__x); }
926#endif
927__DEVICE__
928inline double __drcp_rn(double __x) { return __llvm_amdgcn_rcp_f64(__x); }
929#if defined OCML_BASIC_ROUNDED_OPERATIONS
930__DEVICE__
931inline double __drcp_ru(double __x) { return __llvm_amdgcn_rcp_f64(__x); }
932__DEVICE__
933inline double __drcp_rz(double __x) { return __llvm_amdgcn_rcp_f64(__x); }
934__DEVICE__
935inline double __dsqrt_rd(double __x) { return __ocml_sqrt_rtn_f64(__x); }
936#endif
937__DEVICE__
938inline double __dsqrt_rn(double __x) { return __ocml_sqrt_f64(__x); }
939#if defined OCML_BASIC_ROUNDED_OPERATIONS
940__DEVICE__
941inline double __dsqrt_ru(double __x) { return __ocml_sqrt_rtp_f64(__x); }
942__DEVICE__
943inline double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); }
944__DEVICE__
945inline double __dsub_rd(double __x, double __y) {
946  return __ocml_sub_rtn_f64(__x, __y);
947}
948#endif
949__DEVICE__
950inline double __dsub_rn(double __x, double __y) { return __x - __y; }
951#if defined OCML_BASIC_ROUNDED_OPERATIONS
952__DEVICE__
953inline double __dsub_ru(double __x, double __y) {
954  return __ocml_sub_rtp_f64(__x, __y);
955}
956__DEVICE__
957inline double __dsub_rz(double __x, double __y) {
958  return __ocml_sub_rtz_f64(__x, __y);
959}
960__DEVICE__
961inline double __fma_rd(double __x, double __y, double __z) {
962  return __ocml_fma_rtn_f64(__x, __y, __z);
963}
964#endif
965__DEVICE__
966inline double __fma_rn(double __x, double __y, double __z) {
967  return __ocml_fma_f64(__x, __y, __z);
968}
969#if defined OCML_BASIC_ROUNDED_OPERATIONS
970__DEVICE__
971inline double __fma_ru(double __x, double __y, double __z) {
972  return __ocml_fma_rtp_f64(__x, __y, __z);
973}
974__DEVICE__
975inline double __fma_rz(double __x, double __y, double __z) {
976  return __ocml_fma_rtz_f64(__x, __y, __z);
977}
978#endif
979// END INTRINSICS
980// END DOUBLE
981
982// BEGIN INTEGER
983__DEVICE__
984inline int abs(int __x) {
985  int __sgn = __x >> (sizeof(int) * CHAR_BIT - 1);
986  return (__x ^ __sgn) - __sgn;
987}
988__DEVICE__
989inline long labs(long __x) {
990  long __sgn = __x >> (sizeof(long) * CHAR_BIT - 1);
991  return (__x ^ __sgn) - __sgn;
992}
993__DEVICE__
994inline long long llabs(long long __x) {
995  long long __sgn = __x >> (sizeof(long long) * CHAR_BIT - 1);
996  return (__x ^ __sgn) - __sgn;
997}
998
999#if defined(__cplusplus)
1000__DEVICE__
1001inline long abs(long __x) { return labs(__x); }
1002__DEVICE__
1003inline long long abs(long long __x) { return llabs(__x); }
1004#endif
1005// END INTEGER
1006
1007__DEVICE__
1008inline _Float16 fma(_Float16 __x, _Float16 __y, _Float16 __z) {
1009  return __ocml_fma_f16(__x, __y, __z);
1010}
1011
1012__DEVICE__
1013inline float fma(float __x, float __y, float __z) {
1014  return fmaf(__x, __y, __z);
1015}
1016
1017#pragma push_macro("__DEF_FUN1")
1018#pragma push_macro("__DEF_FUN2")
1019#pragma push_macro("__DEF_FUNI")
1020#pragma push_macro("__DEF_FLOAT_FUN2I")
1021#pragma push_macro("__HIP_OVERLOAD1")
1022#pragma push_macro("__HIP_OVERLOAD2")
1023
1024// __hip_enable_if::type is a type function which returns __T if __B is true.
1025template <bool __B, class __T = void> struct __hip_enable_if {};
1026
1027template <class __T> struct __hip_enable_if<true, __T> { typedef __T type; };
1028
1029// __HIP_OVERLOAD1 is used to resolve function calls with integer argument to
1030// avoid compilation error due to ambibuity. e.g. floor(5) is resolved with
1031// floor(double).
1032#define __HIP_OVERLOAD1(__retty, __fn)                                         \
1033  template <typename __T>                                                      \
1034  __DEVICE__ typename __hip_enable_if<std::numeric_limits<__T>::is_integer,    \
1035                                      __retty>::type                           \
1036  __fn(__T __x) {                                                              \
1037    return ::__fn((double)__x);                                                \
1038  }
1039
1040// __HIP_OVERLOAD2 is used to resolve function calls with mixed float/double
1041// or integer argument to avoid compilation error due to ambibuity. e.g.
1042// max(5.0f, 6.0) is resolved with max(double, double).
1043#define __HIP_OVERLOAD2(__retty, __fn)                                         \
1044  template <typename __T1, typename __T2>                                      \
1045  __DEVICE__                                                                   \
1046      typename __hip_enable_if<std::numeric_limits<__T1>::is_specialized &&    \
1047                                   std::numeric_limits<__T2>::is_specialized,  \
1048                               __retty>::type                                  \
1049      __fn(__T1 __x, __T2 __y) {                                               \
1050    return __fn((double)__x, (double)__y);                                     \
1051  }
1052
1053// Define cmath functions with float argument and returns float.
1054#define __DEF_FUN1(__retty, __func)                                            \
1055  __DEVICE__                                                                   \
1056  inline float __func(float __x) { return __func##f(__x); }                    \
1057  __HIP_OVERLOAD1(__retty, __func)
1058
1059// Define cmath functions with float argument and returns __retty.
1060#define __DEF_FUNI(__retty, __func)                                            \
1061  __DEVICE__                                                                   \
1062  inline __retty __func(float __x) { return __func##f(__x); }                  \
1063  __HIP_OVERLOAD1(__retty, __func)
1064
1065// define cmath functions with two float arguments.
1066#define __DEF_FUN2(__retty, __func)                                            \
1067  __DEVICE__                                                                   \
1068  inline float __func(float __x, float __y) { return __func##f(__x, __y); }    \
1069  __HIP_OVERLOAD2(__retty, __func)
1070
1071__DEF_FUN1(double, acos)
1072__DEF_FUN1(double, acosh)
1073__DEF_FUN1(double, asin)
1074__DEF_FUN1(double, asinh)
1075__DEF_FUN1(double, atan)
1076__DEF_FUN2(double, atan2);
1077__DEF_FUN1(double, atanh)
1078__DEF_FUN1(double, cbrt)
1079__DEF_FUN1(double, ceil)
1080__DEF_FUN2(double, copysign);
1081__DEF_FUN1(double, cos)
1082__DEF_FUN1(double, cosh)
1083__DEF_FUN1(double, erf)
1084__DEF_FUN1(double, erfc)
1085__DEF_FUN1(double, exp)
1086__DEF_FUN1(double, exp2)
1087__DEF_FUN1(double, expm1)
1088__DEF_FUN1(double, fabs)
1089__DEF_FUN2(double, fdim);
1090__DEF_FUN1(double, floor)
1091__DEF_FUN2(double, fmax);
1092__DEF_FUN2(double, fmin);
1093__DEF_FUN2(double, fmod);
1094//__HIP_OVERLOAD1(int, fpclassify)
1095__DEF_FUN2(double, hypot);
1096__DEF_FUNI(int, ilogb)
1097__HIP_OVERLOAD1(bool, isfinite)
1098__HIP_OVERLOAD2(bool, isgreater);
1099__HIP_OVERLOAD2(bool, isgreaterequal);
1100__HIP_OVERLOAD1(bool, isinf);
1101__HIP_OVERLOAD2(bool, isless);
1102__HIP_OVERLOAD2(bool, islessequal);
1103__HIP_OVERLOAD2(bool, islessgreater);
1104__HIP_OVERLOAD1(bool, isnan);
1105//__HIP_OVERLOAD1(bool, isnormal)
1106__HIP_OVERLOAD2(bool, isunordered);
1107__DEF_FUN1(double, lgamma)
1108__DEF_FUN1(double, log)
1109__DEF_FUN1(double, log10)
1110__DEF_FUN1(double, log1p)
1111__DEF_FUN1(double, log2)
1112__DEF_FUN1(double, logb)
1113__DEF_FUNI(long long, llrint)
1114__DEF_FUNI(long long, llround)
1115__DEF_FUNI(long, lrint)
1116__DEF_FUNI(long, lround)
1117__DEF_FUN1(double, nearbyint);
1118__DEF_FUN2(double, nextafter);
1119__DEF_FUN2(double, pow);
1120__DEF_FUN2(double, remainder);
1121__DEF_FUN1(double, rint);
1122__DEF_FUN1(double, round);
1123__HIP_OVERLOAD1(bool, signbit)
1124__DEF_FUN1(double, sin)
1125__DEF_FUN1(double, sinh)
1126__DEF_FUN1(double, sqrt)
1127__DEF_FUN1(double, tan)
1128__DEF_FUN1(double, tanh)
1129__DEF_FUN1(double, tgamma)
1130__DEF_FUN1(double, trunc);
1131
1132// define cmath functions with a float and an integer argument.
1133#define __DEF_FLOAT_FUN2I(__func)                                              \
1134  __DEVICE__                                                                   \
1135  inline float __func(float __x, int __y) { return __func##f(__x, __y); }
1136__DEF_FLOAT_FUN2I(scalbn)
1137
1138template <class T> __DEVICE__ inline T min(T __arg1, T __arg2) {
1139  return (__arg1 < __arg2) ? __arg1 : __arg2;
1140}
1141
1142template <class T> __DEVICE__ inline T max(T __arg1, T __arg2) {
1143  return (__arg1 > __arg2) ? __arg1 : __arg2;
1144}
1145
1146__DEVICE__ inline int min(int __arg1, int __arg2) {
1147  return (__arg1 < __arg2) ? __arg1 : __arg2;
1148}
1149__DEVICE__ inline int max(int __arg1, int __arg2) {
1150  return (__arg1 > __arg2) ? __arg1 : __arg2;
1151}
1152
1153__DEVICE__
1154inline float max(float __x, float __y) { return fmaxf(__x, __y); }
1155
1156__DEVICE__
1157inline double max(double __x, double __y) { return fmax(__x, __y); }
1158
1159__DEVICE__
1160inline float min(float __x, float __y) { return fminf(__x, __y); }
1161
1162__DEVICE__
1163inline double min(double __x, double __y) { return fmin(__x, __y); }
1164
1165__HIP_OVERLOAD2(double, max)
1166__HIP_OVERLOAD2(double, min)
1167
1168__host__ inline static int min(int __arg1, int __arg2) {
1169  return std::min(__arg1, __arg2);
1170}
1171
1172__host__ inline static int max(int __arg1, int __arg2) {
1173  return std::max(__arg1, __arg2);
1174}
1175
1176#pragma pop_macro("__DEF_FUN1")
1177#pragma pop_macro("__DEF_FUN2")
1178#pragma pop_macro("__DEF_FUNI")
1179#pragma pop_macro("__DEF_FLOAT_FUN2I")
1180#pragma pop_macro("__HIP_OVERLOAD1")
1181#pragma pop_macro("__HIP_OVERLOAD2")
1182#pragma pop_macro("__DEVICE__")
1183#pragma pop_macro("__RETURN_TYPE")
1184
1185#endif // __CLANG_HIP_MATH_H__
1186