1/*===---- __clang_hip_math.h - HIP math decls -------------------------------=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 10#ifndef __CLANG_HIP_MATH_H__ 11#define __CLANG_HIP_MATH_H__ 12 13#include <algorithm> 14#include <limits.h> 15#include <limits> 16#include <stdint.h> 17 18#pragma push_macro("__DEVICE__") 19#pragma push_macro("__RETURN_TYPE") 20 21// to be consistent with __clang_cuda_math_forward_declares 22#define __DEVICE__ static __device__ 23#define __RETURN_TYPE bool 24 25__DEVICE__ 26inline uint64_t __make_mantissa_base8(const char *__tagp) { 27 uint64_t __r = 0; 28 while (__tagp) { 29 char __tmp = *__tagp; 30 31 if (__tmp >= '0' && __tmp <= '7') 32 __r = (__r * 8u) + __tmp - '0'; 33 else 34 return 0; 35 36 ++__tagp; 37 } 38 39 return __r; 40} 41 42__DEVICE__ 43inline uint64_t __make_mantissa_base10(const char *__tagp) { 44 uint64_t __r = 0; 45 while (__tagp) { 46 char __tmp = *__tagp; 47 48 if (__tmp >= '0' && __tmp <= '9') 49 __r = (__r * 10u) + __tmp - '0'; 50 else 51 return 0; 52 53 ++__tagp; 54 } 55 56 return __r; 57} 58 59__DEVICE__ 60inline uint64_t __make_mantissa_base16(const char *__tagp) { 61 uint64_t __r = 0; 62 while (__tagp) { 63 char __tmp = *__tagp; 64 65 if (__tmp >= '0' && __tmp <= '9') 66 __r = (__r * 16u) + __tmp - '0'; 67 else if (__tmp >= 'a' && __tmp <= 'f') 68 __r = (__r * 16u) + __tmp - 'a' + 10; 69 else if (__tmp >= 'A' && __tmp <= 'F') 70 __r = (__r * 16u) + __tmp - 'A' + 10; 71 else 72 return 0; 73 74 ++__tagp; 75 } 76 77 return __r; 78} 79 80__DEVICE__ 81inline uint64_t __make_mantissa(const char *__tagp) { 82 if (!__tagp) 83 return 0u; 84 85 if (*__tagp == '0') { 86 ++__tagp; 87 88 if (*__tagp == 'x' || *__tagp == 'X') 89 return __make_mantissa_base16(__tagp); 90 else 91 return __make_mantissa_base8(__tagp); 92 } 93 94 return __make_mantissa_base10(__tagp); 95} 96 97// BEGIN FLOAT 98__DEVICE__ 99inline float abs(float __x) { return __ocml_fabs_f32(__x); } 100__DEVICE__ 101inline float acosf(float __x) { return __ocml_acos_f32(__x); } 102__DEVICE__ 103inline float acoshf(float __x) { return __ocml_acosh_f32(__x); } 104__DEVICE__ 105inline float asinf(float __x) { return __ocml_asin_f32(__x); } 106__DEVICE__ 107inline float asinhf(float __x) { return __ocml_asinh_f32(__x); } 108__DEVICE__ 109inline float atan2f(float __x, float __y) { return __ocml_atan2_f32(__x, __y); } 110__DEVICE__ 111inline float atanf(float __x) { return __ocml_atan_f32(__x); } 112__DEVICE__ 113inline float atanhf(float __x) { return __ocml_atanh_f32(__x); } 114__DEVICE__ 115inline float cbrtf(float __x) { return __ocml_cbrt_f32(__x); } 116__DEVICE__ 117inline float ceilf(float __x) { return __ocml_ceil_f32(__x); } 118__DEVICE__ 119inline float copysignf(float __x, float __y) { 120 return __ocml_copysign_f32(__x, __y); 121} 122__DEVICE__ 123inline float cosf(float __x) { return __ocml_cos_f32(__x); } 124__DEVICE__ 125inline float coshf(float __x) { return __ocml_cosh_f32(__x); } 126__DEVICE__ 127inline float cospif(float __x) { return __ocml_cospi_f32(__x); } 128__DEVICE__ 129inline float cyl_bessel_i0f(float __x) { return __ocml_i0_f32(__x); } 130__DEVICE__ 131inline float cyl_bessel_i1f(float __x) { return __ocml_i1_f32(__x); } 132__DEVICE__ 133inline float erfcf(float __x) { return __ocml_erfc_f32(__x); } 134__DEVICE__ 135inline float erfcinvf(float __x) { return __ocml_erfcinv_f32(__x); } 136__DEVICE__ 137inline float erfcxf(float __x) { return __ocml_erfcx_f32(__x); } 138__DEVICE__ 139inline float erff(float __x) { return __ocml_erf_f32(__x); } 140__DEVICE__ 141inline float erfinvf(float __x) { return __ocml_erfinv_f32(__x); } 142__DEVICE__ 143inline float exp10f(float __x) { return __ocml_exp10_f32(__x); } 144__DEVICE__ 145inline float exp2f(float __x) { return __ocml_exp2_f32(__x); } 146__DEVICE__ 147inline float expf(float __x) { return __ocml_exp_f32(__x); } 148__DEVICE__ 149inline float expm1f(float __x) { return __ocml_expm1_f32(__x); } 150__DEVICE__ 151inline float fabsf(float __x) { return __ocml_fabs_f32(__x); } 152__DEVICE__ 153inline float fdimf(float __x, float __y) { return __ocml_fdim_f32(__x, __y); } 154__DEVICE__ 155inline float fdividef(float __x, float __y) { return __x / __y; } 156__DEVICE__ 157inline float floorf(float __x) { return __ocml_floor_f32(__x); } 158__DEVICE__ 159inline float fmaf(float __x, float __y, float __z) { 160 return __ocml_fma_f32(__x, __y, __z); 161} 162__DEVICE__ 163inline float fmaxf(float __x, float __y) { return __ocml_fmax_f32(__x, __y); } 164__DEVICE__ 165inline float fminf(float __x, float __y) { return __ocml_fmin_f32(__x, __y); } 166__DEVICE__ 167inline float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); } 168__DEVICE__ 169inline float frexpf(float __x, int *__nptr) { 170 int __tmp; 171 float __r = 172 __ocml_frexp_f32(__x, (__attribute__((address_space(5))) int *)&__tmp); 173 *__nptr = __tmp; 174 175 return __r; 176} 177__DEVICE__ 178inline float hypotf(float __x, float __y) { return __ocml_hypot_f32(__x, __y); } 179__DEVICE__ 180inline int ilogbf(float __x) { return __ocml_ilogb_f32(__x); } 181__DEVICE__ 182inline __RETURN_TYPE isfinite(float __x) { return __ocml_isfinite_f32(__x); } 183__DEVICE__ 184inline __RETURN_TYPE isinf(float __x) { return __ocml_isinf_f32(__x); } 185__DEVICE__ 186inline __RETURN_TYPE isnan(float __x) { return __ocml_isnan_f32(__x); } 187__DEVICE__ 188inline float j0f(float __x) { return __ocml_j0_f32(__x); } 189__DEVICE__ 190inline float j1f(float __x) { return __ocml_j1_f32(__x); } 191__DEVICE__ 192inline float jnf(int __n, 193 float __x) { // TODO: we could use Ahmes multiplication 194 // and the Miller & Brown algorithm 195 // for linear recurrences to get O(log n) steps, but it's unclear if 196 // it'd be beneficial in this case. 197 if (__n == 0) 198 return j0f(__x); 199 if (__n == 1) 200 return j1f(__x); 201 202 float __x0 = j0f(__x); 203 float __x1 = j1f(__x); 204 for (int __i = 1; __i < __n; ++__i) { 205 float __x2 = (2 * __i) / __x * __x1 - __x0; 206 __x0 = __x1; 207 __x1 = __x2; 208 } 209 210 return __x1; 211} 212__DEVICE__ 213inline float ldexpf(float __x, int __e) { return __ocml_ldexp_f32(__x, __e); } 214__DEVICE__ 215inline float lgammaf(float __x) { return __ocml_lgamma_f32(__x); } 216__DEVICE__ 217inline long long int llrintf(float __x) { return __ocml_rint_f32(__x); } 218__DEVICE__ 219inline long long int llroundf(float __x) { return __ocml_round_f32(__x); } 220__DEVICE__ 221inline float log10f(float __x) { return __ocml_log10_f32(__x); } 222__DEVICE__ 223inline float log1pf(float __x) { return __ocml_log1p_f32(__x); } 224__DEVICE__ 225inline float log2f(float __x) { return __ocml_log2_f32(__x); } 226__DEVICE__ 227inline float logbf(float __x) { return __ocml_logb_f32(__x); } 228__DEVICE__ 229inline float logf(float __x) { return __ocml_log_f32(__x); } 230__DEVICE__ 231inline long int lrintf(float __x) { return __ocml_rint_f32(__x); } 232__DEVICE__ 233inline long int lroundf(float __x) { return __ocml_round_f32(__x); } 234__DEVICE__ 235inline float modff(float __x, float *__iptr) { 236 float __tmp; 237 float __r = 238 __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp); 239 *__iptr = __tmp; 240 241 return __r; 242} 243__DEVICE__ 244inline float nanf(const char *__tagp) { 245 union { 246 float val; 247 struct ieee_float { 248 uint32_t mantissa : 22; 249 uint32_t quiet : 1; 250 uint32_t exponent : 8; 251 uint32_t sign : 1; 252 } bits; 253 254 static_assert(sizeof(float) == sizeof(ieee_float), ""); 255 } __tmp; 256 257 __tmp.bits.sign = 0u; 258 __tmp.bits.exponent = ~0u; 259 __tmp.bits.quiet = 1u; 260 __tmp.bits.mantissa = __make_mantissa(__tagp); 261 262 return __tmp.val; 263} 264__DEVICE__ 265inline float nearbyintf(float __x) { return __ocml_nearbyint_f32(__x); } 266__DEVICE__ 267inline float nextafterf(float __x, float __y) { 268 return __ocml_nextafter_f32(__x, __y); 269} 270__DEVICE__ 271inline float norm3df(float __x, float __y, float __z) { 272 return __ocml_len3_f32(__x, __y, __z); 273} 274__DEVICE__ 275inline float norm4df(float __x, float __y, float __z, float __w) { 276 return __ocml_len4_f32(__x, __y, __z, __w); 277} 278__DEVICE__ 279inline float normcdff(float __x) { return __ocml_ncdf_f32(__x); } 280__DEVICE__ 281inline float normcdfinvf(float __x) { return __ocml_ncdfinv_f32(__x); } 282__DEVICE__ 283inline float 284normf(int __dim, 285 const float *__a) { // TODO: placeholder until OCML adds support. 286 float __r = 0; 287 while (__dim--) { 288 __r += __a[0] * __a[0]; 289 ++__a; 290 } 291 292 return __ocml_sqrt_f32(__r); 293} 294__DEVICE__ 295inline float powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); } 296__DEVICE__ 297inline float rcbrtf(float __x) { return __ocml_rcbrt_f32(__x); } 298__DEVICE__ 299inline float remainderf(float __x, float __y) { 300 return __ocml_remainder_f32(__x, __y); 301} 302__DEVICE__ 303inline float remquof(float __x, float __y, int *__quo) { 304 int __tmp; 305 float __r = __ocml_remquo_f32( 306 __x, __y, (__attribute__((address_space(5))) int *)&__tmp); 307 *__quo = __tmp; 308 309 return __r; 310} 311__DEVICE__ 312inline float rhypotf(float __x, float __y) { 313 return __ocml_rhypot_f32(__x, __y); 314} 315__DEVICE__ 316inline float rintf(float __x) { return __ocml_rint_f32(__x); } 317__DEVICE__ 318inline float rnorm3df(float __x, float __y, float __z) { 319 return __ocml_rlen3_f32(__x, __y, __z); 320} 321 322__DEVICE__ 323inline float rnorm4df(float __x, float __y, float __z, float __w) { 324 return __ocml_rlen4_f32(__x, __y, __z, __w); 325} 326__DEVICE__ 327inline float 328rnormf(int __dim, 329 const float *__a) { // TODO: placeholder until OCML adds support. 330 float __r = 0; 331 while (__dim--) { 332 __r += __a[0] * __a[0]; 333 ++__a; 334 } 335 336 return __ocml_rsqrt_f32(__r); 337} 338__DEVICE__ 339inline float roundf(float __x) { return __ocml_round_f32(__x); } 340__DEVICE__ 341inline float rsqrtf(float __x) { return __ocml_rsqrt_f32(__x); } 342__DEVICE__ 343inline float scalblnf(float __x, long int __n) { 344 return (__n < INT_MAX) ? __ocml_scalbn_f32(__x, __n) 345 : __ocml_scalb_f32(__x, __n); 346} 347__DEVICE__ 348inline float scalbnf(float __x, int __n) { return __ocml_scalbn_f32(__x, __n); } 349__DEVICE__ 350inline __RETURN_TYPE signbit(float __x) { return __ocml_signbit_f32(__x); } 351__DEVICE__ 352inline void sincosf(float __x, float *__sinptr, float *__cosptr) { 353 float __tmp; 354 355 *__sinptr = 356 __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp); 357 *__cosptr = __tmp; 358} 359__DEVICE__ 360inline void sincospif(float __x, float *__sinptr, float *__cosptr) { 361 float __tmp; 362 363 *__sinptr = __ocml_sincospi_f32( 364 __x, (__attribute__((address_space(5))) float *)&__tmp); 365 *__cosptr = __tmp; 366} 367__DEVICE__ 368inline float sinf(float __x) { return __ocml_sin_f32(__x); } 369__DEVICE__ 370inline float sinhf(float __x) { return __ocml_sinh_f32(__x); } 371__DEVICE__ 372inline float sinpif(float __x) { return __ocml_sinpi_f32(__x); } 373__DEVICE__ 374inline float sqrtf(float __x) { return __ocml_sqrt_f32(__x); } 375__DEVICE__ 376inline float tanf(float __x) { return __ocml_tan_f32(__x); } 377__DEVICE__ 378inline float tanhf(float __x) { return __ocml_tanh_f32(__x); } 379__DEVICE__ 380inline float tgammaf(float __x) { return __ocml_tgamma_f32(__x); } 381__DEVICE__ 382inline float truncf(float __x) { return __ocml_trunc_f32(__x); } 383__DEVICE__ 384inline float y0f(float __x) { return __ocml_y0_f32(__x); } 385__DEVICE__ 386inline float y1f(float __x) { return __ocml_y1_f32(__x); } 387__DEVICE__ 388inline float ynf(int __n, 389 float __x) { // TODO: we could use Ahmes multiplication 390 // and the Miller & Brown algorithm 391 // for linear recurrences to get O(log n) steps, but it's unclear if 392 // it'd be beneficial in this case. Placeholder until OCML adds 393 // support. 394 if (__n == 0) 395 return y0f(__x); 396 if (__n == 1) 397 return y1f(__x); 398 399 float __x0 = y0f(__x); 400 float __x1 = y1f(__x); 401 for (int __i = 1; __i < __n; ++__i) { 402 float __x2 = (2 * __i) / __x * __x1 - __x0; 403 __x0 = __x1; 404 __x1 = __x2; 405 } 406 407 return __x1; 408} 409 410// BEGIN INTRINSICS 411__DEVICE__ 412inline float __cosf(float __x) { return __ocml_native_cos_f32(__x); } 413__DEVICE__ 414inline float __exp10f(float __x) { return __ocml_native_exp10_f32(__x); } 415__DEVICE__ 416inline float __expf(float __x) { return __ocml_native_exp_f32(__x); } 417#if defined OCML_BASIC_ROUNDED_OPERATIONS 418__DEVICE__ 419inline float __fadd_rd(float __x, float __y) { 420 return __ocml_add_rtn_f32(__x, __y); 421} 422#endif 423__DEVICE__ 424inline float __fadd_rn(float __x, float __y) { return __x + __y; } 425#if defined OCML_BASIC_ROUNDED_OPERATIONS 426__DEVICE__ 427inline float __fadd_ru(float __x, float __y) { 428 return __ocml_add_rtp_f32(__x, __y); 429} 430__DEVICE__ 431inline float __fadd_rz(float __x, float __y) { 432 return __ocml_add_rtz_f32(__x, __y); 433} 434__DEVICE__ 435inline float __fdiv_rd(float __x, float __y) { 436 return __ocml_div_rtn_f32(__x, __y); 437} 438#endif 439__DEVICE__ 440inline float __fdiv_rn(float __x, float __y) { return __x / __y; } 441#if defined OCML_BASIC_ROUNDED_OPERATIONS 442__DEVICE__ 443inline float __fdiv_ru(float __x, float __y) { 444 return __ocml_div_rtp_f32(__x, __y); 445} 446__DEVICE__ 447inline float __fdiv_rz(float __x, float __y) { 448 return __ocml_div_rtz_f32(__x, __y); 449} 450#endif 451__DEVICE__ 452inline float __fdividef(float __x, float __y) { return __x / __y; } 453#if defined OCML_BASIC_ROUNDED_OPERATIONS 454__DEVICE__ 455inline float __fmaf_rd(float __x, float __y, float __z) { 456 return __ocml_fma_rtn_f32(__x, __y, __z); 457} 458#endif 459__DEVICE__ 460inline float __fmaf_rn(float __x, float __y, float __z) { 461 return __ocml_fma_f32(__x, __y, __z); 462} 463#if defined OCML_BASIC_ROUNDED_OPERATIONS 464__DEVICE__ 465inline float __fmaf_ru(float __x, float __y, float __z) { 466 return __ocml_fma_rtp_f32(__x, __y, __z); 467} 468__DEVICE__ 469inline float __fmaf_rz(float __x, float __y, float __z) { 470 return __ocml_fma_rtz_f32(__x, __y, __z); 471} 472__DEVICE__ 473inline float __fmul_rd(float __x, float __y) { 474 return __ocml_mul_rtn_f32(__x, __y); 475} 476#endif 477__DEVICE__ 478inline float __fmul_rn(float __x, float __y) { return __x * __y; } 479#if defined OCML_BASIC_ROUNDED_OPERATIONS 480__DEVICE__ 481inline float __fmul_ru(float __x, float __y) { 482 return __ocml_mul_rtp_f32(__x, __y); 483} 484__DEVICE__ 485inline float __fmul_rz(float __x, float __y) { 486 return __ocml_mul_rtz_f32(__x, __y); 487} 488__DEVICE__ 489inline float __frcp_rd(float __x) { return __llvm_amdgcn_rcp_f32(__x); } 490#endif 491__DEVICE__ 492inline float __frcp_rn(float __x) { return __llvm_amdgcn_rcp_f32(__x); } 493#if defined OCML_BASIC_ROUNDED_OPERATIONS 494__DEVICE__ 495inline float __frcp_ru(float __x) { return __llvm_amdgcn_rcp_f32(__x); } 496__DEVICE__ 497inline float __frcp_rz(float __x) { return __llvm_amdgcn_rcp_f32(__x); } 498#endif 499__DEVICE__ 500inline float __frsqrt_rn(float __x) { return __llvm_amdgcn_rsq_f32(__x); } 501#if defined OCML_BASIC_ROUNDED_OPERATIONS 502__DEVICE__ 503inline float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); } 504#endif 505__DEVICE__ 506inline float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); } 507#if defined OCML_BASIC_ROUNDED_OPERATIONS 508__DEVICE__ 509inline float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); } 510__DEVICE__ 511inline float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); } 512__DEVICE__ 513inline float __fsub_rd(float __x, float __y) { 514 return __ocml_sub_rtn_f32(__x, __y); 515} 516#endif 517__DEVICE__ 518inline float __fsub_rn(float __x, float __y) { return __x - __y; } 519#if defined OCML_BASIC_ROUNDED_OPERATIONS 520__DEVICE__ 521inline float __fsub_ru(float __x, float __y) { 522 return __ocml_sub_rtp_f32(__x, __y); 523} 524__DEVICE__ 525inline float __fsub_rz(float __x, float __y) { 526 return __ocml_sub_rtz_f32(__x, __y); 527} 528#endif 529__DEVICE__ 530inline float __log10f(float __x) { return __ocml_native_log10_f32(__x); } 531__DEVICE__ 532inline float __log2f(float __x) { return __ocml_native_log2_f32(__x); } 533__DEVICE__ 534inline float __logf(float __x) { return __ocml_native_log_f32(__x); } 535__DEVICE__ 536inline float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); } 537__DEVICE__ 538inline float __saturatef(float __x) { 539 return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); 540} 541__DEVICE__ 542inline void __sincosf(float __x, float *__sinptr, float *__cosptr) { 543 *__sinptr = __ocml_native_sin_f32(__x); 544 *__cosptr = __ocml_native_cos_f32(__x); 545} 546__DEVICE__ 547inline float __sinf(float __x) { return __ocml_native_sin_f32(__x); } 548__DEVICE__ 549inline float __tanf(float __x) { return __ocml_tan_f32(__x); } 550// END INTRINSICS 551// END FLOAT 552 553// BEGIN DOUBLE 554__DEVICE__ 555inline double abs(double __x) { return __ocml_fabs_f64(__x); } 556__DEVICE__ 557inline double acos(double __x) { return __ocml_acos_f64(__x); } 558__DEVICE__ 559inline double acosh(double __x) { return __ocml_acosh_f64(__x); } 560__DEVICE__ 561inline double asin(double __x) { return __ocml_asin_f64(__x); } 562__DEVICE__ 563inline double asinh(double __x) { return __ocml_asinh_f64(__x); } 564__DEVICE__ 565inline double atan(double __x) { return __ocml_atan_f64(__x); } 566__DEVICE__ 567inline double atan2(double __x, double __y) { 568 return __ocml_atan2_f64(__x, __y); 569} 570__DEVICE__ 571inline double atanh(double __x) { return __ocml_atanh_f64(__x); } 572__DEVICE__ 573inline double cbrt(double __x) { return __ocml_cbrt_f64(__x); } 574__DEVICE__ 575inline double ceil(double __x) { return __ocml_ceil_f64(__x); } 576__DEVICE__ 577inline double copysign(double __x, double __y) { 578 return __ocml_copysign_f64(__x, __y); 579} 580__DEVICE__ 581inline double cos(double __x) { return __ocml_cos_f64(__x); } 582__DEVICE__ 583inline double cosh(double __x) { return __ocml_cosh_f64(__x); } 584__DEVICE__ 585inline double cospi(double __x) { return __ocml_cospi_f64(__x); } 586__DEVICE__ 587inline double cyl_bessel_i0(double __x) { return __ocml_i0_f64(__x); } 588__DEVICE__ 589inline double cyl_bessel_i1(double __x) { return __ocml_i1_f64(__x); } 590__DEVICE__ 591inline double erf(double __x) { return __ocml_erf_f64(__x); } 592__DEVICE__ 593inline double erfc(double __x) { return __ocml_erfc_f64(__x); } 594__DEVICE__ 595inline double erfcinv(double __x) { return __ocml_erfcinv_f64(__x); } 596__DEVICE__ 597inline double erfcx(double __x) { return __ocml_erfcx_f64(__x); } 598__DEVICE__ 599inline double erfinv(double __x) { return __ocml_erfinv_f64(__x); } 600__DEVICE__ 601inline double exp(double __x) { return __ocml_exp_f64(__x); } 602__DEVICE__ 603inline double exp10(double __x) { return __ocml_exp10_f64(__x); } 604__DEVICE__ 605inline double exp2(double __x) { return __ocml_exp2_f64(__x); } 606__DEVICE__ 607inline double expm1(double __x) { return __ocml_expm1_f64(__x); } 608__DEVICE__ 609inline double fabs(double __x) { return __ocml_fabs_f64(__x); } 610__DEVICE__ 611inline double fdim(double __x, double __y) { return __ocml_fdim_f64(__x, __y); } 612__DEVICE__ 613inline double floor(double __x) { return __ocml_floor_f64(__x); } 614__DEVICE__ 615inline double fma(double __x, double __y, double __z) { 616 return __ocml_fma_f64(__x, __y, __z); 617} 618__DEVICE__ 619inline double fmax(double __x, double __y) { return __ocml_fmax_f64(__x, __y); } 620__DEVICE__ 621inline double fmin(double __x, double __y) { return __ocml_fmin_f64(__x, __y); } 622__DEVICE__ 623inline double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); } 624__DEVICE__ 625inline double frexp(double __x, int *__nptr) { 626 int __tmp; 627 double __r = 628 __ocml_frexp_f64(__x, (__attribute__((address_space(5))) int *)&__tmp); 629 *__nptr = __tmp; 630 631 return __r; 632} 633__DEVICE__ 634inline double hypot(double __x, double __y) { 635 return __ocml_hypot_f64(__x, __y); 636} 637__DEVICE__ 638inline int ilogb(double __x) { return __ocml_ilogb_f64(__x); } 639__DEVICE__ 640inline __RETURN_TYPE isfinite(double __x) { return __ocml_isfinite_f64(__x); } 641__DEVICE__ 642inline __RETURN_TYPE isinf(double __x) { return __ocml_isinf_f64(__x); } 643__DEVICE__ 644inline __RETURN_TYPE isnan(double __x) { return __ocml_isnan_f64(__x); } 645__DEVICE__ 646inline double j0(double __x) { return __ocml_j0_f64(__x); } 647__DEVICE__ 648inline double j1(double __x) { return __ocml_j1_f64(__x); } 649__DEVICE__ 650inline double jn(int __n, 651 double __x) { // TODO: we could use Ahmes multiplication 652 // and the Miller & Brown algorithm 653 // for linear recurrences to get O(log n) steps, but it's unclear if 654 // it'd be beneficial in this case. Placeholder until OCML adds 655 // support. 656 if (__n == 0) 657 return j0f(__x); 658 if (__n == 1) 659 return j1f(__x); 660 661 double __x0 = j0f(__x); 662 double __x1 = j1f(__x); 663 for (int __i = 1; __i < __n; ++__i) { 664 double __x2 = (2 * __i) / __x * __x1 - __x0; 665 __x0 = __x1; 666 __x1 = __x2; 667 } 668 669 return __x1; 670} 671__DEVICE__ 672inline double ldexp(double __x, int __e) { return __ocml_ldexp_f64(__x, __e); } 673__DEVICE__ 674inline double lgamma(double __x) { return __ocml_lgamma_f64(__x); } 675__DEVICE__ 676inline long long int llrint(double __x) { return __ocml_rint_f64(__x); } 677__DEVICE__ 678inline long long int llround(double __x) { return __ocml_round_f64(__x); } 679__DEVICE__ 680inline double log(double __x) { return __ocml_log_f64(__x); } 681__DEVICE__ 682inline double log10(double __x) { return __ocml_log10_f64(__x); } 683__DEVICE__ 684inline double log1p(double __x) { return __ocml_log1p_f64(__x); } 685__DEVICE__ 686inline double log2(double __x) { return __ocml_log2_f64(__x); } 687__DEVICE__ 688inline double logb(double __x) { return __ocml_logb_f64(__x); } 689__DEVICE__ 690inline long int lrint(double __x) { return __ocml_rint_f64(__x); } 691__DEVICE__ 692inline long int lround(double __x) { return __ocml_round_f64(__x); } 693__DEVICE__ 694inline double modf(double __x, double *__iptr) { 695 double __tmp; 696 double __r = 697 __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp); 698 *__iptr = __tmp; 699 700 return __r; 701} 702__DEVICE__ 703inline double nan(const char *__tagp) { 704#if !_WIN32 705 union { 706 double val; 707 struct ieee_double { 708 uint64_t mantissa : 51; 709 uint32_t quiet : 1; 710 uint32_t exponent : 11; 711 uint32_t sign : 1; 712 } bits; 713 static_assert(sizeof(double) == sizeof(ieee_double), ""); 714 } __tmp; 715 716 __tmp.bits.sign = 0u; 717 __tmp.bits.exponent = ~0u; 718 __tmp.bits.quiet = 1u; 719 __tmp.bits.mantissa = __make_mantissa(__tagp); 720 721 return __tmp.val; 722#else 723 static_assert(sizeof(uint64_t) == sizeof(double)); 724 uint64_t val = __make_mantissa(__tagp); 725 val |= 0xFFF << 51; 726 return *reinterpret_cast<double *>(&val); 727#endif 728} 729__DEVICE__ 730inline double nearbyint(double __x) { return __ocml_nearbyint_f64(__x); } 731__DEVICE__ 732inline double nextafter(double __x, double __y) { 733 return __ocml_nextafter_f64(__x, __y); 734} 735__DEVICE__ 736inline double 737norm(int __dim, 738 const double *__a) { // TODO: placeholder until OCML adds support. 739 double __r = 0; 740 while (__dim--) { 741 __r += __a[0] * __a[0]; 742 ++__a; 743 } 744 745 return __ocml_sqrt_f64(__r); 746} 747__DEVICE__ 748inline double norm3d(double __x, double __y, double __z) { 749 return __ocml_len3_f64(__x, __y, __z); 750} 751__DEVICE__ 752inline double norm4d(double __x, double __y, double __z, double __w) { 753 return __ocml_len4_f64(__x, __y, __z, __w); 754} 755__DEVICE__ 756inline double normcdf(double __x) { return __ocml_ncdf_f64(__x); } 757__DEVICE__ 758inline double normcdfinv(double __x) { return __ocml_ncdfinv_f64(__x); } 759__DEVICE__ 760inline double pow(double __x, double __y) { return __ocml_pow_f64(__x, __y); } 761__DEVICE__ 762inline double rcbrt(double __x) { return __ocml_rcbrt_f64(__x); } 763__DEVICE__ 764inline double remainder(double __x, double __y) { 765 return __ocml_remainder_f64(__x, __y); 766} 767__DEVICE__ 768inline double remquo(double __x, double __y, int *__quo) { 769 int __tmp; 770 double __r = __ocml_remquo_f64( 771 __x, __y, (__attribute__((address_space(5))) int *)&__tmp); 772 *__quo = __tmp; 773 774 return __r; 775} 776__DEVICE__ 777inline double rhypot(double __x, double __y) { 778 return __ocml_rhypot_f64(__x, __y); 779} 780__DEVICE__ 781inline double rint(double __x) { return __ocml_rint_f64(__x); } 782__DEVICE__ 783inline double 784rnorm(int __dim, 785 const double *__a) { // TODO: placeholder until OCML adds support. 786 double __r = 0; 787 while (__dim--) { 788 __r += __a[0] * __a[0]; 789 ++__a; 790 } 791 792 return __ocml_rsqrt_f64(__r); 793} 794__DEVICE__ 795inline double rnorm3d(double __x, double __y, double __z) { 796 return __ocml_rlen3_f64(__x, __y, __z); 797} 798__DEVICE__ 799inline double rnorm4d(double __x, double __y, double __z, double __w) { 800 return __ocml_rlen4_f64(__x, __y, __z, __w); 801} 802__DEVICE__ 803inline double round(double __x) { return __ocml_round_f64(__x); } 804__DEVICE__ 805inline double rsqrt(double __x) { return __ocml_rsqrt_f64(__x); } 806__DEVICE__ 807inline double scalbln(double __x, long int __n) { 808 return (__n < INT_MAX) ? __ocml_scalbn_f64(__x, __n) 809 : __ocml_scalb_f64(__x, __n); 810} 811__DEVICE__ 812inline double scalbn(double __x, int __n) { 813 return __ocml_scalbn_f64(__x, __n); 814} 815__DEVICE__ 816inline __RETURN_TYPE signbit(double __x) { return __ocml_signbit_f64(__x); } 817__DEVICE__ 818inline double sin(double __x) { return __ocml_sin_f64(__x); } 819__DEVICE__ 820inline void sincos(double __x, double *__sinptr, double *__cosptr) { 821 double __tmp; 822 *__sinptr = __ocml_sincos_f64( 823 __x, (__attribute__((address_space(5))) double *)&__tmp); 824 *__cosptr = __tmp; 825} 826__DEVICE__ 827inline void sincospi(double __x, double *__sinptr, double *__cosptr) { 828 double __tmp; 829 *__sinptr = __ocml_sincospi_f64( 830 __x, (__attribute__((address_space(5))) double *)&__tmp); 831 *__cosptr = __tmp; 832} 833__DEVICE__ 834inline double sinh(double __x) { return __ocml_sinh_f64(__x); } 835__DEVICE__ 836inline double sinpi(double __x) { return __ocml_sinpi_f64(__x); } 837__DEVICE__ 838inline double sqrt(double __x) { return __ocml_sqrt_f64(__x); } 839__DEVICE__ 840inline double tan(double __x) { return __ocml_tan_f64(__x); } 841__DEVICE__ 842inline double tanh(double __x) { return __ocml_tanh_f64(__x); } 843__DEVICE__ 844inline double tgamma(double __x) { return __ocml_tgamma_f64(__x); } 845__DEVICE__ 846inline double trunc(double __x) { return __ocml_trunc_f64(__x); } 847__DEVICE__ 848inline double y0(double __x) { return __ocml_y0_f64(__x); } 849__DEVICE__ 850inline double y1(double __x) { return __ocml_y1_f64(__x); } 851__DEVICE__ 852inline double yn(int __n, 853 double __x) { // TODO: we could use Ahmes multiplication 854 // and the Miller & Brown algorithm 855 // for linear recurrences to get O(log n) steps, but it's unclear if 856 // it'd be beneficial in this case. Placeholder until OCML adds 857 // support. 858 if (__n == 0) 859 return j0f(__x); 860 if (__n == 1) 861 return j1f(__x); 862 863 double __x0 = j0f(__x); 864 double __x1 = j1f(__x); 865 for (int __i = 1; __i < __n; ++__i) { 866 double __x2 = (2 * __i) / __x * __x1 - __x0; 867 __x0 = __x1; 868 __x1 = __x2; 869 } 870 871 return __x1; 872} 873 874// BEGIN INTRINSICS 875#if defined OCML_BASIC_ROUNDED_OPERATIONS 876__DEVICE__ 877inline double __dadd_rd(double __x, double __y) { 878 return __ocml_add_rtn_f64(__x, __y); 879} 880#endif 881__DEVICE__ 882inline double __dadd_rn(double __x, double __y) { return __x + __y; } 883#if defined OCML_BASIC_ROUNDED_OPERATIONS 884__DEVICE__ 885inline double __dadd_ru(double __x, double __y) { 886 return __ocml_add_rtp_f64(__x, __y); 887} 888__DEVICE__ 889inline double __dadd_rz(double __x, double __y) { 890 return __ocml_add_rtz_f64(__x, __y); 891} 892__DEVICE__ 893inline double __ddiv_rd(double __x, double __y) { 894 return __ocml_div_rtn_f64(__x, __y); 895} 896#endif 897__DEVICE__ 898inline double __ddiv_rn(double __x, double __y) { return __x / __y; } 899#if defined OCML_BASIC_ROUNDED_OPERATIONS 900__DEVICE__ 901inline double __ddiv_ru(double __x, double __y) { 902 return __ocml_div_rtp_f64(__x, __y); 903} 904__DEVICE__ 905inline double __ddiv_rz(double __x, double __y) { 906 return __ocml_div_rtz_f64(__x, __y); 907} 908__DEVICE__ 909inline double __dmul_rd(double __x, double __y) { 910 return __ocml_mul_rtn_f64(__x, __y); 911} 912#endif 913__DEVICE__ 914inline double __dmul_rn(double __x, double __y) { return __x * __y; } 915#if defined OCML_BASIC_ROUNDED_OPERATIONS 916__DEVICE__ 917inline double __dmul_ru(double __x, double __y) { 918 return __ocml_mul_rtp_f64(__x, __y); 919} 920__DEVICE__ 921inline double __dmul_rz(double __x, double __y) { 922 return __ocml_mul_rtz_f64(__x, __y); 923} 924__DEVICE__ 925inline double __drcp_rd(double __x) { return __llvm_amdgcn_rcp_f64(__x); } 926#endif 927__DEVICE__ 928inline double __drcp_rn(double __x) { return __llvm_amdgcn_rcp_f64(__x); } 929#if defined OCML_BASIC_ROUNDED_OPERATIONS 930__DEVICE__ 931inline double __drcp_ru(double __x) { return __llvm_amdgcn_rcp_f64(__x); } 932__DEVICE__ 933inline double __drcp_rz(double __x) { return __llvm_amdgcn_rcp_f64(__x); } 934__DEVICE__ 935inline double __dsqrt_rd(double __x) { return __ocml_sqrt_rtn_f64(__x); } 936#endif 937__DEVICE__ 938inline double __dsqrt_rn(double __x) { return __ocml_sqrt_f64(__x); } 939#if defined OCML_BASIC_ROUNDED_OPERATIONS 940__DEVICE__ 941inline double __dsqrt_ru(double __x) { return __ocml_sqrt_rtp_f64(__x); } 942__DEVICE__ 943inline double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); } 944__DEVICE__ 945inline double __dsub_rd(double __x, double __y) { 946 return __ocml_sub_rtn_f64(__x, __y); 947} 948#endif 949__DEVICE__ 950inline double __dsub_rn(double __x, double __y) { return __x - __y; } 951#if defined OCML_BASIC_ROUNDED_OPERATIONS 952__DEVICE__ 953inline double __dsub_ru(double __x, double __y) { 954 return __ocml_sub_rtp_f64(__x, __y); 955} 956__DEVICE__ 957inline double __dsub_rz(double __x, double __y) { 958 return __ocml_sub_rtz_f64(__x, __y); 959} 960__DEVICE__ 961inline double __fma_rd(double __x, double __y, double __z) { 962 return __ocml_fma_rtn_f64(__x, __y, __z); 963} 964#endif 965__DEVICE__ 966inline double __fma_rn(double __x, double __y, double __z) { 967 return __ocml_fma_f64(__x, __y, __z); 968} 969#if defined OCML_BASIC_ROUNDED_OPERATIONS 970__DEVICE__ 971inline double __fma_ru(double __x, double __y, double __z) { 972 return __ocml_fma_rtp_f64(__x, __y, __z); 973} 974__DEVICE__ 975inline double __fma_rz(double __x, double __y, double __z) { 976 return __ocml_fma_rtz_f64(__x, __y, __z); 977} 978#endif 979// END INTRINSICS 980// END DOUBLE 981 982// BEGIN INTEGER 983__DEVICE__ 984inline int abs(int __x) { 985 int __sgn = __x >> (sizeof(int) * CHAR_BIT - 1); 986 return (__x ^ __sgn) - __sgn; 987} 988__DEVICE__ 989inline long labs(long __x) { 990 long __sgn = __x >> (sizeof(long) * CHAR_BIT - 1); 991 return (__x ^ __sgn) - __sgn; 992} 993__DEVICE__ 994inline long long llabs(long long __x) { 995 long long __sgn = __x >> (sizeof(long long) * CHAR_BIT - 1); 996 return (__x ^ __sgn) - __sgn; 997} 998 999#if defined(__cplusplus) 1000__DEVICE__ 1001inline long abs(long __x) { return labs(__x); } 1002__DEVICE__ 1003inline long long abs(long long __x) { return llabs(__x); } 1004#endif 1005// END INTEGER 1006 1007__DEVICE__ 1008inline _Float16 fma(_Float16 __x, _Float16 __y, _Float16 __z) { 1009 return __ocml_fma_f16(__x, __y, __z); 1010} 1011 1012__DEVICE__ 1013inline float fma(float __x, float __y, float __z) { 1014 return fmaf(__x, __y, __z); 1015} 1016 1017#pragma push_macro("__DEF_FUN1") 1018#pragma push_macro("__DEF_FUN2") 1019#pragma push_macro("__DEF_FUNI") 1020#pragma push_macro("__DEF_FLOAT_FUN2I") 1021#pragma push_macro("__HIP_OVERLOAD1") 1022#pragma push_macro("__HIP_OVERLOAD2") 1023 1024// __hip_enable_if::type is a type function which returns __T if __B is true. 1025template <bool __B, class __T = void> struct __hip_enable_if {}; 1026 1027template <class __T> struct __hip_enable_if<true, __T> { typedef __T type; }; 1028 1029// __HIP_OVERLOAD1 is used to resolve function calls with integer argument to 1030// avoid compilation error due to ambibuity. e.g. floor(5) is resolved with 1031// floor(double). 1032#define __HIP_OVERLOAD1(__retty, __fn) \ 1033 template <typename __T> \ 1034 __DEVICE__ typename __hip_enable_if<std::numeric_limits<__T>::is_integer, \ 1035 __retty>::type \ 1036 __fn(__T __x) { \ 1037 return ::__fn((double)__x); \ 1038 } 1039 1040// __HIP_OVERLOAD2 is used to resolve function calls with mixed float/double 1041// or integer argument to avoid compilation error due to ambibuity. e.g. 1042// max(5.0f, 6.0) is resolved with max(double, double). 1043#define __HIP_OVERLOAD2(__retty, __fn) \ 1044 template <typename __T1, typename __T2> \ 1045 __DEVICE__ \ 1046 typename __hip_enable_if<std::numeric_limits<__T1>::is_specialized && \ 1047 std::numeric_limits<__T2>::is_specialized, \ 1048 __retty>::type \ 1049 __fn(__T1 __x, __T2 __y) { \ 1050 return __fn((double)__x, (double)__y); \ 1051 } 1052 1053// Define cmath functions with float argument and returns float. 1054#define __DEF_FUN1(__retty, __func) \ 1055 __DEVICE__ \ 1056 inline float __func(float __x) { return __func##f(__x); } \ 1057 __HIP_OVERLOAD1(__retty, __func) 1058 1059// Define cmath functions with float argument and returns __retty. 1060#define __DEF_FUNI(__retty, __func) \ 1061 __DEVICE__ \ 1062 inline __retty __func(float __x) { return __func##f(__x); } \ 1063 __HIP_OVERLOAD1(__retty, __func) 1064 1065// define cmath functions with two float arguments. 1066#define __DEF_FUN2(__retty, __func) \ 1067 __DEVICE__ \ 1068 inline float __func(float __x, float __y) { return __func##f(__x, __y); } \ 1069 __HIP_OVERLOAD2(__retty, __func) 1070 1071__DEF_FUN1(double, acos) 1072__DEF_FUN1(double, acosh) 1073__DEF_FUN1(double, asin) 1074__DEF_FUN1(double, asinh) 1075__DEF_FUN1(double, atan) 1076__DEF_FUN2(double, atan2); 1077__DEF_FUN1(double, atanh) 1078__DEF_FUN1(double, cbrt) 1079__DEF_FUN1(double, ceil) 1080__DEF_FUN2(double, copysign); 1081__DEF_FUN1(double, cos) 1082__DEF_FUN1(double, cosh) 1083__DEF_FUN1(double, erf) 1084__DEF_FUN1(double, erfc) 1085__DEF_FUN1(double, exp) 1086__DEF_FUN1(double, exp2) 1087__DEF_FUN1(double, expm1) 1088__DEF_FUN1(double, fabs) 1089__DEF_FUN2(double, fdim); 1090__DEF_FUN1(double, floor) 1091__DEF_FUN2(double, fmax); 1092__DEF_FUN2(double, fmin); 1093__DEF_FUN2(double, fmod); 1094//__HIP_OVERLOAD1(int, fpclassify) 1095__DEF_FUN2(double, hypot); 1096__DEF_FUNI(int, ilogb) 1097__HIP_OVERLOAD1(bool, isfinite) 1098__HIP_OVERLOAD2(bool, isgreater); 1099__HIP_OVERLOAD2(bool, isgreaterequal); 1100__HIP_OVERLOAD1(bool, isinf); 1101__HIP_OVERLOAD2(bool, isless); 1102__HIP_OVERLOAD2(bool, islessequal); 1103__HIP_OVERLOAD2(bool, islessgreater); 1104__HIP_OVERLOAD1(bool, isnan); 1105//__HIP_OVERLOAD1(bool, isnormal) 1106__HIP_OVERLOAD2(bool, isunordered); 1107__DEF_FUN1(double, lgamma) 1108__DEF_FUN1(double, log) 1109__DEF_FUN1(double, log10) 1110__DEF_FUN1(double, log1p) 1111__DEF_FUN1(double, log2) 1112__DEF_FUN1(double, logb) 1113__DEF_FUNI(long long, llrint) 1114__DEF_FUNI(long long, llround) 1115__DEF_FUNI(long, lrint) 1116__DEF_FUNI(long, lround) 1117__DEF_FUN1(double, nearbyint); 1118__DEF_FUN2(double, nextafter); 1119__DEF_FUN2(double, pow); 1120__DEF_FUN2(double, remainder); 1121__DEF_FUN1(double, rint); 1122__DEF_FUN1(double, round); 1123__HIP_OVERLOAD1(bool, signbit) 1124__DEF_FUN1(double, sin) 1125__DEF_FUN1(double, sinh) 1126__DEF_FUN1(double, sqrt) 1127__DEF_FUN1(double, tan) 1128__DEF_FUN1(double, tanh) 1129__DEF_FUN1(double, tgamma) 1130__DEF_FUN1(double, trunc); 1131 1132// define cmath functions with a float and an integer argument. 1133#define __DEF_FLOAT_FUN2I(__func) \ 1134 __DEVICE__ \ 1135 inline float __func(float __x, int __y) { return __func##f(__x, __y); } 1136__DEF_FLOAT_FUN2I(scalbn) 1137 1138template <class T> __DEVICE__ inline T min(T __arg1, T __arg2) { 1139 return (__arg1 < __arg2) ? __arg1 : __arg2; 1140} 1141 1142template <class T> __DEVICE__ inline T max(T __arg1, T __arg2) { 1143 return (__arg1 > __arg2) ? __arg1 : __arg2; 1144} 1145 1146__DEVICE__ inline int min(int __arg1, int __arg2) { 1147 return (__arg1 < __arg2) ? __arg1 : __arg2; 1148} 1149__DEVICE__ inline int max(int __arg1, int __arg2) { 1150 return (__arg1 > __arg2) ? __arg1 : __arg2; 1151} 1152 1153__DEVICE__ 1154inline float max(float __x, float __y) { return fmaxf(__x, __y); } 1155 1156__DEVICE__ 1157inline double max(double __x, double __y) { return fmax(__x, __y); } 1158 1159__DEVICE__ 1160inline float min(float __x, float __y) { return fminf(__x, __y); } 1161 1162__DEVICE__ 1163inline double min(double __x, double __y) { return fmin(__x, __y); } 1164 1165__HIP_OVERLOAD2(double, max) 1166__HIP_OVERLOAD2(double, min) 1167 1168__host__ inline static int min(int __arg1, int __arg2) { 1169 return std::min(__arg1, __arg2); 1170} 1171 1172__host__ inline static int max(int __arg1, int __arg2) { 1173 return std::max(__arg1, __arg2); 1174} 1175 1176#pragma pop_macro("__DEF_FUN1") 1177#pragma pop_macro("__DEF_FUN2") 1178#pragma pop_macro("__DEF_FUNI") 1179#pragma pop_macro("__DEF_FLOAT_FUN2I") 1180#pragma pop_macro("__HIP_OVERLOAD1") 1181#pragma pop_macro("__HIP_OVERLOAD2") 1182#pragma pop_macro("__DEVICE__") 1183#pragma pop_macro("__RETURN_TYPE") 1184 1185#endif // __CLANG_HIP_MATH_H__ 1186