1// Math overloads for simd -*- C++ -*- 2 3// Copyright (C) 2020-2022 Free Software Foundation, Inc. 4// 5// This file is part of the GNU ISO C++ Library. This library is free 6// software; you can redistribute it and/or modify it under the 7// terms of the GNU General Public License as published by the 8// Free Software Foundation; either version 3, or (at your option) 9// any later version. 10 11// This library is distributed in the hope that it will be useful, 12// but WITHOUT ANY WARRANTY; without even the implied warranty of 13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14// GNU General Public License for more details. 15 16// Under Section 7 of GPL version 3, you are granted additional 17// permissions described in the GCC Runtime Library Exception, version 18// 3.1, as published by the Free Software Foundation. 19 20// You should have received a copy of the GNU General Public License and 21// a copy of the GCC Runtime Library Exception along with this program; 22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23// <http://www.gnu.org/licenses/>. 24 25#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_ 26#define _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_ 27 28#if __cplusplus >= 201703L 29 30#include <utility> 31#include <iomanip> 32 33_GLIBCXX_SIMD_BEGIN_NAMESPACE 34template <typename _Tp, typename _V> 35 using _Samesize = fixed_size_simd<_Tp, _V::size()>; 36 37// _Math_return_type {{{ 38template <typename _DoubleR, typename _Tp, typename _Abi> 39 struct _Math_return_type; 40 41template <typename _DoubleR, typename _Tp, typename _Abi> 42 using _Math_return_type_t = 43 typename _Math_return_type<_DoubleR, _Tp, _Abi>::type; 44 45template <typename _Tp, typename _Abi> 46 struct _Math_return_type<double, _Tp, _Abi> 47 { using type = simd<_Tp, _Abi>; }; 48 49template <typename _Tp, typename _Abi> 50 struct _Math_return_type<bool, _Tp, _Abi> 51 { using type = simd_mask<_Tp, _Abi>; }; 52 53template <typename _DoubleR, typename _Tp, typename _Abi> 54 struct _Math_return_type 55 { using type = fixed_size_simd<_DoubleR, simd_size_v<_Tp, _Abi>>; }; 56 57//}}} 58// _GLIBCXX_SIMD_MATH_CALL_ {{{ 59#define _GLIBCXX_SIMD_MATH_CALL_(__name) \ 60template <typename _Tp, typename _Abi, typename..., \ 61 typename _R = _Math_return_type_t< \ 62 decltype(std::__name(declval<double>())), _Tp, _Abi>> \ 63 _GLIBCXX_SIMD_ALWAYS_INLINE \ 64 enable_if_t<is_floating_point_v<_Tp>, _R> \ 65 __name(simd<_Tp, _Abi> __x) \ 66 { return {__private_init, _Abi::_SimdImpl::_S_##__name(__data(__x))}; } 67 68// }}} 69//_Extra_argument_type{{{ 70template <typename _Up, typename _Tp, typename _Abi> 71 struct _Extra_argument_type; 72 73template <typename _Tp, typename _Abi> 74 struct _Extra_argument_type<_Tp*, _Tp, _Abi> 75 { 76 using type = simd<_Tp, _Abi>*; 77 static constexpr double* declval(); 78 static constexpr bool __needs_temporary_scalar = true; 79 80 _GLIBCXX_SIMD_INTRINSIC static constexpr auto _S_data(type __x) 81 { return &__data(*__x); } 82 }; 83 84template <typename _Up, typename _Tp, typename _Abi> 85 struct _Extra_argument_type<_Up*, _Tp, _Abi> 86 { 87 static_assert(is_integral_v<_Up>); 88 using type = fixed_size_simd<_Up, simd_size_v<_Tp, _Abi>>*; 89 static constexpr _Up* declval(); 90 static constexpr bool __needs_temporary_scalar = true; 91 92 _GLIBCXX_SIMD_INTRINSIC static constexpr auto _S_data(type __x) 93 { return &__data(*__x); } 94 }; 95 96template <typename _Tp, typename _Abi> 97 struct _Extra_argument_type<_Tp, _Tp, _Abi> 98 { 99 using type = simd<_Tp, _Abi>; 100 static constexpr double declval(); 101 static constexpr bool __needs_temporary_scalar = false; 102 103 _GLIBCXX_SIMD_INTRINSIC static constexpr decltype(auto) 104 _S_data(const type& __x) 105 { return __data(__x); } 106 }; 107 108template <typename _Up, typename _Tp, typename _Abi> 109 struct _Extra_argument_type 110 { 111 static_assert(is_integral_v<_Up>); 112 using type = fixed_size_simd<_Up, simd_size_v<_Tp, _Abi>>; 113 static constexpr _Up declval(); 114 static constexpr bool __needs_temporary_scalar = false; 115 116 _GLIBCXX_SIMD_INTRINSIC static constexpr decltype(auto) 117 _S_data(const type& __x) 118 { return __data(__x); } 119 }; 120 121//}}} 122// _GLIBCXX_SIMD_MATH_CALL2_ {{{ 123#define _GLIBCXX_SIMD_MATH_CALL2_(__name, __arg2) \ 124template < \ 125 typename _Tp, typename _Abi, typename..., \ 126 typename _Arg2 = _Extra_argument_type<__arg2, _Tp, _Abi>, \ 127 typename _R = _Math_return_type_t< \ 128 decltype(std::__name(declval<double>(), _Arg2::declval())), _Tp, _Abi>> \ 129 _GLIBCXX_SIMD_ALWAYS_INLINE \ 130 enable_if_t<is_floating_point_v<_Tp>, _R> \ 131 __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y) \ 132 { \ 133 return {__private_init, \ 134 _Abi::_SimdImpl::_S_##__name(__data(__x), _Arg2::_S_data(__y))}; \ 135 } \ 136template <typename _Up, typename _Tp, typename _Abi> \ 137 _GLIBCXX_SIMD_INTRINSIC _Math_return_type_t< \ 138 decltype(std::__name( \ 139 declval<double>(), \ 140 declval<enable_if_t< \ 141 conjunction_v< \ 142 is_same<__arg2, _Tp>, \ 143 negation<is_same<__remove_cvref_t<_Up>, simd<_Tp, _Abi>>>, \ 144 is_convertible<_Up, simd<_Tp, _Abi>>, is_floating_point<_Tp>>, \ 145 double>>())), \ 146 _Tp, _Abi> \ 147 __name(_Up&& __xx, const simd<_Tp, _Abi>& __yy) \ 148 { return __name(simd<_Tp, _Abi>(static_cast<_Up&&>(__xx)), __yy); } 149 150// }}} 151// _GLIBCXX_SIMD_MATH_CALL3_ {{{ 152#define _GLIBCXX_SIMD_MATH_CALL3_(__name, __arg2, __arg3) \ 153template <typename _Tp, typename _Abi, typename..., \ 154 typename _Arg2 = _Extra_argument_type<__arg2, _Tp, _Abi>, \ 155 typename _Arg3 = _Extra_argument_type<__arg3, _Tp, _Abi>, \ 156 typename _R = _Math_return_type_t< \ 157 decltype(std::__name(declval<double>(), _Arg2::declval(), \ 158 _Arg3::declval())), \ 159 _Tp, _Abi>> \ 160 _GLIBCXX_SIMD_ALWAYS_INLINE \ 161 enable_if_t<is_floating_point_v<_Tp>, _R> \ 162 __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y, \ 163 const typename _Arg3::type& __z) \ 164 { \ 165 return {__private_init, \ 166 _Abi::_SimdImpl::_S_##__name(__data(__x), _Arg2::_S_data(__y), \ 167 _Arg3::_S_data(__z))}; \ 168 } \ 169template < \ 170 typename _T0, typename _T1, typename _T2, typename..., \ 171 typename _U0 = __remove_cvref_t<_T0>, \ 172 typename _U1 = __remove_cvref_t<_T1>, \ 173 typename _U2 = __remove_cvref_t<_T2>, \ 174 typename _Simd = conditional_t<is_simd_v<_U1>, _U1, _U2>, \ 175 typename = enable_if_t<conjunction_v< \ 176 is_simd<_Simd>, is_convertible<_T0&&, _Simd>, \ 177 is_convertible<_T1&&, _Simd>, is_convertible<_T2&&, _Simd>, \ 178 negation<conjunction< \ 179 is_simd<_U0>, is_floating_point<__value_type_or_identity_t<_U0>>>>>>> \ 180 _GLIBCXX_SIMD_INTRINSIC decltype(__name(declval<const _Simd&>(), \ 181 declval<const _Simd&>(), \ 182 declval<const _Simd&>())) \ 183 __name(_T0&& __xx, _T1&& __yy, _T2&& __zz) \ 184 { \ 185 return __name(_Simd(static_cast<_T0&&>(__xx)), \ 186 _Simd(static_cast<_T1&&>(__yy)), \ 187 _Simd(static_cast<_T2&&>(__zz))); \ 188 } 189 190// }}} 191// __cosSeries {{{ 192template <typename _Abi> 193 _GLIBCXX_SIMD_ALWAYS_INLINE static simd<float, _Abi> 194 __cosSeries(const simd<float, _Abi>& __x) 195 { 196 const simd<float, _Abi> __x2 = __x * __x; 197 simd<float, _Abi> __y; 198 __y = 0x1.ap-16f; // 1/8! 199 __y = __y * __x2 - 0x1.6c1p-10f; // -1/6! 200 __y = __y * __x2 + 0x1.555556p-5f; // 1/4! 201 return __y * (__x2 * __x2) - .5f * __x2 + 1.f; 202 } 203 204template <typename _Abi> 205 _GLIBCXX_SIMD_ALWAYS_INLINE static simd<double, _Abi> 206 __cosSeries(const simd<double, _Abi>& __x) 207 { 208 const simd<double, _Abi> __x2 = __x * __x; 209 simd<double, _Abi> __y; 210 __y = 0x1.AC00000000000p-45; // 1/16! 211 __y = __y * __x2 - 0x1.9394000000000p-37; // -1/14! 212 __y = __y * __x2 + 0x1.1EED8C0000000p-29; // 1/12! 213 __y = __y * __x2 - 0x1.27E4FB7400000p-22; // -1/10! 214 __y = __y * __x2 + 0x1.A01A01A018000p-16; // 1/8! 215 __y = __y * __x2 - 0x1.6C16C16C16C00p-10; // -1/6! 216 __y = __y * __x2 + 0x1.5555555555554p-5; // 1/4! 217 return (__y * __x2 - .5f) * __x2 + 1.f; 218 } 219 220// }}} 221// __sinSeries {{{ 222template <typename _Abi> 223 _GLIBCXX_SIMD_ALWAYS_INLINE static simd<float, _Abi> 224 __sinSeries(const simd<float, _Abi>& __x) 225 { 226 const simd<float, _Abi> __x2 = __x * __x; 227 simd<float, _Abi> __y; 228 __y = -0x1.9CC000p-13f; // -1/7! 229 __y = __y * __x2 + 0x1.111100p-7f; // 1/5! 230 __y = __y * __x2 - 0x1.555556p-3f; // -1/3! 231 return __y * (__x2 * __x) + __x; 232 } 233 234template <typename _Abi> 235 _GLIBCXX_SIMD_ALWAYS_INLINE static simd<double, _Abi> 236 __sinSeries(const simd<double, _Abi>& __x) 237 { 238 // __x = [0, 0.7854 = pi/4] 239 // __x�� = [0, 0.6169 = pi��/8] 240 const simd<double, _Abi> __x2 = __x * __x; 241 simd<double, _Abi> __y; 242 __y = -0x1.ACF0000000000p-41; // -1/15! 243 __y = __y * __x2 + 0x1.6124400000000p-33; // 1/13! 244 __y = __y * __x2 - 0x1.AE64567000000p-26; // -1/11! 245 __y = __y * __x2 + 0x1.71DE3A5540000p-19; // 1/9! 246 __y = __y * __x2 - 0x1.A01A01A01A000p-13; // -1/7! 247 __y = __y * __x2 + 0x1.1111111111110p-7; // 1/5! 248 __y = __y * __x2 - 0x1.5555555555555p-3; // -1/3! 249 return __y * (__x2 * __x) + __x; 250 } 251 252// }}} 253// __zero_low_bits {{{ 254template <int _Bits, typename _Tp, typename _Abi> 255 _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> 256 __zero_low_bits(simd<_Tp, _Abi> __x) 257 { 258 const simd<_Tp, _Abi> __bitmask 259 = __bit_cast<_Tp>(~make_unsigned_t<__int_for_sizeof_t<_Tp>>() << _Bits); 260 return {__private_init, 261 _Abi::_SimdImpl::_S_bit_and(__data(__x), __data(__bitmask))}; 262 } 263 264// }}} 265// __fold_input {{{ 266 267/**@internal 268 * Fold @p x into [-����, ����] and remember the quadrant it came from: 269 * quadrant 0: [-����, ����] 270 * quadrant 1: [ ����, ����] 271 * quadrant 2: [ ����, 1����] 272 * quadrant 3: [1����, 1����] 273 * 274 * The algorithm determines `y` as the multiple `x - y * ���� = [-����, ����]`. Using 275 * a bitmask, `y` is reduced to `quadrant`. `y` can be calculated as 276 * ``` 277 * y = trunc(x / ����); 278 * y += fmod(y, 2); 279 * ``` 280 * This can be simplified by moving the (implicit) division by 2 into the 281 * truncation expression. The `+= fmod` effect can the be achieved by using 282 * rounding instead of truncation: `y = round(x / ����) * 2`. If precision allows, 283 * `2/�� * x` is better (faster). 284 */ 285template <typename _Tp, typename _Abi> 286 struct _Folded 287 { 288 simd<_Tp, _Abi> _M_x; 289 rebind_simd_t<int, simd<_Tp, _Abi>> _M_quadrant; 290 }; 291 292namespace __math_float { 293inline constexpr float __pi_over_4 = 0x1.921FB6p-1f; // ��/4 294inline constexpr float __2_over_pi = 0x1.45F306p-1f; // 2/�� 295inline constexpr float __pi_2_5bits0 296 = 0x1.921fc0p0f; // ��/2, 5 0-bits (least significant) 297inline constexpr float __pi_2_5bits0_rem 298 = -0x1.5777a6p-21f; // ��/2 - __pi_2_5bits0 299} // namespace __math_float 300namespace __math_double { 301inline constexpr double __pi_over_4 = 0x1.921fb54442d18p-1; // ��/4 302inline constexpr double __2_over_pi = 0x1.45F306DC9C883p-1; // 2/�� 303inline constexpr double __pi_2 = 0x1.921fb54442d18p0; // ��/2 304} // namespace __math_double 305 306template <typename _Abi> 307 _GLIBCXX_SIMD_ALWAYS_INLINE _Folded<float, _Abi> 308 __fold_input(const simd<float, _Abi>& __x) 309 { 310 using _V = simd<float, _Abi>; 311 using _IV = rebind_simd_t<int, _V>; 312 using namespace __math_float; 313 _Folded<float, _Abi> __r; 314 __r._M_x = abs(__x); 315#if 0 316 // zero most mantissa bits: 317 constexpr float __1_over_pi = 0x1.45F306p-2f; // 1/�� 318 const auto __y = (__r._M_x * __1_over_pi + 0x1.8p23f) - 0x1.8p23f; 319 // split �� into 4 parts, the first three with 13 trailing zeros (to make the 320 // following multiplications precise): 321 constexpr float __pi0 = 0x1.920000p1f; 322 constexpr float __pi1 = 0x1.fb4000p-11f; 323 constexpr float __pi2 = 0x1.444000p-23f; 324 constexpr float __pi3 = 0x1.68c234p-38f; 325 __r._M_x - __y*__pi0 - __y*__pi1 - __y*__pi2 - __y*__pi3 326#else 327 if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__r._M_x < __pi_over_4))) 328 __r._M_quadrant = 0; 329 else if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__r._M_x < 6 * __pi_over_4))) 330 { 331 const _V __y = nearbyint(__r._M_x * __2_over_pi); 332 __r._M_quadrant = static_simd_cast<_IV>(__y) & 3; // __y mod 4 333 __r._M_x -= __y * __pi_2_5bits0; 334 __r._M_x -= __y * __pi_2_5bits0_rem; 335 } 336 else 337 { 338 using __math_double::__2_over_pi; 339 using __math_double::__pi_2; 340 using _VD = rebind_simd_t<double, _V>; 341 _VD __xd = static_simd_cast<_VD>(__r._M_x); 342 _VD __y = nearbyint(__xd * __2_over_pi); 343 __r._M_quadrant = static_simd_cast<_IV>(__y) & 3; // = __y mod 4 344 __r._M_x = static_simd_cast<_V>(__xd - __y * __pi_2); 345 } 346#endif 347 return __r; 348 } 349 350template <typename _Abi> 351 _GLIBCXX_SIMD_ALWAYS_INLINE _Folded<double, _Abi> 352 __fold_input(const simd<double, _Abi>& __x) 353 { 354 using _V = simd<double, _Abi>; 355 using _IV = rebind_simd_t<int, _V>; 356 using namespace __math_double; 357 358 _Folded<double, _Abi> __r; 359 __r._M_x = abs(__x); 360 if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__r._M_x < __pi_over_4))) 361 { 362 __r._M_quadrant = 0; 363 return __r; 364 } 365 const _V __y = nearbyint(__r._M_x / (2 * __pi_over_4)); 366 __r._M_quadrant = static_simd_cast<_IV>(__y) & 3; 367 368 if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__r._M_x < 1025 * __pi_over_4))) 369 { 370 // x - y * pi/2, y uses no more than 11 mantissa bits 371 __r._M_x -= __y * 0x1.921FB54443000p0; 372 __r._M_x -= __y * -0x1.73DCB3B39A000p-43; 373 __r._M_x -= __y * 0x1.45C06E0E68948p-86; 374 } 375 else if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__y <= 0x1.0p30))) 376 { 377 // x - y * pi/2, y uses no more than 29 mantissa bits 378 __r._M_x -= __y * 0x1.921FB40000000p0; 379 __r._M_x -= __y * 0x1.4442D00000000p-24; 380 __r._M_x -= __y * 0x1.8469898CC5170p-48; 381 } 382 else 383 { 384 // x - y * pi/2, y may require all mantissa bits 385 const _V __y_hi = __zero_low_bits<26>(__y); 386 const _V __y_lo = __y - __y_hi; 387 const auto __pi_2_1 = 0x1.921FB50000000p0; 388 const auto __pi_2_2 = 0x1.110B460000000p-26; 389 const auto __pi_2_3 = 0x1.1A62630000000p-54; 390 const auto __pi_2_4 = 0x1.8A2E03707344Ap-81; 391 __r._M_x = __r._M_x - __y_hi * __pi_2_1 392 - max(__y_hi * __pi_2_2, __y_lo * __pi_2_1) 393 - min(__y_hi * __pi_2_2, __y_lo * __pi_2_1) 394 - max(__y_hi * __pi_2_3, __y_lo * __pi_2_2) 395 - min(__y_hi * __pi_2_3, __y_lo * __pi_2_2) 396 - max(__y * __pi_2_4, __y_lo * __pi_2_3) 397 - min(__y * __pi_2_4, __y_lo * __pi_2_3); 398 } 399 return __r; 400 } 401 402// }}} 403// __extract_exponent_as_int {{{ 404template <typename _Tp, typename _Abi> 405 _GLIBCXX_SIMD_INTRINSIC 406 rebind_simd_t<int, simd<_Tp, _Abi>> 407 __extract_exponent_as_int(const simd<_Tp, _Abi>& __v) 408 { 409 using _Vp = simd<_Tp, _Abi>; 410 using _Up = make_unsigned_t<__int_for_sizeof_t<_Tp>>; 411 using namespace std::experimental::__float_bitwise_operators; 412 using namespace std::experimental::__proposed; 413 const _Vp __exponent_mask 414 = __infinity_v<_Tp>; // 0x7f800000 or 0x7ff0000000000000 415 return static_simd_cast<rebind_simd_t<int, _Vp>>( 416 simd_bit_cast<rebind_simd_t<_Up, _Vp>>(__v & __exponent_mask) 417 >> (__digits_v<_Tp> - 1)); 418 } 419 420// }}} 421// __impl_or_fallback {{{ 422template <typename ImplFun, typename FallbackFun, typename... _Args> 423 _GLIBCXX_SIMD_INTRINSIC auto 424 __impl_or_fallback_dispatch(int, ImplFun&& __impl_fun, FallbackFun&&, 425 _Args&&... __args) 426 -> decltype(__impl_fun(static_cast<_Args&&>(__args)...)) 427 { return __impl_fun(static_cast<_Args&&>(__args)...); } 428 429template <typename ImplFun, typename FallbackFun, typename... _Args, 430 typename = __detail::__odr_helper> 431 inline auto 432 __impl_or_fallback_dispatch(float, ImplFun&&, FallbackFun&& __fallback_fun, 433 _Args&&... __args) 434 -> decltype(__fallback_fun(static_cast<_Args&&>(__args)...)) 435 { return __fallback_fun(static_cast<_Args&&>(__args)...); } 436 437template <typename... _Args> 438 _GLIBCXX_SIMD_INTRINSIC auto 439 __impl_or_fallback(_Args&&... __args) 440 { 441 return __impl_or_fallback_dispatch(int(), static_cast<_Args&&>(__args)...); 442 } 443//}}} 444 445// trigonometric functions {{{ 446_GLIBCXX_SIMD_MATH_CALL_(acos) 447_GLIBCXX_SIMD_MATH_CALL_(asin) 448_GLIBCXX_SIMD_MATH_CALL_(atan) 449_GLIBCXX_SIMD_MATH_CALL2_(atan2, _Tp) 450 451/* 452 * algorithm for sine and cosine: 453 * 454 * The result can be calculated with sine or cosine depending on the ��/4 section 455 * the input is in. sine ��� __x + __x�� cosine ��� 1 - __x�� 456 * 457 * sine: 458 * Map -__x to __x and invert the output 459 * Extend precision of __x - n * ��/4 by calculating 460 * ((__x - n * p1) - n * p2) - n * p3 (p1 + p2 + p3 = ��/4) 461 * 462 * Calculate Taylor series with tuned coefficients. 463 * Fix sign. 464 */ 465// cos{{{ 466template <typename _Tp, typename _Abi, typename = __detail::__odr_helper> 467 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> 468 cos(const simd<_Tp, _Abi>& __x) 469 { 470 using _V = simd<_Tp, _Abi>; 471 if constexpr (__is_scalar_abi<_Abi>() || __is_fixed_size_abi_v<_Abi>) 472 return {__private_init, _Abi::_SimdImpl::_S_cos(__data(__x))}; 473 else 474 { 475 if constexpr (is_same_v<_Tp, float>) 476 if (_GLIBCXX_SIMD_IS_UNLIKELY(any_of(abs(__x) >= 393382))) 477 return static_simd_cast<_V>( 478 cos(static_simd_cast<rebind_simd_t<double, _V>>(__x))); 479 480 const auto __f = __fold_input(__x); 481 // quadrant | effect 482 // 0 | cosSeries, + 483 // 1 | sinSeries, - 484 // 2 | cosSeries, - 485 // 3 | sinSeries, + 486 using namespace std::experimental::__float_bitwise_operators; 487 const _V __sign_flip 488 = _V(-0.f) & static_simd_cast<_V>((1 + __f._M_quadrant) << 30); 489 490 const auto __need_cos = (__f._M_quadrant & 1) == 0; 491 if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__need_cos))) 492 return __sign_flip ^ __cosSeries(__f._M_x); 493 else if (_GLIBCXX_SIMD_IS_UNLIKELY(none_of(__need_cos))) 494 return __sign_flip ^ __sinSeries(__f._M_x); 495 else // some_of(__need_cos) 496 { 497 _V __r = __sinSeries(__f._M_x); 498 where(__need_cos.__cvt(), __r) = __cosSeries(__f._M_x); 499 return __r ^ __sign_flip; 500 } 501 } 502 } 503 504template <typename _Tp> 505 _GLIBCXX_SIMD_ALWAYS_INLINE 506 enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, simd_abi::scalar>> 507 cos(simd<_Tp, simd_abi::scalar> __x) 508 { return std::cos(__data(__x)); } 509 510//}}} 511// sin{{{ 512template <typename _Tp, typename _Abi, typename = __detail::__odr_helper> 513 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> 514 sin(const simd<_Tp, _Abi>& __x) 515 { 516 using _V = simd<_Tp, _Abi>; 517 if constexpr (__is_scalar_abi<_Abi>() || __is_fixed_size_abi_v<_Abi>) 518 return {__private_init, _Abi::_SimdImpl::_S_sin(__data(__x))}; 519 else 520 { 521 if constexpr (is_same_v<_Tp, float>) 522 if (_GLIBCXX_SIMD_IS_UNLIKELY(any_of(abs(__x) >= 527449))) 523 return static_simd_cast<_V>( 524 sin(static_simd_cast<rebind_simd_t<double, _V>>(__x))); 525 526 const auto __f = __fold_input(__x); 527 // quadrant | effect 528 // 0 | sinSeries 529 // 1 | cosSeries 530 // 2 | sinSeries, sign flip 531 // 3 | cosSeries, sign flip 532 using namespace std::experimental::__float_bitwise_operators; 533 const auto __sign_flip 534 = (__x ^ static_simd_cast<_V>(1 - __f._M_quadrant)) & _V(_Tp(-0.)); 535 536 const auto __need_sin = (__f._M_quadrant & 1) == 0; 537 if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__need_sin))) 538 return __sign_flip ^ __sinSeries(__f._M_x); 539 else if (_GLIBCXX_SIMD_IS_UNLIKELY(none_of(__need_sin))) 540 return __sign_flip ^ __cosSeries(__f._M_x); 541 else // some_of(__need_sin) 542 { 543 _V __r = __cosSeries(__f._M_x); 544 where(__need_sin.__cvt(), __r) = __sinSeries(__f._M_x); 545 return __sign_flip ^ __r; 546 } 547 } 548 } 549 550template <typename _Tp> 551 _GLIBCXX_SIMD_ALWAYS_INLINE 552 enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, simd_abi::scalar>> 553 sin(simd<_Tp, simd_abi::scalar> __x) 554 { return std::sin(__data(__x)); } 555 556//}}} 557_GLIBCXX_SIMD_MATH_CALL_(tan) 558_GLIBCXX_SIMD_MATH_CALL_(acosh) 559_GLIBCXX_SIMD_MATH_CALL_(asinh) 560_GLIBCXX_SIMD_MATH_CALL_(atanh) 561_GLIBCXX_SIMD_MATH_CALL_(cosh) 562_GLIBCXX_SIMD_MATH_CALL_(sinh) 563_GLIBCXX_SIMD_MATH_CALL_(tanh) 564// }}} 565// exponential functions {{{ 566_GLIBCXX_SIMD_MATH_CALL_(exp) 567_GLIBCXX_SIMD_MATH_CALL_(exp2) 568_GLIBCXX_SIMD_MATH_CALL_(expm1) 569 570// }}} 571// frexp {{{ 572#if _GLIBCXX_SIMD_X86INTRIN 573template <typename _Tp, size_t _Np> 574 _GLIBCXX_SIMD_INTRINSIC 575 _SimdWrapper<_Tp, _Np> 576 __getexp(_SimdWrapper<_Tp, _Np> __x) 577 { 578 if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>()) 579 return __auto_bitcast(_mm_getexp_ps(__to_intrin(__x))); 580 else if constexpr (__have_avx512f && __is_sse_ps<_Tp, _Np>()) 581 return __auto_bitcast(_mm512_getexp_ps(__auto_bitcast(__to_intrin(__x)))); 582 else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>()) 583 return _mm_getexp_pd(__x); 584 else if constexpr (__have_avx512f && __is_sse_pd<_Tp, _Np>()) 585 return __lo128(_mm512_getexp_pd(__auto_bitcast(__x))); 586 else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>()) 587 return _mm256_getexp_ps(__x); 588 else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>()) 589 return __lo256(_mm512_getexp_ps(__auto_bitcast(__x))); 590 else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>()) 591 return _mm256_getexp_pd(__x); 592 else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>()) 593 return __lo256(_mm512_getexp_pd(__auto_bitcast(__x))); 594 else if constexpr (__is_avx512_ps<_Tp, _Np>()) 595 return _mm512_getexp_ps(__x); 596 else if constexpr (__is_avx512_pd<_Tp, _Np>()) 597 return _mm512_getexp_pd(__x); 598 else 599 __assert_unreachable<_Tp>(); 600 } 601 602template <typename _Tp, size_t _Np> 603 _GLIBCXX_SIMD_INTRINSIC 604 _SimdWrapper<_Tp, _Np> 605 __getmant_avx512(_SimdWrapper<_Tp, _Np> __x) 606 { 607 if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>()) 608 return __auto_bitcast(_mm_getmant_ps(__to_intrin(__x), _MM_MANT_NORM_p5_1, 609 _MM_MANT_SIGN_src)); 610 else if constexpr (__have_avx512f && __is_sse_ps<_Tp, _Np>()) 611 return __auto_bitcast(_mm512_getmant_ps(__auto_bitcast(__to_intrin(__x)), 612 _MM_MANT_NORM_p5_1, 613 _MM_MANT_SIGN_src)); 614 else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>()) 615 return _mm_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src); 616 else if constexpr (__have_avx512f && __is_sse_pd<_Tp, _Np>()) 617 return __lo128(_mm512_getmant_pd(__auto_bitcast(__x), _MM_MANT_NORM_p5_1, 618 _MM_MANT_SIGN_src)); 619 else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>()) 620 return _mm256_getmant_ps(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src); 621 else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>()) 622 return __lo256(_mm512_getmant_ps(__auto_bitcast(__x), _MM_MANT_NORM_p5_1, 623 _MM_MANT_SIGN_src)); 624 else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>()) 625 return _mm256_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src); 626 else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>()) 627 return __lo256(_mm512_getmant_pd(__auto_bitcast(__x), _MM_MANT_NORM_p5_1, 628 _MM_MANT_SIGN_src)); 629 else if constexpr (__is_avx512_ps<_Tp, _Np>()) 630 return _mm512_getmant_ps(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src); 631 else if constexpr (__is_avx512_pd<_Tp, _Np>()) 632 return _mm512_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src); 633 else 634 __assert_unreachable<_Tp>(); 635 } 636#endif // _GLIBCXX_SIMD_X86INTRIN 637 638/** 639 * splits @p __v into exponent and mantissa, the sign is kept with the mantissa 640 * 641 * The return value will be in the range [0.5, 1.0[ 642 * The @p __e value will be an integer defining the power-of-two exponent 643 */ 644template <typename _Tp, typename _Abi, typename = __detail::__odr_helper> 645 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> 646 frexp(const simd<_Tp, _Abi>& __x, _Samesize<int, simd<_Tp, _Abi>>* __exp) 647 { 648 if constexpr (simd_size_v<_Tp, _Abi> == 1) 649 { 650 int __tmp; 651 const auto __r = std::frexp(__x[0], &__tmp); 652 (*__exp)[0] = __tmp; 653 return __r; 654 } 655 else if constexpr (__is_fixed_size_abi_v<_Abi>) 656 return {__private_init, _Abi::_SimdImpl::_S_frexp(__data(__x), __data(*__exp))}; 657#if _GLIBCXX_SIMD_X86INTRIN 658 else if constexpr (__have_avx512f) 659 { 660 constexpr size_t _Np = simd_size_v<_Tp, _Abi>; 661 constexpr size_t _NI = _Np < 4 ? 4 : _Np; 662 const auto __v = __data(__x); 663 const auto __isnonzero 664 = _Abi::_SimdImpl::_S_isnonzerovalue_mask(__v._M_data); 665 const _SimdWrapper<int, _NI> __exp_plus1 666 = 1 + __convert<_SimdWrapper<int, _NI>>(__getexp(__v))._M_data; 667 const _SimdWrapper<int, _Np> __e = __wrapper_bitcast<int, _Np>( 668 _Abi::_CommonImpl::_S_blend(_SimdWrapper<bool, _NI>(__isnonzero), 669 _SimdWrapper<int, _NI>(), __exp_plus1)); 670 simd_abi::deduce_t<int, _Np>::_CommonImpl::_S_store(__e, __exp); 671 return {__private_init, 672 _Abi::_CommonImpl::_S_blend(_SimdWrapper<bool, _Np>( 673 __isnonzero), 674 __v, __getmant_avx512(__v))}; 675 } 676#endif // _GLIBCXX_SIMD_X86INTRIN 677 else 678 { 679 // fallback implementation 680 static_assert(sizeof(_Tp) == 4 || sizeof(_Tp) == 8); 681 using _V = simd<_Tp, _Abi>; 682 using _IV = rebind_simd_t<int, _V>; 683 using namespace std::experimental::__proposed; 684 using namespace std::experimental::__float_bitwise_operators; 685 686 constexpr int __exp_adjust = sizeof(_Tp) == 4 ? 0x7e : 0x3fe; 687 constexpr int __exp_offset = sizeof(_Tp) == 4 ? 0x70 : 0x200; 688 constexpr _Tp __subnorm_scale = sizeof(_Tp) == 4 ? 0x1p112 : 0x1p512; 689 _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __exponent_mask 690 = __infinity_v<_Tp>; // 0x7f800000 or 0x7ff0000000000000 691 _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __p5_1_exponent 692 = -(2 - __epsilon_v<_Tp>) / 2; // 0xbf7fffff or 0xbfefffffffffffff 693 694 _V __mant = __p5_1_exponent & (__exponent_mask | __x); // +/-[.5, 1) 695 const _IV __exponent_bits = __extract_exponent_as_int(__x); 696 if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x)))) 697 { 698 *__exp 699 = simd_cast<_Samesize<int, _V>>(__exponent_bits - __exp_adjust); 700 return __mant; 701 } 702 703#if __FINITE_MATH_ONLY__ 704 // at least one element of __x is 0 or subnormal, the rest is normal 705 // (inf and NaN are excluded by -ffinite-math-only) 706 const auto __iszero_inf_nan = __x == 0; 707#else 708 using _Ip = __int_for_sizeof_t<_Tp>; 709 const auto __as_int = simd_bit_cast<rebind_simd_t<_Ip, _V>>(abs(__x)); 710 const auto __inf = simd_bit_cast<rebind_simd_t<_Ip, _V>>(_V(__infinity_v<_Tp>)); 711 const auto __iszero_inf_nan = static_simd_cast<typename _V::mask_type>( 712 __as_int == 0 || __as_int >= __inf); 713#endif 714 715 const _V __scaled_subnormal = __x * __subnorm_scale; 716 const _V __mant_subnormal 717 = __p5_1_exponent & (__exponent_mask | __scaled_subnormal); 718 where(!isnormal(__x), __mant) = __mant_subnormal; 719 where(__iszero_inf_nan, __mant) = __x; 720 _IV __e = __extract_exponent_as_int(__scaled_subnormal); 721 using _MaskType = 722 typename conditional_t<sizeof(typename _V::value_type) == sizeof(int), 723 _V, _IV>::mask_type; 724 const _MaskType __value_isnormal = isnormal(__x).__cvt(); 725 where(__value_isnormal.__cvt(), __e) = __exponent_bits; 726 static_assert(sizeof(_IV) == sizeof(__value_isnormal)); 727 const _IV __offset 728 = (simd_bit_cast<_IV>(__value_isnormal) & _IV(__exp_adjust)) 729 | (simd_bit_cast<_IV>(static_simd_cast<_MaskType>(__exponent_bits == 0) 730 & static_simd_cast<_MaskType>(__x != 0)) 731 & _IV(__exp_adjust + __exp_offset)); 732 *__exp = simd_cast<_Samesize<int, _V>>(__e - __offset); 733 return __mant; 734 } 735 } 736 737// }}} 738_GLIBCXX_SIMD_MATH_CALL2_(ldexp, int) 739_GLIBCXX_SIMD_MATH_CALL_(ilogb) 740 741// logarithms {{{ 742_GLIBCXX_SIMD_MATH_CALL_(log) 743_GLIBCXX_SIMD_MATH_CALL_(log10) 744_GLIBCXX_SIMD_MATH_CALL_(log1p) 745_GLIBCXX_SIMD_MATH_CALL_(log2) 746 747//}}} 748// logb{{{ 749template <typename _Tp, typename _Abi, typename = __detail::__odr_helper> 750 enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, _Abi>> 751 logb(const simd<_Tp, _Abi>& __x) 752 { 753 constexpr size_t _Np = simd_size_v<_Tp, _Abi>; 754 if constexpr (_Np == 1) 755 return std::logb(__x[0]); 756 else if constexpr (__is_fixed_size_abi_v<_Abi>) 757 return {__private_init, _Abi::_SimdImpl::_S_logb(__data(__x))}; 758#if _GLIBCXX_SIMD_X86INTRIN // {{{ 759 else if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>()) 760 return {__private_init, 761 __auto_bitcast(_mm_getexp_ps(__to_intrin(__as_vector(__x))))}; 762 else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>()) 763 return {__private_init, _mm_getexp_pd(__data(__x))}; 764 else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>()) 765 return {__private_init, _mm256_getexp_ps(__data(__x))}; 766 else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>()) 767 return {__private_init, _mm256_getexp_pd(__data(__x))}; 768 else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>()) 769 return {__private_init, 770 __lo256(_mm512_getexp_ps(__auto_bitcast(__data(__x))))}; 771 else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>()) 772 return {__private_init, 773 __lo256(_mm512_getexp_pd(__auto_bitcast(__data(__x))))}; 774 else if constexpr (__is_avx512_ps<_Tp, _Np>()) 775 return {__private_init, _mm512_getexp_ps(__data(__x))}; 776 else if constexpr (__is_avx512_pd<_Tp, _Np>()) 777 return {__private_init, _mm512_getexp_pd(__data(__x))}; 778#endif // _GLIBCXX_SIMD_X86INTRIN }}} 779 else 780 { 781 using _V = simd<_Tp, _Abi>; 782 using namespace std::experimental::__proposed; 783 auto __is_normal = isnormal(__x); 784 785 // work on abs(__x) to reflect the return value on Linux for negative 786 // inputs (domain-error => implementation-defined value is returned) 787 const _V abs_x = abs(__x); 788 789 // __exponent(__x) returns the exponent value (bias removed) as 790 // simd<_Up> with integral _Up 791 auto&& __exponent = [](const _V& __v) { 792 using namespace std::experimental::__proposed; 793 using _IV = rebind_simd_t< 794 conditional_t<sizeof(_Tp) == sizeof(_LLong), _LLong, int>, _V>; 795 return (simd_bit_cast<_IV>(__v) >> (__digits_v<_Tp> - 1)) 796 - (__max_exponent_v<_Tp> - 1); 797 }; 798 _V __r = static_simd_cast<_V>(__exponent(abs_x)); 799 if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__is_normal))) 800 // without corner cases (nan, inf, subnormal, zero) we have our 801 // answer: 802 return __r; 803 const auto __is_zero = __x == 0; 804 const auto __is_nan = isnan(__x); 805 const auto __is_inf = isinf(__x); 806 where(__is_zero, __r) = -__infinity_v<_Tp>; 807 where(__is_nan, __r) = __x; 808 where(__is_inf, __r) = __infinity_v<_Tp>; 809 __is_normal |= __is_zero || __is_nan || __is_inf; 810 if (all_of(__is_normal)) 811 // at this point everything but subnormals is handled 812 return __r; 813 // subnormals repeat the exponent extraction after multiplication of the 814 // input with __a floating point value that has 112 (0x70) in its exponent 815 // (not too big for sp and large enough for dp) 816 const _V __scaled = abs_x * _Tp(0x1p112); 817 _V __scaled_exp = static_simd_cast<_V>(__exponent(__scaled) - 112); 818 where(__is_normal, __scaled_exp) = __r; 819 return __scaled_exp; 820 } 821 } 822 823//}}} 824template <typename _Tp, typename _Abi, typename = __detail::__odr_helper> 825 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> 826 modf(const simd<_Tp, _Abi>& __x, simd<_Tp, _Abi>* __iptr) 827 { 828 if constexpr (simd_size_v<_Tp, _Abi> == 1) 829 { 830 _Tp __tmp; 831 _Tp __r = std::modf(__x[0], &__tmp); 832 __iptr[0] = __tmp; 833 return __r; 834 } 835 else 836 { 837 const auto __integral = trunc(__x); 838 *__iptr = __integral; 839 auto __r = __x - __integral; 840#if !__FINITE_MATH_ONLY__ 841 where(isinf(__x), __r) = _Tp(); 842#endif 843 return copysign(__r, __x); 844 } 845 } 846 847_GLIBCXX_SIMD_MATH_CALL2_(scalbn, int) 848_GLIBCXX_SIMD_MATH_CALL2_(scalbln, long) 849 850_GLIBCXX_SIMD_MATH_CALL_(cbrt) 851 852_GLIBCXX_SIMD_MATH_CALL_(abs) 853_GLIBCXX_SIMD_MATH_CALL_(fabs) 854 855// [parallel.simd.math] only asks for is_floating_point_v<_Tp> and forgot to 856// allow signed integral _Tp 857template <typename _Tp, typename _Abi> 858 _GLIBCXX_SIMD_ALWAYS_INLINE 859 enable_if_t<!is_floating_point_v<_Tp> && is_signed_v<_Tp>, simd<_Tp, _Abi>> 860 abs(const simd<_Tp, _Abi>& __x) 861 { return {__private_init, _Abi::_SimdImpl::_S_abs(__data(__x))}; } 862 863#define _GLIBCXX_SIMD_CVTING2(_NAME) \ 864template <typename _Tp, typename _Abi> \ 865 _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \ 866 const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y) \ 867 { \ 868 return _NAME(__x, __y); \ 869 } \ 870 \ 871template <typename _Tp, typename _Abi> \ 872 _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \ 873 const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y) \ 874 { \ 875 return _NAME(__x, __y); \ 876 } 877 878#define _GLIBCXX_SIMD_CVTING3(_NAME) \ 879template <typename _Tp, typename _Abi> \ 880 _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \ 881 const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y, \ 882 const simd<_Tp, _Abi>& __z) \ 883 { \ 884 return _NAME(__x, __y, __z); \ 885 } \ 886 \ 887template <typename _Tp, typename _Abi> \ 888 _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \ 889 const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y, \ 890 const simd<_Tp, _Abi>& __z) \ 891 { \ 892 return _NAME(__x, __y, __z); \ 893 } \ 894 \ 895template <typename _Tp, typename _Abi> \ 896 _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \ 897 const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y, \ 898 const __type_identity_t<simd<_Tp, _Abi>>& __z) \ 899 { \ 900 return _NAME(__x, __y, __z); \ 901 } \ 902 \ 903template <typename _Tp, typename _Abi> \ 904 _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \ 905 const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y, \ 906 const __type_identity_t<simd<_Tp, _Abi>>& __z) \ 907 { \ 908 return _NAME(__x, __y, __z); \ 909 } \ 910 \ 911template <typename _Tp, typename _Abi> \ 912 _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \ 913 const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y, \ 914 const __type_identity_t<simd<_Tp, _Abi>>& __z) \ 915 { \ 916 return _NAME(__x, __y, __z); \ 917 } \ 918 \ 919template <typename _Tp, typename _Abi> \ 920 _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \ 921 const __type_identity_t<simd<_Tp, _Abi>>& __x, \ 922 const __type_identity_t<simd<_Tp, _Abi>>& __y, const simd<_Tp, _Abi>& __z) \ 923 { \ 924 return _NAME(__x, __y, __z); \ 925 } 926 927template <typename _R, typename _ToApply, typename _Tp, typename... _Tps> 928 _GLIBCXX_SIMD_INTRINSIC _R 929 __fixed_size_apply(_ToApply&& __apply, const _Tp& __arg0, 930 const _Tps&... __args) 931 { 932 return {__private_init, 933 __data(__arg0)._M_apply_per_chunk( 934 [&](auto __impl, const auto&... __inner) { 935 using _V = typename decltype(__impl)::simd_type; 936 return __data(__apply(_V(__private_init, __inner)...)); 937 }, 938 __data(__args)...)}; 939 } 940 941template <typename _VV, typename = __detail::__odr_helper> 942 __remove_cvref_t<_VV> 943 __hypot(_VV __x, _VV __y) 944 { 945 using _V = __remove_cvref_t<_VV>; 946 using _Tp = typename _V::value_type; 947 if constexpr (_V::size() == 1) 948 return std::hypot(_Tp(__x[0]), _Tp(__y[0])); 949 else if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>) 950 { 951 return __fixed_size_apply<_V>([](auto __a, 952 auto __b) { return hypot(__a, __b); }, 953 __x, __y); 954 } 955 else 956 { 957 // A simple solution for _Tp == float would be to cast to double and 958 // simply calculate sqrt(x��+y��) as it can't over-/underflow anymore with 959 // dp. It still needs the Annex F fixups though and isn't faster on 960 // Skylake-AVX512 (not even for SSE and AVX vectors, and really bad for 961 // AVX-512). 962 using namespace __float_bitwise_operators; 963 using namespace __proposed; 964 _V __absx = abs(__x); // no error 965 _V __absy = abs(__y); // no error 966 _V __hi = max(__absx, __absy); // no error 967 _V __lo = min(__absy, __absx); // no error 968 969 // round __hi down to the next power-of-2: 970 _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>); 971 972#ifndef __FAST_MATH__ 973 if constexpr (__have_neon && !__have_neon_a32) 974 { // With ARMv7 NEON, we have no subnormals and must use slightly 975 // different strategy 976 const _V __hi_exp = __hi & __inf; 977 _V __scale_back = __hi_exp; 978 // For large exponents (max & max/2) the inversion comes too close 979 // to subnormals. Subtract 3 from the exponent: 980 where(__hi_exp > 1, __scale_back) = __hi_exp * _Tp(0.125); 981 // Invert and adjust for the off-by-one error of inversion via xor: 982 const _V __scale = (__scale_back ^ __inf) * _Tp(.5); 983 const _V __h1 = __hi * __scale; 984 const _V __l1 = __lo * __scale; 985 _V __r = __scale_back * sqrt(__h1 * __h1 + __l1 * __l1); 986 // Fix up hypot(0, 0) to not be NaN: 987 where(__hi == 0, __r) = 0; 988 return __r; 989 } 990#endif 991 992#ifdef __FAST_MATH__ 993 // With fast-math, ignore precision of subnormals and inputs from 994 // __finite_max_v/2 to __finite_max_v. This removes all 995 // branching/masking. 996 if constexpr (true) 997#else 998 if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x)) 999 && all_of(isnormal(__y)))) 1000#endif 1001 { 1002 const _V __hi_exp = __hi & __inf; 1003 //((__hi + __hi) & __inf) ^ __inf almost works for computing 1004 //__scale, 1005 // except when (__hi + __hi) & __inf == __inf, in which case __scale 1006 // becomes 0 (should be min/2 instead) and thus loses the 1007 // information from __lo. 1008#ifdef __FAST_MATH__ 1009 using _Ip = __int_for_sizeof_t<_Tp>; 1010 using _IV = rebind_simd_t<_Ip, _V>; 1011 const auto __as_int = simd_bit_cast<_IV>(__hi_exp); 1012 const _V __scale 1013 = simd_bit_cast<_V>(2 * __bit_cast<_Ip>(_Tp(1)) - __as_int); 1014#else 1015 const _V __scale = (__hi_exp ^ __inf) * _Tp(.5); 1016#endif 1017 _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __mant_mask 1018 = __norm_min_v<_Tp> - __denorm_min_v<_Tp>; 1019 const _V __h1 = (__hi & __mant_mask) | _V(1); 1020 const _V __l1 = __lo * __scale; 1021 return __hi_exp * sqrt(__h1 * __h1 + __l1 * __l1); 1022 } 1023 else 1024 { 1025 // slower path to support subnormals 1026 // if __hi is subnormal, avoid scaling by inf & final mul by 0 1027 // (which yields NaN) by using min() 1028 _V __scale = _V(1 / __norm_min_v<_Tp>); 1029 // invert exponent w/o error and w/o using the slow divider unit: 1030 // xor inverts the exponent but off by 1. Multiplication with .5 1031 // adjusts for the discrepancy. 1032 where(__hi >= __norm_min_v<_Tp>, __scale) 1033 = ((__hi & __inf) ^ __inf) * _Tp(.5); 1034 // adjust final exponent for subnormal inputs 1035 _V __hi_exp = __norm_min_v<_Tp>; 1036 where(__hi >= __norm_min_v<_Tp>, __hi_exp) 1037 = __hi & __inf; // no error 1038 _V __h1 = __hi * __scale; // no error 1039 _V __l1 = __lo * __scale; // no error 1040 1041 // sqrt(x��+y��) = e*sqrt((x/e)��+(y/e)��): 1042 // this ensures no overflow in the argument to sqrt 1043 _V __r = __hi_exp * sqrt(__h1 * __h1 + __l1 * __l1); 1044#ifdef __STDC_IEC_559__ 1045 // fixup for Annex F requirements 1046 // the naive fixup goes like this: 1047 // 1048 // where(__l1 == 0, __r) = __hi; 1049 // where(isunordered(__x, __y), __r) = __quiet_NaN_v<_Tp>; 1050 // where(isinf(__absx) || isinf(__absy), __r) = __inf; 1051 // 1052 // The fixup can be prepared in parallel with the sqrt, requiring a 1053 // single blend step after hi_exp * sqrt, reducing latency and 1054 // throughput: 1055 _V __fixup = __hi; // __lo == 0 1056 where(isunordered(__x, __y), __fixup) = __quiet_NaN_v<_Tp>; 1057 where(isinf(__absx) || isinf(__absy), __fixup) = __inf; 1058 where(!(__lo == 0 || isunordered(__x, __y) 1059 || (isinf(__absx) || isinf(__absy))), 1060 __fixup) 1061 = __r; 1062 __r = __fixup; 1063#endif 1064 return __r; 1065 } 1066 } 1067 } 1068 1069template <typename _Tp, typename _Abi> 1070 _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> 1071 hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y) 1072 { 1073 return __hypot<conditional_t<__is_fixed_size_abi_v<_Abi>, 1074 const simd<_Tp, _Abi>&, simd<_Tp, _Abi>>>(__x, 1075 __y); 1076 } 1077 1078_GLIBCXX_SIMD_CVTING2(hypot) 1079 1080 template <typename _VV, typename = __detail::__odr_helper> 1081 __remove_cvref_t<_VV> 1082 __hypot(_VV __x, _VV __y, _VV __z) 1083 { 1084 using _V = __remove_cvref_t<_VV>; 1085 using _Abi = typename _V::abi_type; 1086 using _Tp = typename _V::value_type; 1087 /* FIXME: enable after PR77776 is resolved 1088 if constexpr (_V::size() == 1) 1089 return std::hypot(_Tp(__x[0]), _Tp(__y[0]), _Tp(__z[0])); 1090 else 1091 */ 1092 if constexpr (__is_fixed_size_abi_v<_Abi> && _V::size() > 1) 1093 { 1094 return __fixed_size_apply<simd<_Tp, _Abi>>( 1095 [](auto __a, auto __b, auto __c) { return hypot(__a, __b, __c); }, 1096 __x, __y, __z); 1097 } 1098 else 1099 { 1100 using namespace __float_bitwise_operators; 1101 using namespace __proposed; 1102 const _V __absx = abs(__x); // no error 1103 const _V __absy = abs(__y); // no error 1104 const _V __absz = abs(__z); // no error 1105 _V __hi = max(max(__absx, __absy), __absz); // no error 1106 _V __l0 = min(__absz, max(__absx, __absy)); // no error 1107 _V __l1 = min(__absy, __absx); // no error 1108 if constexpr (__digits_v<_Tp> == 64 && __max_exponent_v<_Tp> == 0x4000 1109 && __min_exponent_v<_Tp> == -0x3FFD && _V::size() == 1) 1110 { // Seems like x87 fp80, where bit 63 is always 1 unless subnormal or 1111 // NaN. In this case the bit-tricks don't work, they require IEC559 1112 // binary32 or binary64 format. 1113#ifdef __STDC_IEC_559__ 1114 // fixup for Annex F requirements 1115 if (isinf(__absx[0]) || isinf(__absy[0]) || isinf(__absz[0])) 1116 return __infinity_v<_Tp>; 1117 else if (isunordered(__absx[0], __absy[0] + __absz[0])) 1118 return __quiet_NaN_v<_Tp>; 1119 else if (__l0[0] == 0 && __l1[0] == 0) 1120 return __hi; 1121#endif 1122 _V __hi_exp = __hi; 1123 const _ULLong __tmp = 0x8000'0000'0000'0000ull; 1124 __builtin_memcpy(&__data(__hi_exp), &__tmp, 8); 1125 const _V __scale = 1 / __hi_exp; 1126 __hi *= __scale; 1127 __l0 *= __scale; 1128 __l1 *= __scale; 1129 return __hi_exp * sqrt((__l0 * __l0 + __l1 * __l1) + __hi * __hi); 1130 } 1131 else 1132 { 1133 // round __hi down to the next power-of-2: 1134 _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>); 1135 1136#ifndef __FAST_MATH__ 1137 if constexpr (_V::size() > 1 && __have_neon && !__have_neon_a32) 1138 { // With ARMv7 NEON, we have no subnormals and must use slightly 1139 // different strategy 1140 const _V __hi_exp = __hi & __inf; 1141 _V __scale_back = __hi_exp; 1142 // For large exponents (max & max/2) the inversion comes too 1143 // close to subnormals. Subtract 3 from the exponent: 1144 where(__hi_exp > 1, __scale_back) = __hi_exp * _Tp(0.125); 1145 // Invert and adjust for the off-by-one error of inversion via 1146 // xor: 1147 const _V __scale = (__scale_back ^ __inf) * _Tp(.5); 1148 const _V __h1 = __hi * __scale; 1149 __l0 *= __scale; 1150 __l1 *= __scale; 1151 _V __lo = __l0 * __l0 1152 + __l1 * __l1; // add the two smaller values first 1153 asm("" : "+m"(__lo)); 1154 _V __r = __scale_back * sqrt(__h1 * __h1 + __lo); 1155 // Fix up hypot(0, 0, 0) to not be NaN: 1156 where(__hi == 0, __r) = 0; 1157 return __r; 1158 } 1159#endif 1160 1161#ifdef __FAST_MATH__ 1162 // With fast-math, ignore precision of subnormals and inputs from 1163 // __finite_max_v/2 to __finite_max_v. This removes all 1164 // branching/masking. 1165 if constexpr (true) 1166#else 1167 if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x)) 1168 && all_of(isnormal(__y)) 1169 && all_of(isnormal(__z)))) 1170#endif 1171 { 1172 const _V __hi_exp = __hi & __inf; 1173 //((__hi + __hi) & __inf) ^ __inf almost works for computing 1174 //__scale, except when (__hi + __hi) & __inf == __inf, in which 1175 // case __scale 1176 // becomes 0 (should be min/2 instead) and thus loses the 1177 // information from __lo. 1178#ifdef __FAST_MATH__ 1179 using _Ip = __int_for_sizeof_t<_Tp>; 1180 using _IV = rebind_simd_t<_Ip, _V>; 1181 const auto __as_int = simd_bit_cast<_IV>(__hi_exp); 1182 const _V __scale 1183 = simd_bit_cast<_V>(2 * __bit_cast<_Ip>(_Tp(1)) - __as_int); 1184#else 1185 const _V __scale = (__hi_exp ^ __inf) * _Tp(.5); 1186#endif 1187 constexpr _Tp __mant_mask 1188 = __norm_min_v<_Tp> - __denorm_min_v<_Tp>; 1189 const _V __h1 = (__hi & _V(__mant_mask)) | _V(1); 1190 __l0 *= __scale; 1191 __l1 *= __scale; 1192 const _V __lo 1193 = __l0 * __l0 1194 + __l1 * __l1; // add the two smaller values first 1195 return __hi_exp * sqrt(__lo + __h1 * __h1); 1196 } 1197 else 1198 { 1199 // slower path to support subnormals 1200 // if __hi is subnormal, avoid scaling by inf & final mul by 0 1201 // (which yields NaN) by using min() 1202 _V __scale = _V(1 / __norm_min_v<_Tp>); 1203 // invert exponent w/o error and w/o using the slow divider 1204 // unit: xor inverts the exponent but off by 1. Multiplication 1205 // with .5 adjusts for the discrepancy. 1206 where(__hi >= __norm_min_v<_Tp>, __scale) 1207 = ((__hi & __inf) ^ __inf) * _Tp(.5); 1208 // adjust final exponent for subnormal inputs 1209 _V __hi_exp = __norm_min_v<_Tp>; 1210 where(__hi >= __norm_min_v<_Tp>, __hi_exp) 1211 = __hi & __inf; // no error 1212 _V __h1 = __hi * __scale; // no error 1213 __l0 *= __scale; // no error 1214 __l1 *= __scale; // no error 1215 _V __lo = __l0 * __l0 1216 + __l1 * __l1; // add the two smaller values first 1217 _V __r = __hi_exp * sqrt(__lo + __h1 * __h1); 1218#ifdef __STDC_IEC_559__ 1219 // fixup for Annex F requirements 1220 _V __fixup = __hi; // __lo == 0 1221 // where(__lo == 0, __fixup) = __hi; 1222 where(isunordered(__x, __y + __z), __fixup) 1223 = __quiet_NaN_v<_Tp>; 1224 where(isinf(__absx) || isinf(__absy) || isinf(__absz), __fixup) 1225 = __inf; 1226 // Instead of __lo == 0, the following could depend on __h1�� == 1227 // __h1�� + __lo (i.e. __hi is so much larger than the other two 1228 // inputs that the result is exactly __hi). While this may 1229 // improve precision, it is likely to reduce efficiency if the 1230 // ISA has FMAs (because __h1�� + __lo is an FMA, but the 1231 // intermediate 1232 // __h1�� must be kept) 1233 where(!(__lo == 0 || isunordered(__x, __y + __z) 1234 || isinf(__absx) || isinf(__absy) || isinf(__absz)), 1235 __fixup) 1236 = __r; 1237 __r = __fixup; 1238#endif 1239 return __r; 1240 } 1241 } 1242 } 1243 } 1244 1245 template <typename _Tp, typename _Abi> 1246 _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> 1247 hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y, 1248 const simd<_Tp, _Abi>& __z) 1249 { 1250 return __hypot<conditional_t<__is_fixed_size_abi_v<_Abi>, 1251 const simd<_Tp, _Abi>&, simd<_Tp, _Abi>>>(__x, 1252 __y, 1253 __z); 1254 } 1255 1256_GLIBCXX_SIMD_CVTING3(hypot) 1257 1258_GLIBCXX_SIMD_MATH_CALL2_(pow, _Tp) 1259 1260_GLIBCXX_SIMD_MATH_CALL_(sqrt) 1261_GLIBCXX_SIMD_MATH_CALL_(erf) 1262_GLIBCXX_SIMD_MATH_CALL_(erfc) 1263_GLIBCXX_SIMD_MATH_CALL_(lgamma) 1264_GLIBCXX_SIMD_MATH_CALL_(tgamma) 1265_GLIBCXX_SIMD_MATH_CALL_(ceil) 1266_GLIBCXX_SIMD_MATH_CALL_(floor) 1267_GLIBCXX_SIMD_MATH_CALL_(nearbyint) 1268_GLIBCXX_SIMD_MATH_CALL_(rint) 1269_GLIBCXX_SIMD_MATH_CALL_(lrint) 1270_GLIBCXX_SIMD_MATH_CALL_(llrint) 1271 1272_GLIBCXX_SIMD_MATH_CALL_(round) 1273_GLIBCXX_SIMD_MATH_CALL_(lround) 1274_GLIBCXX_SIMD_MATH_CALL_(llround) 1275 1276_GLIBCXX_SIMD_MATH_CALL_(trunc) 1277 1278_GLIBCXX_SIMD_MATH_CALL2_(fmod, _Tp) 1279_GLIBCXX_SIMD_MATH_CALL2_(remainder, _Tp) 1280_GLIBCXX_SIMD_MATH_CALL3_(remquo, _Tp, int*) 1281 1282template <typename _Tp, typename _Abi, typename = __detail::__odr_helper> 1283 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> 1284 copysign(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y) 1285 { 1286 if constexpr (simd_size_v<_Tp, _Abi> == 1) 1287 return std::copysign(__x[0], __y[0]); 1288 else if constexpr (__is_fixed_size_abi_v<_Abi>) 1289 return {__private_init, _Abi::_SimdImpl::_S_copysign(__data(__x), __data(__y))}; 1290 else 1291 { 1292 using _V = simd<_Tp, _Abi>; 1293 using namespace std::experimental::__float_bitwise_operators; 1294 _GLIBCXX_SIMD_USE_CONSTEXPR_API auto __signmask = _V(1) ^ _V(-1); 1295 return (__x & ~__signmask) | (__y & __signmask); 1296 } 1297 } 1298 1299_GLIBCXX_SIMD_MATH_CALL2_(nextafter, _Tp) 1300// not covered in [parallel.simd.math]: 1301// _GLIBCXX_SIMD_MATH_CALL2_(nexttoward, long double) 1302_GLIBCXX_SIMD_MATH_CALL2_(fdim, _Tp) 1303_GLIBCXX_SIMD_MATH_CALL2_(fmax, _Tp) 1304_GLIBCXX_SIMD_MATH_CALL2_(fmin, _Tp) 1305 1306_GLIBCXX_SIMD_MATH_CALL3_(fma, _Tp, _Tp) 1307_GLIBCXX_SIMD_MATH_CALL_(fpclassify) 1308_GLIBCXX_SIMD_MATH_CALL_(isfinite) 1309 1310// isnan and isinf require special treatment because old glibc may declare 1311// `int isinf(double)`. 1312template <typename _Tp, typename _Abi, typename..., 1313 typename _R = _Math_return_type_t<bool, _Tp, _Abi>> 1314 _GLIBCXX_SIMD_ALWAYS_INLINE 1315 enable_if_t<is_floating_point_v<_Tp>, _R> 1316 isinf(simd<_Tp, _Abi> __x) 1317 { return {__private_init, _Abi::_SimdImpl::_S_isinf(__data(__x))}; } 1318 1319template <typename _Tp, typename _Abi, typename..., 1320 typename _R = _Math_return_type_t<bool, _Tp, _Abi>> 1321 _GLIBCXX_SIMD_ALWAYS_INLINE 1322 enable_if_t<is_floating_point_v<_Tp>, _R> 1323 isnan(simd<_Tp, _Abi> __x) 1324 { return {__private_init, _Abi::_SimdImpl::_S_isnan(__data(__x))}; } 1325 1326_GLIBCXX_SIMD_MATH_CALL_(isnormal) 1327 1328template <typename..., typename _Tp, typename _Abi> 1329 _GLIBCXX_SIMD_ALWAYS_INLINE 1330 simd_mask<_Tp, _Abi> 1331 signbit(simd<_Tp, _Abi> __x) 1332 { 1333 if constexpr (is_integral_v<_Tp>) 1334 { 1335 if constexpr (is_unsigned_v<_Tp>) 1336 return simd_mask<_Tp, _Abi>{}; // false 1337 else 1338 return __x < 0; 1339 } 1340 else 1341 return {__private_init, _Abi::_SimdImpl::_S_signbit(__data(__x))}; 1342 } 1343 1344_GLIBCXX_SIMD_MATH_CALL2_(isgreater, _Tp) 1345_GLIBCXX_SIMD_MATH_CALL2_(isgreaterequal, _Tp) 1346_GLIBCXX_SIMD_MATH_CALL2_(isless, _Tp) 1347_GLIBCXX_SIMD_MATH_CALL2_(islessequal, _Tp) 1348_GLIBCXX_SIMD_MATH_CALL2_(islessgreater, _Tp) 1349_GLIBCXX_SIMD_MATH_CALL2_(isunordered, _Tp) 1350 1351/* not covered in [parallel.simd.math] 1352template <typename _Abi> __doublev<_Abi> nan(const char* tagp); 1353template <typename _Abi> __floatv<_Abi> nanf(const char* tagp); 1354template <typename _Abi> __ldoublev<_Abi> nanl(const char* tagp); 1355 1356template <typename _V> struct simd_div_t { 1357 _V quot, rem; 1358}; 1359 1360template <typename _Abi> 1361simd_div_t<_SCharv<_Abi>> div(_SCharv<_Abi> numer, 1362 _SCharv<_Abi> denom); 1363template <typename _Abi> 1364simd_div_t<__shortv<_Abi>> div(__shortv<_Abi> numer, 1365 __shortv<_Abi> denom); 1366template <typename _Abi> 1367simd_div_t<__intv<_Abi>> div(__intv<_Abi> numer, __intv<_Abi> denom); 1368template <typename _Abi> 1369simd_div_t<__longv<_Abi>> div(__longv<_Abi> numer, 1370 __longv<_Abi> denom); 1371template <typename _Abi> 1372simd_div_t<__llongv<_Abi>> div(__llongv<_Abi> numer, 1373 __llongv<_Abi> denom); 1374*/ 1375 1376// special math {{{ 1377template <typename _Tp, typename _Abi, typename = __detail::__odr_helper> 1378 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> 1379 assoc_laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n, 1380 const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m, 1381 const simd<_Tp, _Abi>& __x) 1382 { 1383 return simd<_Tp, _Abi>([&](auto __i) { 1384 return std::assoc_laguerre(__n[__i], __m[__i], __x[__i]); 1385 }); 1386 } 1387 1388template <typename _Tp, typename _Abi, typename = __detail::__odr_helper> 1389 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> 1390 assoc_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n, 1391 const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m, 1392 const simd<_Tp, _Abi>& __x) 1393 { 1394 return simd<_Tp, _Abi>([&](auto __i) { 1395 return std::assoc_legendre(__n[__i], __m[__i], __x[__i]); 1396 }); 1397 } 1398 1399_GLIBCXX_SIMD_MATH_CALL2_(beta, _Tp) 1400_GLIBCXX_SIMD_MATH_CALL_(comp_ellint_1) 1401_GLIBCXX_SIMD_MATH_CALL_(comp_ellint_2) 1402_GLIBCXX_SIMD_MATH_CALL2_(comp_ellint_3, _Tp) 1403_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_i, _Tp) 1404_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_j, _Tp) 1405_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_k, _Tp) 1406_GLIBCXX_SIMD_MATH_CALL2_(cyl_neumann, _Tp) 1407_GLIBCXX_SIMD_MATH_CALL2_(ellint_1, _Tp) 1408_GLIBCXX_SIMD_MATH_CALL2_(ellint_2, _Tp) 1409_GLIBCXX_SIMD_MATH_CALL3_(ellint_3, _Tp, _Tp) 1410_GLIBCXX_SIMD_MATH_CALL_(expint) 1411 1412template <typename _Tp, typename _Abi, typename = __detail::__odr_helper> 1413 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> 1414 hermite(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n, 1415 const simd<_Tp, _Abi>& __x) 1416 { 1417 return simd<_Tp, _Abi>( 1418 [&](auto __i) { return std::hermite(__n[__i], __x[__i]); }); 1419 } 1420 1421template <typename _Tp, typename _Abi, typename = __detail::__odr_helper> 1422 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> 1423 laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n, 1424 const simd<_Tp, _Abi>& __x) 1425 { 1426 return simd<_Tp, _Abi>( 1427 [&](auto __i) { return std::laguerre(__n[__i], __x[__i]); }); 1428 } 1429 1430template <typename _Tp, typename _Abi, typename = __detail::__odr_helper> 1431 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> 1432 legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n, 1433 const simd<_Tp, _Abi>& __x) 1434 { 1435 return simd<_Tp, _Abi>( 1436 [&](auto __i) { return std::legendre(__n[__i], __x[__i]); }); 1437 } 1438 1439_GLIBCXX_SIMD_MATH_CALL_(riemann_zeta) 1440 1441template <typename _Tp, typename _Abi, typename = __detail::__odr_helper> 1442 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> 1443 sph_bessel(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n, 1444 const simd<_Tp, _Abi>& __x) 1445 { 1446 return simd<_Tp, _Abi>( 1447 [&](auto __i) { return std::sph_bessel(__n[__i], __x[__i]); }); 1448 } 1449 1450template <typename _Tp, typename _Abi, typename = __detail::__odr_helper> 1451 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> 1452 sph_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __l, 1453 const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m, 1454 const simd<_Tp, _Abi>& theta) 1455 { 1456 return simd<_Tp, _Abi>([&](auto __i) { 1457 return std::assoc_legendre(__l[__i], __m[__i], theta[__i]); 1458 }); 1459 } 1460 1461template <typename _Tp, typename _Abi, typename = __detail::__odr_helper> 1462 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> 1463 sph_neumann(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n, 1464 const simd<_Tp, _Abi>& __x) 1465 { 1466 return simd<_Tp, _Abi>( 1467 [&](auto __i) { return std::sph_neumann(__n[__i], __x[__i]); }); 1468 } 1469// }}} 1470 1471#undef _GLIBCXX_SIMD_CVTING2 1472#undef _GLIBCXX_SIMD_CVTING3 1473#undef _GLIBCXX_SIMD_MATH_CALL_ 1474#undef _GLIBCXX_SIMD_MATH_CALL2_ 1475#undef _GLIBCXX_SIMD_MATH_CALL3_ 1476 1477_GLIBCXX_SIMD_END_NAMESPACE 1478 1479#endif // __cplusplus >= 201703L 1480#endif // _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_ 1481 1482// vim: foldmethod=marker sw=2 ts=8 noet sts=2 1483