1// Math overloads for simd -*- C++ -*-
2
3// Copyright (C) 2020-2022 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library.  This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23// <http://www.gnu.org/licenses/>.
24
25#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_
26#define _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_
27
28#if __cplusplus >= 201703L
29
30#include <utility>
31#include <iomanip>
32
33_GLIBCXX_SIMD_BEGIN_NAMESPACE
// Alias: a fixed_size_simd of element type _Tp with the same element count as
// the simd/simd_mask type _V.
template <typename _Tp, typename _V>
  using _Samesize = fixed_size_simd<_Tp, _V::size()>;
36
37// _Math_return_type {{{
// Maps the return type of the scalar libm function (as deduced for a double
// argument) to the corresponding simd return type:
//  - double -> simd<_Tp, _Abi>       (value-returning math function)
//  - bool   -> simd_mask<_Tp, _Abi>  (classification function)
//  - anything else (e.g. int for ilogb) -> fixed_size_simd of that type with
//    a matching element count
template <typename _DoubleR, typename _Tp, typename _Abi>
  struct _Math_return_type;

template <typename _DoubleR, typename _Tp, typename _Abi>
  using _Math_return_type_t =
    typename _Math_return_type<_DoubleR, _Tp, _Abi>::type;

// libm returns double => vector of the input element type
template <typename _Tp, typename _Abi>
  struct _Math_return_type<double, _Tp, _Abi>
  { using type = simd<_Tp, _Abi>; };

// libm returns bool => mask type of the input
template <typename _Tp, typename _Abi>
  struct _Math_return_type<bool, _Tp, _Abi>
  { using type = simd_mask<_Tp, _Abi>; };

// fallback for any other scalar return type
template <typename _DoubleR, typename _Tp, typename _Abi>
  struct _Math_return_type
  { using type = fixed_size_simd<_DoubleR, simd_size_v<_Tp, _Abi>>; };
56
57//}}}
58// _GLIBCXX_SIMD_MATH_CALL_ {{{
// Generates a unary, element-wise math function overload
// `__name(simd<_Tp, _Abi>)` for floating-point element types. The call is
// forwarded to the ABI-specific implementation `_S_<name>`; the return type
// is derived from the scalar `std::__name(double)` overload via
// _Math_return_type (so e.g. classification functions yield a mask).
#define _GLIBCXX_SIMD_MATH_CALL_(__name)                                       \
template <typename _Tp, typename _Abi, typename...,                            \
	  typename _R = _Math_return_type_t<                                   \
	    decltype(std::__name(declval<double>())), _Tp, _Abi>>              \
  _GLIBCXX_SIMD_ALWAYS_INLINE                                                  \
  enable_if_t<is_floating_point_v<_Tp>, _R>                                    \
  __name(simd<_Tp, _Abi> __x)                                                  \
  { return {__private_init, _Abi::_SimdImpl::_S_##__name(__data(__x))}; }
67
68// }}}
69//_Extra_argument_type{{{
// Maps the scalar libm signature's extra-argument type _Up to the simd
// equivalent used by the generated overloads. Each specialization provides:
//  - `type`: the simd-ified argument type,
//  - `declval()`: a declaration used (unevaluated) to re-deduce the scalar
//    libm return type,
//  - `__needs_temporary_scalar`: whether a scalar implementation must write
//    through a temporary (pointer arguments),
//  - `_S_data(type)`: access to the wrapped low-level data.
template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type;

// _Tp* (e.g. the second argument of modf): pointer to a simd of the same type
template <typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Tp*, _Tp, _Abi>
  {
    using type = simd<_Tp, _Abi>*;
    static constexpr double* declval();
    static constexpr bool __needs_temporary_scalar = true;

    _GLIBCXX_SIMD_INTRINSIC static constexpr auto _S_data(type __x)
    { return &__data(*__x); }
  };

// _Up* with integral _Up (e.g. int* for frexp): pointer to a fixed_size_simd
// of _Up with matching element count
template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Up*, _Tp, _Abi>
  {
    static_assert(is_integral_v<_Up>);
    using type = fixed_size_simd<_Up, simd_size_v<_Tp, _Abi>>*;
    static constexpr _Up* declval();
    static constexpr bool __needs_temporary_scalar = true;

    _GLIBCXX_SIMD_INTRINSIC static constexpr auto _S_data(type __x)
    { return &__data(*__x); }
  };

// by-value _Tp (e.g. the second argument of atan2): same simd type as the
// first argument
template <typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Tp, _Tp, _Abi>
  {
    using type = simd<_Tp, _Abi>;
    static constexpr double declval();
    static constexpr bool __needs_temporary_scalar = false;

    _GLIBCXX_SIMD_INTRINSIC static constexpr decltype(auto)
    _S_data(const type& __x)
    { return __data(__x); }
  };

// by-value integral _Up (e.g. int for ldexp): fixed_size_simd of _Up with
// matching element count
template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type
  {
    static_assert(is_integral_v<_Up>);
    using type = fixed_size_simd<_Up, simd_size_v<_Tp, _Abi>>;
    static constexpr _Up declval();
    static constexpr bool __needs_temporary_scalar = false;

    _GLIBCXX_SIMD_INTRINSIC static constexpr decltype(auto)
    _S_data(const type& __x)
    { return __data(__x); }
  };
120
121//}}}
122// _GLIBCXX_SIMD_MATH_CALL2_ {{{
// Generates a binary, element-wise math function overload for floating-point
// element types. The second parameter's type is given by __arg2 (a scalar
// type) and is mapped to its simd equivalent via _Extra_argument_type. A
// second overload handles a convertible-but-not-simd first argument (e.g.
// `atan2(1.f, some_simd)`) by broadcasting it and forwarding to the first
// overload; it is constrained so it never competes with the primary one.
#define _GLIBCXX_SIMD_MATH_CALL2_(__name, __arg2)                              \
template <                                                                     \
  typename _Tp, typename _Abi, typename...,                                    \
  typename _Arg2 = _Extra_argument_type<__arg2, _Tp, _Abi>,                    \
  typename _R = _Math_return_type_t<                                           \
    decltype(std::__name(declval<double>(), _Arg2::declval())), _Tp, _Abi>>    \
  _GLIBCXX_SIMD_ALWAYS_INLINE                                                  \
  enable_if_t<is_floating_point_v<_Tp>, _R>                                    \
  __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y)          \
  {                                                                            \
    return {__private_init,                                                    \
	    _Abi::_SimdImpl::_S_##__name(__data(__x), _Arg2::_S_data(__y))};   \
  }                                                                            \
template <typename _Up, typename _Tp, typename _Abi>                           \
  _GLIBCXX_SIMD_INTRINSIC _Math_return_type_t<                                 \
    decltype(std::__name(                                                      \
      declval<double>(),                                                       \
      declval<enable_if_t<                                                     \
	conjunction_v<                                                         \
	  is_same<__arg2, _Tp>,                                                \
	  negation<is_same<__remove_cvref_t<_Up>, simd<_Tp, _Abi>>>,           \
	  is_convertible<_Up, simd<_Tp, _Abi>>, is_floating_point<_Tp>>,       \
	double>>())),                                                          \
    _Tp, _Abi>                                                                 \
  __name(_Up&& __xx, const simd<_Tp, _Abi>& __yy)                              \
  { return __name(simd<_Tp, _Abi>(static_cast<_Up&&>(__xx)), __yy); }
149
150// }}}
151// _GLIBCXX_SIMD_MATH_CALL3_ {{{
// Generates a ternary, element-wise math function overload (e.g. fma, fmin
// with three args) for floating-point element types. The second and third
// parameter types are mapped via _Extra_argument_type. A second overload
// accepts any mix of arguments convertible to the simd type (deduced from
// whichever of arguments 2/3 is a simd), broadcasts them, and forwards to the
// primary overload; it is disabled when the first argument is already a
// floating-point simd so the two overloads never collide.
#define _GLIBCXX_SIMD_MATH_CALL3_(__name, __arg2, __arg3)                      \
template <typename _Tp, typename _Abi, typename...,                            \
	  typename _Arg2 = _Extra_argument_type<__arg2, _Tp, _Abi>,            \
	  typename _Arg3 = _Extra_argument_type<__arg3, _Tp, _Abi>,            \
	  typename _R = _Math_return_type_t<                                   \
	    decltype(std::__name(declval<double>(), _Arg2::declval(),          \
				 _Arg3::declval())),                           \
	    _Tp, _Abi>>                                                        \
  _GLIBCXX_SIMD_ALWAYS_INLINE                                                  \
  enable_if_t<is_floating_point_v<_Tp>, _R>                                    \
  __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y,          \
	 const typename _Arg3::type& __z)                                      \
  {                                                                            \
    return {__private_init,                                                    \
	    _Abi::_SimdImpl::_S_##__name(__data(__x), _Arg2::_S_data(__y),     \
					 _Arg3::_S_data(__z))};                \
  }                                                                            \
template <                                                                     \
  typename _T0, typename _T1, typename _T2, typename...,                       \
  typename _U0 = __remove_cvref_t<_T0>,                                        \
  typename _U1 = __remove_cvref_t<_T1>,                                        \
  typename _U2 = __remove_cvref_t<_T2>,                                        \
  typename _Simd = conditional_t<is_simd_v<_U1>, _U1, _U2>,                    \
  typename = enable_if_t<conjunction_v<                                        \
    is_simd<_Simd>, is_convertible<_T0&&, _Simd>,                              \
    is_convertible<_T1&&, _Simd>, is_convertible<_T2&&, _Simd>,                \
    negation<conjunction<                                                      \
      is_simd<_U0>, is_floating_point<__value_type_or_identity_t<_U0>>>>>>>    \
  _GLIBCXX_SIMD_INTRINSIC decltype(__name(declval<const _Simd&>(),             \
					  declval<const _Simd&>(),             \
					  declval<const _Simd&>()))            \
  __name(_T0&& __xx, _T1&& __yy, _T2&& __zz)                                   \
  {                                                                            \
    return __name(_Simd(static_cast<_T0&&>(__xx)),                             \
		  _Simd(static_cast<_T1&&>(__yy)),                             \
		  _Simd(static_cast<_T2&&>(__zz)));                            \
  }
189
190// }}}
191// __cosSeries {{{
// Polynomial approximation of cos(x) for x in [-π/4, π/4], single precision.
// Evaluates the even Taylor series (coefficients tuned) via Horner's scheme.
// NOTE: the evaluation order of the floating-point operations is part of the
// accuracy contract; do not reassociate.
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<float, _Abi>
  __cosSeries(const simd<float, _Abi>& __x)
  {
    const simd<float, _Abi> __x2 = __x * __x;
    simd<float, _Abi> __y;
    __y = 0x1.ap-16f;                  //  1/8!
    __y = __y * __x2 - 0x1.6c1p-10f;   // -1/6!
    __y = __y * __x2 + 0x1.555556p-5f; //  1/4!
    // the final 1 - x²/2 terms are added last, at full precision
    return __y * (__x2 * __x2) - .5f * __x2 + 1.f;
  }
203
// Polynomial approximation of cos(x) for x in [-π/4, π/4], double precision.
// Horner evaluation of the even Taylor series with tuned coefficients; the
// operation order is deliberate and must not be reassociated. (.5f and 1.f
// are exact in double, so the float literals are harmless.)
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<double, _Abi>
  __cosSeries(const simd<double, _Abi>& __x)
  {
    const simd<double, _Abi> __x2 = __x * __x;
    simd<double, _Abi> __y;
    __y = 0x1.AC00000000000p-45;              //  1/16!
    __y = __y * __x2 - 0x1.9394000000000p-37; // -1/14!
    __y = __y * __x2 + 0x1.1EED8C0000000p-29; //  1/12!
    __y = __y * __x2 - 0x1.27E4FB7400000p-22; // -1/10!
    __y = __y * __x2 + 0x1.A01A01A018000p-16; //  1/8!
    __y = __y * __x2 - 0x1.6C16C16C16C00p-10; // -1/6!
    __y = __y * __x2 + 0x1.5555555555554p-5;  //  1/4!
    return (__y * __x2 - .5f) * __x2 + 1.f;
  }
219
220// }}}
221// __sinSeries {{{
// Polynomial approximation of sin(x) for x in [-π/4, π/4], single precision.
// Horner evaluation of the odd Taylor series with tuned coefficients; the
// evaluation order is part of the accuracy contract.
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<float, _Abi>
  __sinSeries(const simd<float, _Abi>& __x)
  {
    const simd<float, _Abi> __x2 = __x * __x;
    simd<float, _Abi> __y;
    __y = -0x1.9CC000p-13f;            // -1/7!
    __y = __y * __x2 + 0x1.111100p-7f; //  1/5!
    __y = __y * __x2 - 0x1.555556p-3f; // -1/3!
    // adding x last keeps the dominant term at full precision
    return __y * (__x2 * __x) + __x;
  }
233
// Polynomial approximation of sin(x) for x in [-π/4, π/4], double precision.
// Horner evaluation of the odd Taylor series; do not reassociate.
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<double, _Abi>
  __sinSeries(const simd<double, _Abi>& __x)
  {
    // __x  = [0, 0.7854 = π/4]
    // __x² = [0, 0.6169 = π²/16]
    const simd<double, _Abi> __x2 = __x * __x;
    simd<double, _Abi> __y;
    __y = -0x1.ACF0000000000p-41;             // -1/15!
    __y = __y * __x2 + 0x1.6124400000000p-33; //  1/13!
    __y = __y * __x2 - 0x1.AE64567000000p-26; // -1/11!
    __y = __y * __x2 + 0x1.71DE3A5540000p-19; //  1/9!
    __y = __y * __x2 - 0x1.A01A01A01A000p-13; // -1/7!
    __y = __y * __x2 + 0x1.1111111111110p-7;  //  1/5!
    __y = __y * __x2 - 0x1.5555555555555p-3;  // -1/3!
    // adding x last keeps the dominant term at full precision
    return __y * (__x2 * __x) + __x;
  }
251
252// }}}
253// __zero_low_bits {{{
254template <int _Bits, typename _Tp, typename _Abi>
255  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
256  __zero_low_bits(simd<_Tp, _Abi> __x)
257  {
258    const simd<_Tp, _Abi> __bitmask
259      = __bit_cast<_Tp>(~make_unsigned_t<__int_for_sizeof_t<_Tp>>() << _Bits);
260    return {__private_init,
261	    _Abi::_SimdImpl::_S_bit_and(__data(__x), __data(__bitmask))};
262  }
263
264// }}}
265// __fold_input {{{
266
/**@internal
 * Fold @p x into [-¼π, ¼π] and remember the quadrant it came from:
 * quadrant 0: [-¼π,  ¼π]
 * quadrant 1: [ ¼π,  ¾π]
 * quadrant 2: [ ¾π, 1¼π]
 * quadrant 3: [1¼π, 1¾π]
 *
 * The algorithm determines `y` as the multiple `x - y * ½π = [-¼π, ¼π]`. Using
 * a bitmask, `y` is reduced to `quadrant`. `y` can be calculated as
 * ```
 * y = trunc(x / ¼π);
 * y += fmod(y, 2);
 * ```
 * This can be simplified by moving the (implicit) division by 2 into the
 * truncation expression. The `+= fmod` effect can then be achieved by using
 * rounding instead of truncation: `y = round(x / ½π) * 2`. If precision allows,
 * `2/π * x` is better (faster).
 */
// Result of __fold_input: the reduced argument and the quadrant it came from.
template <typename _Tp, typename _Abi>
  struct _Folded
  {
    simd<_Tp, _Abi> _M_x;                            // folded into [-π/4, π/4]
    rebind_simd_t<int, simd<_Tp, _Abi>> _M_quadrant; // in [0, 3]
  };
291
// Constants used by the argument reduction below, in single precision ...
namespace __math_float {
inline constexpr float __pi_over_4 = 0x1.921FB6p-1f; // π/4
inline constexpr float __2_over_pi = 0x1.45F306p-1f; // 2/π
inline constexpr float __pi_2_5bits0
  = 0x1.921fc0p0f; // π/2, 5 0-bits (least significant)
inline constexpr float __pi_2_5bits0_rem
  = -0x1.5777a6p-21f; // π/2 - __pi_2_5bits0
} // namespace __math_float
// ... and in double precision
namespace __math_double {
inline constexpr double __pi_over_4 = 0x1.921fb54442d18p-1; // π/4
inline constexpr double __2_over_pi = 0x1.45F306DC9C883p-1; // 2/π
inline constexpr double __pi_2 = 0x1.921fb54442d18p0;       // π/2
} // namespace __math_double
305
// Argument reduction for single precision: fold |x| into [-π/4, π/4] and
// record the quadrant (see the _Folded documentation above).
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _Folded<float, _Abi>
  __fold_input(const simd<float, _Abi>& __x)
  {
    using _V = simd<float, _Abi>;
    using _IV = rebind_simd_t<int, _V>;
    using namespace __math_float;
    _Folded<float, _Abi> __r;
    __r._M_x = abs(__x); // the sin/cos callers handle the sign themselves
#if 0
    // disabled alternative reduction; zero most mantissa bits:
    constexpr float __1_over_pi = 0x1.45F306p-2f; // 1/π
    const auto __y = (__r._M_x * __1_over_pi + 0x1.8p23f) - 0x1.8p23f;
    // split π into 4 parts, the first three with 13 trailing zeros (to make the
    // following multiplications precise):
    constexpr float __pi0 = 0x1.920000p1f;
    constexpr float __pi1 = 0x1.fb4000p-11f;
    constexpr float __pi2 = 0x1.444000p-23f;
    constexpr float __pi3 = 0x1.68c234p-38f;
    __r._M_x - __y*__pi0 - __y*__pi1 - __y*__pi2 - __y*__pi3
#else
    if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__r._M_x < __pi_over_4)))
      __r._M_quadrant = 0; // all inputs already in the first quadrant
    else if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__r._M_x < 6 * __pi_over_4)))
      {
	// moderate inputs: subtract y·π/2 in two steps (head with 5 zero
	// low bits, then the remainder) so each product is exact enough
	const _V __y = nearbyint(__r._M_x * __2_over_pi);
	__r._M_quadrant = static_simd_cast<_IV>(__y) & 3; // __y mod 4
	__r._M_x -= __y * __pi_2_5bits0;
	__r._M_x -= __y * __pi_2_5bits0_rem;
      }
    else
      {
	// large inputs: perform the reduction in double precision
	using __math_double::__2_over_pi;
	using __math_double::__pi_2;
	using _VD = rebind_simd_t<double, _V>;
	_VD __xd = static_simd_cast<_VD>(__r._M_x);
	_VD __y = nearbyint(__xd * __2_over_pi);
	__r._M_quadrant = static_simd_cast<_IV>(__y) & 3; // = __y mod 4
	__r._M_x = static_simd_cast<_V>(__xd - __y * __pi_2);
      }
#endif
    return __r;
  }
349
// Argument reduction for double precision: fold |x| into [-π/4, π/4] and
// record the quadrant. π/2 is split into progressively more pieces the larger
// y = round(x·2/π) is, so that every y·piece product stays exact (y's
// mantissa bits plus the piece's mantissa bits fit into a double).
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _Folded<double, _Abi>
  __fold_input(const simd<double, _Abi>& __x)
  {
    using _V = simd<double, _Abi>;
    using _IV = rebind_simd_t<int, _V>;
    using namespace __math_double;

    _Folded<double, _Abi> __r;
    __r._M_x = abs(__x); // the sin/cos callers handle the sign themselves
    if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__r._M_x < __pi_over_4)))
      {
	// all inputs already in the first quadrant
	__r._M_quadrant = 0;
	return __r;
      }
    const _V __y = nearbyint(__r._M_x / (2 * __pi_over_4));
    __r._M_quadrant = static_simd_cast<_IV>(__y) & 3;

    if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__r._M_x < 1025 * __pi_over_4)))
      {
	// x - y * pi/2, y uses no more than 11 mantissa bits
	__r._M_x -= __y * 0x1.921FB54443000p0;
	__r._M_x -= __y * -0x1.73DCB3B39A000p-43;
	__r._M_x -= __y * 0x1.45C06E0E68948p-86;
      }
    else if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__y <= 0x1.0p30)))
      {
	// x - y * pi/2, y uses no more than 29 mantissa bits
	__r._M_x -= __y * 0x1.921FB40000000p0;
	__r._M_x -= __y * 0x1.4442D00000000p-24;
	__r._M_x -= __y * 0x1.8469898CC5170p-48;
      }
    else
      {
	// x - y * pi/2, y may require all mantissa bits
	const _V __y_hi = __zero_low_bits<26>(__y);
	const _V __y_lo = __y - __y_hi;
	const auto __pi_2_1 = 0x1.921FB50000000p0;
	const auto __pi_2_2 = 0x1.110B460000000p-26;
	const auto __pi_2_3 = 0x1.1A62630000000p-54;
	const auto __pi_2_4 = 0x1.8A2E03707344Ap-81;
	// subtract the partial products in decreasing order of magnitude;
	// the max/min pairs order same-magnitude terms to limit rounding
	// error in the accumulation
	__r._M_x = __r._M_x - __y_hi * __pi_2_1
		   - max(__y_hi * __pi_2_2, __y_lo * __pi_2_1)
		   - min(__y_hi * __pi_2_2, __y_lo * __pi_2_1)
		   - max(__y_hi * __pi_2_3, __y_lo * __pi_2_2)
		   - min(__y_hi * __pi_2_3, __y_lo * __pi_2_2)
		   - max(__y * __pi_2_4, __y_lo * __pi_2_3)
		   - min(__y * __pi_2_4, __y_lo * __pi_2_3);
      }
    return __r;
  }
401
402// }}}
403// __extract_exponent_as_int {{{
// Extracts the raw (biased) exponent field of each element as simd<int>:
// mask the exponent bits (the bit pattern of +inf), then shift the mantissa
// bits away. No bias correction is applied here.
template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC
  rebind_simd_t<int, simd<_Tp, _Abi>>
  __extract_exponent_as_int(const simd<_Tp, _Abi>& __v)
  {
    using _Vp = simd<_Tp, _Abi>;
    using _Up = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
    using namespace std::experimental::__float_bitwise_operators;
    using namespace std::experimental::__proposed;
    const _Vp __exponent_mask
      = __infinity_v<_Tp>; // 0x7f800000 or 0x7ff0000000000000
    // unsigned shift: __digits_v<_Tp> - 1 is the number of mantissa bits
    return static_simd_cast<rebind_simd_t<int, _Vp>>(
	     simd_bit_cast<rebind_simd_t<_Up, _Vp>>(__v & __exponent_mask)
      >> (__digits_v<_Tp> - 1));
  }
419
420// }}}
421// __impl_or_fallback {{{
// Calls __impl_fun(args...) if that expression is well-formed, otherwise
// __fallback_fun(args...). The int/float first parameter is the classic
// tag-dispatch trick: the int(0) argument prefers the `int` overload, and
// SFINAE on its trailing return type falls back to the `float` overload.
template <typename ImplFun, typename FallbackFun, typename... _Args>
  _GLIBCXX_SIMD_INTRINSIC auto
  __impl_or_fallback_dispatch(int, ImplFun&& __impl_fun, FallbackFun&&,
			      _Args&&... __args)
    -> decltype(__impl_fun(static_cast<_Args&&>(__args)...))
  { return __impl_fun(static_cast<_Args&&>(__args)...); }

// fallback overload, selected only when the impl call is ill-formed
template <typename ImplFun, typename FallbackFun, typename... _Args,
	  typename = __detail::__odr_helper>
  inline auto
  __impl_or_fallback_dispatch(float, ImplFun&&, FallbackFun&& __fallback_fun,
			      _Args&&... __args)
    -> decltype(__fallback_fun(static_cast<_Args&&>(__args)...))
  { return __fallback_fun(static_cast<_Args&&>(__args)...); }

// entry point: forwards to the dispatcher with the int tag
template <typename... _Args>
  _GLIBCXX_SIMD_INTRINSIC auto
  __impl_or_fallback(_Args&&... __args)
  {
    return __impl_or_fallback_dispatch(int(), static_cast<_Args&&>(__args)...);
  }
443//}}}
444
445// trigonometric functions {{{
// element-wise inverse trigonometric functions; atan2 additionally takes a
// second simd argument of the same type
_GLIBCXX_SIMD_MATH_CALL_(acos)
_GLIBCXX_SIMD_MATH_CALL_(asin)
_GLIBCXX_SIMD_MATH_CALL_(atan)
_GLIBCXX_SIMD_MATH_CALL2_(atan2, _Tp)
450
/*
 * algorithm for sine and cosine:
 *
 * The result can be calculated with sine or cosine depending on the π/4
 * section the input is in. sine ≈ __x + __x³, cosine ≈ 1 - __x².
 *
 * sine:
 * Map -__x to __x and invert the output.
 * Extend precision of __x - n * π/4 by calculating
 * ((__x - n * p1) - n * p2) - n * p3 (p1 + p2 + p3 = π/4)
 *
 * Calculate Taylor series with tuned coefficients.
 * Fix sign.
 */
465// cos{{{
// Element-wise cos for vector ABIs: reduce the argument to [-π/4, π/4],
// evaluate the sine or cosine polynomial per the quadrant, and fix the sign.
template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  cos(const simd<_Tp, _Abi>& __x)
  {
    using _V = simd<_Tp, _Abi>;
    if constexpr (__is_scalar_abi<_Abi>() || __is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_cos(__data(__x))};
    else
      {
	// large float inputs: the single-precision reduction loses too much
	// accuracy, so recurse through the double-precision implementation
	if constexpr (is_same_v<_Tp, float>)
	  if (_GLIBCXX_SIMD_IS_UNLIKELY(any_of(abs(__x) >= 393382)))
	    return static_simd_cast<_V>(
	      cos(static_simd_cast<rebind_simd_t<double, _V>>(__x)));

	const auto __f = __fold_input(__x);
	// quadrant | effect
	//        0 | cosSeries, +
	//        1 | sinSeries, -
	//        2 | cosSeries, -
	//        3 | sinSeries, +
	using namespace std::experimental::__float_bitwise_operators;
	// (1 + quadrant) << 30 sets bit 31 exactly for quadrants 1 and 2;
	// masking with -0.f turns that into a per-element sign-flip mask
	const _V __sign_flip
	  = _V(-0.f) & static_simd_cast<_V>((1 + __f._M_quadrant) << 30);

	// even quadrants use the cosine series, odd ones the sine series
	const auto __need_cos = (__f._M_quadrant & 1) == 0;
	if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__need_cos)))
	  return __sign_flip ^ __cosSeries(__f._M_x);
	else if (_GLIBCXX_SIMD_IS_UNLIKELY(none_of(__need_cos)))
	  return __sign_flip ^ __sinSeries(__f._M_x);
	else // some_of(__need_cos)
	  {
	    _V __r = __sinSeries(__f._M_x);
	    where(__need_cos.__cvt(), __r) = __cosSeries(__f._M_x);
	    return __r ^ __sign_flip;
	  }
      }
  }
503
504template <typename _Tp>
505  _GLIBCXX_SIMD_ALWAYS_INLINE
506  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, simd_abi::scalar>>
507  cos(simd<_Tp, simd_abi::scalar> __x)
508  { return std::cos(__data(__x)); }
509
510//}}}
511// sin{{{
// Element-wise sin for vector ABIs: reduce the argument to [-π/4, π/4],
// evaluate the sine or cosine polynomial per the quadrant, and fix the sign.
template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  sin(const simd<_Tp, _Abi>& __x)
  {
    using _V = simd<_Tp, _Abi>;
    if constexpr (__is_scalar_abi<_Abi>() || __is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_sin(__data(__x))};
    else
      {
	// large float inputs: the single-precision reduction loses too much
	// accuracy, so recurse through the double-precision implementation
	if constexpr (is_same_v<_Tp, float>)
	  if (_GLIBCXX_SIMD_IS_UNLIKELY(any_of(abs(__x) >= 527449)))
	    return static_simd_cast<_V>(
	      sin(static_simd_cast<rebind_simd_t<double, _V>>(__x)));

	const auto __f = __fold_input(__x);
	// quadrant | effect
	//        0 | sinSeries
	//        1 | cosSeries
	//        2 | sinSeries, sign flip
	//        3 | cosSeries, sign flip
	using namespace std::experimental::__float_bitwise_operators;
	// flip the sign for quadrants 2/3 (1 - quadrant is negative there)
	// and additionally for negative inputs (sign of __x), via XOR of the
	// two sign bits
	const auto __sign_flip
	  = (__x ^ static_simd_cast<_V>(1 - __f._M_quadrant)) & _V(_Tp(-0.));

	// even quadrants use the sine series, odd ones the cosine series
	const auto __need_sin = (__f._M_quadrant & 1) == 0;
	if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__need_sin)))
	  return __sign_flip ^ __sinSeries(__f._M_x);
	else if (_GLIBCXX_SIMD_IS_UNLIKELY(none_of(__need_sin)))
	  return __sign_flip ^ __cosSeries(__f._M_x);
	else // some_of(__need_sin)
	  {
	    _V __r = __cosSeries(__f._M_x);
	    where(__need_sin.__cvt(), __r) = __sinSeries(__f._M_x);
	    return __sign_flip ^ __r;
	  }
      }
  }
549
550template <typename _Tp>
551  _GLIBCXX_SIMD_ALWAYS_INLINE
552  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, simd_abi::scalar>>
553  sin(simd<_Tp, simd_abi::scalar> __x)
554  { return std::sin(__data(__x)); }
555
556//}}}
// remaining trigonometric, inverse-hyperbolic, and hyperbolic functions,
// forwarded element-wise to the ABI-specific implementation
_GLIBCXX_SIMD_MATH_CALL_(tan)
_GLIBCXX_SIMD_MATH_CALL_(acosh)
_GLIBCXX_SIMD_MATH_CALL_(asinh)
_GLIBCXX_SIMD_MATH_CALL_(atanh)
_GLIBCXX_SIMD_MATH_CALL_(cosh)
_GLIBCXX_SIMD_MATH_CALL_(sinh)
_GLIBCXX_SIMD_MATH_CALL_(tanh)
564// }}}
565// exponential functions {{{
// element-wise exponential functions: e^x, 2^x, and e^x - 1
_GLIBCXX_SIMD_MATH_CALL_(exp)
_GLIBCXX_SIMD_MATH_CALL_(exp2)
_GLIBCXX_SIMD_MATH_CALL_(expm1)
569
570// }}}
571// frexp {{{
572#if _GLIBCXX_SIMD_X86INTRIN
// Wraps the AVX512 vgetexp instruction for all supported vector widths:
// returns floor(log2(|x|)) per element as a floating-point value (the
// documented semantics of _mm*_getexp_p{s,d}). Widths without a native
// instruction (no AVX512VL) are widened to 512 bits and narrowed back.
template <typename _Tp, size_t _Np>
  _GLIBCXX_SIMD_INTRINSIC
  _SimdWrapper<_Tp, _Np>
  __getexp(_SimdWrapper<_Tp, _Np> __x)
  {
    if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm_getexp_ps(__to_intrin(__x)));
    else if constexpr (__have_avx512f && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm512_getexp_ps(__auto_bitcast(__to_intrin(__x))));
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return _mm_getexp_pd(__x);
    else if constexpr (__have_avx512f && __is_sse_pd<_Tp, _Np>())
      return __lo128(_mm512_getexp_pd(__auto_bitcast(__x)));
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return _mm256_getexp_ps(__x);
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return __lo256(_mm512_getexp_ps(__auto_bitcast(__x)));
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return _mm256_getexp_pd(__x);
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return __lo256(_mm512_getexp_pd(__auto_bitcast(__x)));
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return _mm512_getexp_ps(__x);
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return _mm512_getexp_pd(__x);
    else
      __assert_unreachable<_Tp>();
  }
601
// Wraps the AVX512 vgetmant instruction for all supported vector widths:
// returns each element's mantissa normalized to [0.5, 1) with the sign of
// the source kept (_MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src) — exactly the
// mantissa convention frexp requires. Widths without a native instruction
// (no AVX512VL) are widened to 512 bits and narrowed back.
template <typename _Tp, size_t _Np>
  _GLIBCXX_SIMD_INTRINSIC
  _SimdWrapper<_Tp, _Np>
  __getmant_avx512(_SimdWrapper<_Tp, _Np> __x)
  {
    if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm_getmant_ps(__to_intrin(__x), _MM_MANT_NORM_p5_1,
					   _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512f && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm512_getmant_ps(__auto_bitcast(__to_intrin(__x)),
					      _MM_MANT_NORM_p5_1,
					      _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return _mm_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_sse_pd<_Tp, _Np>())
      return __lo128(_mm512_getmant_pd(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
				       _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return _mm256_getmant_ps(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return __lo256(_mm512_getmant_ps(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
				       _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return _mm256_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return __lo256(_mm512_getmant_pd(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
				       _MM_MANT_SIGN_src));
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return _mm512_getmant_ps(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return _mm512_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else
      __assert_unreachable<_Tp>();
  }
636#endif // _GLIBCXX_SIMD_X86INTRIN
637
638/**
639 * splits @p __v into exponent and mantissa, the sign is kept with the mantissa
640 *
641 * The return value will be in the range [0.5, 1.0[
642 * The @p __e value will be an integer defining the power-of-two exponent
643 */
// Element-wise frexp: returns the mantissa in [0.5, 1) (sign preserved) and
// writes the power-of-two exponent into *__exp. Zero, inf, and NaN inputs
// are returned unchanged with a zero exponent, matching scalar frexp.
template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  frexp(const simd<_Tp, _Abi>& __x, _Samesize<int, simd<_Tp, _Abi>>* __exp)
  {
    if constexpr (simd_size_v<_Tp, _Abi> == 1)
      {
	// single element: defer to the scalar libm frexp
	int __tmp;
	const auto __r = std::frexp(__x[0], &__tmp);
	(*__exp)[0] = __tmp;
	return __r;
      }
    else if constexpr (__is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_frexp(__data(__x), __data(*__exp))};
#if _GLIBCXX_SIMD_X86INTRIN
    else if constexpr (__have_avx512f)
      {
	constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
	constexpr size_t _NI = _Np < 4 ? 4 : _Np; // min width for int vector ops
	const auto __v = __data(__x);
	const auto __isnonzero
	  = _Abi::_SimdImpl::_S_isnonzerovalue_mask(__v._M_data);
	// getexp yields floor(log2|x|); +1 converts to frexp's convention
	const _SimdWrapper<int, _NI> __exp_plus1
	  = 1 + __convert<_SimdWrapper<int, _NI>>(__getexp(__v))._M_data;
	// blend: exponent 0 where the input is zero/inf/NaN
	const _SimdWrapper<int, _Np> __e = __wrapper_bitcast<int, _Np>(
	  _Abi::_CommonImpl::_S_blend(_SimdWrapper<bool, _NI>(__isnonzero),
				      _SimdWrapper<int, _NI>(), __exp_plus1));
	simd_abi::deduce_t<int, _Np>::_CommonImpl::_S_store(__e, __exp);
	// blend: pass zero/inf/NaN through unchanged, else take the mantissa
	return {__private_init,
		_Abi::_CommonImpl::_S_blend(_SimdWrapper<bool, _Np>(
					      __isnonzero),
					    __v, __getmant_avx512(__v))};
      }
#endif // _GLIBCXX_SIMD_X86INTRIN
    else
      {
	// fallback implementation
	static_assert(sizeof(_Tp) == 4 || sizeof(_Tp) == 8);
	using _V = simd<_Tp, _Abi>;
	using _IV = rebind_simd_t<int, _V>;
	using namespace std::experimental::__proposed;
	using namespace std::experimental::__float_bitwise_operators;

	// bias adjustments: raw biased exponent minus __exp_adjust is the
	// frexp exponent of a normal value; __exp_offset compensates for the
	// __subnorm_scale multiplication applied to subnormals below
	constexpr int __exp_adjust = sizeof(_Tp) == 4 ? 0x7e : 0x3fe;
	constexpr int __exp_offset = sizeof(_Tp) == 4 ? 0x70 : 0x200;
	constexpr _Tp __subnorm_scale = sizeof(_Tp) == 4 ? 0x1p112 : 0x1p512;
	_GLIBCXX_SIMD_USE_CONSTEXPR_API _V __exponent_mask
	  = __infinity_v<_Tp>; // 0x7f800000 or 0x7ff0000000000000
	_GLIBCXX_SIMD_USE_CONSTEXPR_API _V __p5_1_exponent
	  = -(2 - __epsilon_v<_Tp>) / 2; // 0xbf7fffff or 0xbfefffffffffffff

	// force the exponent field to that of +/-0.5: mantissa in +/-[.5, 1)
	_V __mant = __p5_1_exponent & (__exponent_mask | __x); // +/-[.5, 1)
	const _IV __exponent_bits = __extract_exponent_as_int(__x);
	if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))))
	  {
	    // fast path: all inputs normal, just remove the bias
	    *__exp
	      = simd_cast<_Samesize<int, _V>>(__exponent_bits - __exp_adjust);
	    return __mant;
	  }

#if __FINITE_MATH_ONLY__
	// at least one element of __x is 0 or subnormal, the rest is normal
	// (inf and NaN are excluded by -ffinite-math-only)
	const auto __iszero_inf_nan = __x == 0;
#else
	// detect 0, inf, and NaN via the integer representation of |x|
	using _Ip = __int_for_sizeof_t<_Tp>;
	const auto __as_int = simd_bit_cast<rebind_simd_t<_Ip, _V>>(abs(__x));
	const auto __inf = simd_bit_cast<rebind_simd_t<_Ip, _V>>(_V(__infinity_v<_Tp>));
	const auto __iszero_inf_nan = static_simd_cast<typename _V::mask_type>(
	  __as_int == 0 || __as_int >= __inf);
#endif

	// scale subnormals into the normal range, then extract their mantissa
	const _V __scaled_subnormal = __x * __subnorm_scale;
	const _V __mant_subnormal
	  = __p5_1_exponent & (__exponent_mask | __scaled_subnormal);
	where(!isnormal(__x), __mant) = __mant_subnormal;
	where(__iszero_inf_nan, __mant) = __x; // 0/inf/NaN pass through
	_IV __e = __extract_exponent_as_int(__scaled_subnormal);
	// pick a mask type whose element size allows a cheap conversion
	// between the value mask and the int-exponent mask
	using _MaskType =
	  typename conditional_t<sizeof(typename _V::value_type) == sizeof(int),
				 _V, _IV>::mask_type;
	const _MaskType __value_isnormal = isnormal(__x).__cvt();
	where(__value_isnormal.__cvt(), __e) = __exponent_bits;
	static_assert(sizeof(_IV) == sizeof(__value_isnormal));
	// bias to subtract: __exp_adjust for normals; additionally
	// __exp_offset for nonzero subnormals (raw exponent field == 0)
	const _IV __offset
	  = (simd_bit_cast<_IV>(__value_isnormal) & _IV(__exp_adjust))
	      | (simd_bit_cast<_IV>(static_simd_cast<_MaskType>(__exponent_bits == 0)
				      & static_simd_cast<_MaskType>(__x != 0))
		   & _IV(__exp_adjust + __exp_offset));
	*__exp = simd_cast<_Samesize<int, _V>>(__e - __offset);
	return __mant;
      }
  }
736
737// }}}
// ldexp takes a fixed_size_simd<int> second argument (via
// _Extra_argument_type<int>); ilogb returns fixed_size_simd<int> because the
// scalar overload returns int
_GLIBCXX_SIMD_MATH_CALL2_(ldexp, int)
_GLIBCXX_SIMD_MATH_CALL_(ilogb)
740
741// logarithms {{{
// element-wise logarithms: natural, base-10, ln(1+x), and base-2
_GLIBCXX_SIMD_MATH_CALL_(log)
_GLIBCXX_SIMD_MATH_CALL_(log10)
_GLIBCXX_SIMD_MATH_CALL_(log1p)
_GLIBCXX_SIMD_MATH_CALL_(log2)
746
747//}}}
748// logb{{{
// logb: extracts the per-element exponent as a floating-point value
// (e.g. logb(8.) == 3.), with the zero/inf/NaN/subnormal special cases
// handled explicitly below.
template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, _Abi>>
  logb(const simd<_Tp, _Abi>& __x)
  {
    constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
    if constexpr (_Np == 1)
      // Single element: defer to the scalar function.
      return std::logb(__x[0]);
    else if constexpr (__is_fixed_size_abi_v<_Abi>)
      // fixed_size dispatches to the ABI-specific implementation.
      return {__private_init, _Abi::_SimdImpl::_S_logb(__data(__x))};
#if _GLIBCXX_SIMD_X86INTRIN // {{{
    // AVX-512 getexp instructions compute the unbiased exponent directly.
    else if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return {__private_init,
	      __auto_bitcast(_mm_getexp_ps(__to_intrin(__as_vector(__x))))};
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return {__private_init, _mm_getexp_pd(__data(__x))};
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return {__private_init, _mm256_getexp_ps(__data(__x))};
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return {__private_init, _mm256_getexp_pd(__data(__x))};
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return {__private_init,
	      __lo256(_mm512_getexp_ps(__auto_bitcast(__data(__x))))};
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return {__private_init,
	      __lo256(_mm512_getexp_pd(__auto_bitcast(__data(__x))))};
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return {__private_init, _mm512_getexp_ps(__data(__x))};
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return {__private_init, _mm512_getexp_pd(__data(__x))};
#endif // _GLIBCXX_SIMD_X86INTRIN }}}
    else
      {
	// Generic fallback: shift out the mantissa bits and remove the bias.
	using _V = simd<_Tp, _Abi>;
	using namespace std::experimental::__proposed;
	auto __is_normal = isnormal(__x);

	// work on abs(__x) to reflect the return value on Linux for negative
	// inputs (domain-error => implementation-defined value is returned)
	const _V abs_x = abs(__x);

	// __exponent(__x) returns the exponent value (bias removed) as
	// simd<_Up> with integral _Up
	auto&& __exponent = [](const _V& __v) {
	  using namespace std::experimental::__proposed;
	  using _IV = rebind_simd_t<
	    conditional_t<sizeof(_Tp) == sizeof(_LLong), _LLong, int>, _V>;
	  return (simd_bit_cast<_IV>(__v) >> (__digits_v<_Tp> - 1))
		 - (__max_exponent_v<_Tp> - 1);
	};
	_V __r = static_simd_cast<_V>(__exponent(abs_x));
	if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__is_normal)))
	  // without corner cases (nan, inf, subnormal, zero) we have our
	  // answer:
	  return __r;
	// Special cases as done below: logb(±0) -> -inf, logb(NaN) -> the
	// NaN input, logb(±inf) -> +inf.
	const auto __is_zero = __x == 0;
	const auto __is_nan = isnan(__x);
	const auto __is_inf = isinf(__x);
	where(__is_zero, __r) = -__infinity_v<_Tp>;
	where(__is_nan, __r) = __x;
	where(__is_inf, __r) = __infinity_v<_Tp>;
	__is_normal |= __is_zero || __is_nan || __is_inf;
	if (all_of(__is_normal))
	  // at this point everything but subnormals is handled
	  return __r;
	// subnormals repeat the exponent extraction after multiplication of the
	// input with a floating point value that has 112 (0x70) in its exponent
	// (not too big for sp and large enough for dp)
	const _V __scaled = abs_x * _Tp(0x1p112);
	_V __scaled_exp = static_simd_cast<_V>(__exponent(__scaled) - 112);
	where(__is_normal, __scaled_exp) = __r;
	return __scaled_exp;
      }
  }
822
823//}}}
824template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
825  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
826  modf(const simd<_Tp, _Abi>& __x, simd<_Tp, _Abi>* __iptr)
827  {
828    if constexpr (simd_size_v<_Tp, _Abi> == 1)
829      {
830	_Tp __tmp;
831	_Tp __r = std::modf(__x[0], &__tmp);
832	__iptr[0] = __tmp;
833	return __r;
834      }
835    else
836      {
837	const auto __integral = trunc(__x);
838	*__iptr = __integral;
839	auto __r = __x - __integral;
840#if !__FINITE_MATH_ONLY__
841	where(isinf(__x), __r) = _Tp();
842#endif
843	return copysign(__r, __x);
844      }
845  }
846
// More element-wise overloads generated from the scalar <cmath> functions.
_GLIBCXX_SIMD_MATH_CALL2_(scalbn, int)
_GLIBCXX_SIMD_MATH_CALL2_(scalbln, long)

_GLIBCXX_SIMD_MATH_CALL_(cbrt)

_GLIBCXX_SIMD_MATH_CALL_(abs)
_GLIBCXX_SIMD_MATH_CALL_(fabs)
854
// [parallel.simd.math] only asks for is_floating_point_v<_Tp> and forgot to
// allow signed integral _Tp
// Extension: element-wise abs for signed integral simd, delegating to the
// ABI-specific implementation.
template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  enable_if_t<!is_floating_point_v<_Tp> && is_signed_v<_Tp>, simd<_Tp, _Abi>>
  abs(const simd<_Tp, _Abi>& __x)
  { return {__private_init, _Abi::_SimdImpl::_S_abs(__data(__x))}; }
862
// Generates the two mixed-argument overloads of a binary math function:
// one of the two parameters is wrapped in __type_identity_t (a non-deduced
// context), so a call with one simd and one value convertible to it still
// deduces _Tp/_Abi from the other argument. Each overload forwards to
// _NAME(simd, simd). (Comments must stay outside the #define: a `//`
// comment would swallow the line-continuation backslash.)
#define _GLIBCXX_SIMD_CVTING2(_NAME)                                           \
template <typename _Tp, typename _Abi>                                         \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
    const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y) \
  {                                                                            \
    return _NAME(__x, __y);                                                    \
  }                                                                            \
                                                                               \
template <typename _Tp, typename _Abi>                                         \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
    const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y) \
  {                                                                            \
    return _NAME(__x, __y);                                                    \
  }
877
// Ternary analogue of _GLIBCXX_SIMD_CVTING2: generates all six overloads
// where at least one of the three parameters is a __type_identity_t
// (non-deduced) so that mixed simd/convertible-value calls resolve. Each
// overload forwards to _NAME(simd, simd, simd).
#define _GLIBCXX_SIMD_CVTING3(_NAME)                                           \
template <typename _Tp, typename _Abi>                                         \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
    const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y, \
    const simd<_Tp, _Abi>& __z)                                                \
  {                                                                            \
    return _NAME(__x, __y, __z);                                               \
  }                                                                            \
                                                                               \
template <typename _Tp, typename _Abi>                                         \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
    const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y, \
    const simd<_Tp, _Abi>& __z)                                                \
  {                                                                            \
    return _NAME(__x, __y, __z);                                               \
  }                                                                            \
                                                                               \
template <typename _Tp, typename _Abi>                                         \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
    const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y,                    \
    const __type_identity_t<simd<_Tp, _Abi>>& __z)                             \
  {                                                                            \
    return _NAME(__x, __y, __z);                                               \
  }                                                                            \
                                                                               \
template <typename _Tp, typename _Abi>                                         \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
    const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y, \
    const __type_identity_t<simd<_Tp, _Abi>>& __z)                             \
  {                                                                            \
    return _NAME(__x, __y, __z);                                               \
  }                                                                            \
                                                                               \
template <typename _Tp, typename _Abi>                                         \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
    const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y, \
    const __type_identity_t<simd<_Tp, _Abi>>& __z)                             \
  {                                                                            \
    return _NAME(__x, __y, __z);                                               \
  }                                                                            \
                                                                               \
template <typename _Tp, typename _Abi>                                         \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
    const __type_identity_t<simd<_Tp, _Abi>>& __x,                             \
    const __type_identity_t<simd<_Tp, _Abi>>& __y, const simd<_Tp, _Abi>& __z) \
  {                                                                            \
    return _NAME(__x, __y, __z);                                               \
  }
926
// Applies __apply chunk-wise across fixed_size simd arguments: the
// fixed_size storage is processed per native chunk, each chunk is wrapped
// back into a native simd (_V), __apply is invoked on those, and the
// per-chunk results are reassembled into the fixed_size result _R.
template <typename _R, typename _ToApply, typename _Tp, typename... _Tps>
  _GLIBCXX_SIMD_INTRINSIC _R
  __fixed_size_apply(_ToApply&& __apply, const _Tp& __arg0,
		     const _Tps&... __args)
  {
    return {__private_init,
	    __data(__arg0)._M_apply_per_chunk(
	      [&](auto __impl, const auto&... __inner) {
		using _V = typename decltype(__impl)::simd_type;
		return __data(__apply(_V(__private_init, __inner)...));
	      },
	      __data(__args)...)};
  }
940
// Two-argument hypot implementation. _VV is either simd<_Tp, _Abi> (by
// value) or const simd<_Tp, _Abi>& (for fixed_size; see the public hypot
// below). Avoids spurious overflow/underflow by scaling both inputs with a
// power of 2 derived from the exponent of the larger magnitude.
template <typename _VV, typename = __detail::__odr_helper>
  __remove_cvref_t<_VV>
  __hypot(_VV __x, _VV __y)
  {
    using _V = __remove_cvref_t<_VV>;
    using _Tp = typename _V::value_type;
    if constexpr (_V::size() == 1)
      return std::hypot(_Tp(__x[0]), _Tp(__y[0]));
    else if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
      {
	// Recurse per native chunk so the vector code below runs on native
	// simd types.
	return __fixed_size_apply<_V>([](auto __a,
					 auto __b) { return hypot(__a, __b); },
				      __x, __y);
      }
    else
      {
	// A simple solution for _Tp == float would be to cast to double and
	// simply calculate sqrt(x²+y²) as it can't over-/underflow anymore with
	// dp. It still needs the Annex F fixups though and isn't faster on
	// Skylake-AVX512 (not even for SSE and AVX vectors, and really bad for
	// AVX-512).
	using namespace __float_bitwise_operators;
	using namespace __proposed;
	_V __absx = abs(__x);          // no error
	_V __absy = abs(__y);          // no error
	_V __hi = max(__absx, __absy); // no error
	_V __lo = min(__absy, __absx); // no error

	// round __hi down to the next power-of-2:
	_GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>);

#ifndef __FAST_MATH__
	if constexpr (__have_neon && !__have_neon_a32)
	  { // With ARMv7 NEON, we have no subnormals and must use slightly
	    // different strategy
	    const _V __hi_exp = __hi & __inf;
	    _V __scale_back = __hi_exp;
	    // For large exponents (max & max/2) the inversion comes too close
	    // to subnormals. Subtract 3 from the exponent:
	    where(__hi_exp > 1, __scale_back) = __hi_exp * _Tp(0.125);
	    // Invert and adjust for the off-by-one error of inversion via xor:
	    const _V __scale = (__scale_back ^ __inf) * _Tp(.5);
	    const _V __h1 = __hi * __scale;
	    const _V __l1 = __lo * __scale;
	    _V __r = __scale_back * sqrt(__h1 * __h1 + __l1 * __l1);
	    // Fix up hypot(0, 0) to not be NaN:
	    where(__hi == 0, __r) = 0;
	    return __r;
	  }
#endif

#ifdef __FAST_MATH__
	// With fast-math, ignore precision of subnormals and inputs from
	// __finite_max_v/2 to __finite_max_v. This removes all
	// branching/masking.
	if constexpr (true)
#else
	if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))
				    && all_of(isnormal(__y))))
#endif
	  {
	    // Fast path: all inputs are normal.
	    const _V __hi_exp = __hi & __inf;
	    //((__hi + __hi) & __inf) ^ __inf almost works for computing
	    //__scale,
	    // except when (__hi + __hi) & __inf == __inf, in which case __scale
	    // becomes 0 (should be min/2 instead) and thus loses the
	    // information from __lo.
#ifdef __FAST_MATH__
	    using _Ip = __int_for_sizeof_t<_Tp>;
	    using _IV = rebind_simd_t<_Ip, _V>;
	    const auto __as_int = simd_bit_cast<_IV>(__hi_exp);
	    const _V __scale
	      = simd_bit_cast<_V>(2 * __bit_cast<_Ip>(_Tp(1)) - __as_int);
#else
	    const _V __scale = (__hi_exp ^ __inf) * _Tp(.5);
#endif
	    // Mask of the mantissa bits of a normal value.
	    _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __mant_mask
	      = __norm_min_v<_Tp> - __denorm_min_v<_Tp>;
	    const _V __h1 = (__hi & __mant_mask) | _V(1);
	    const _V __l1 = __lo * __scale;
	    return __hi_exp * sqrt(__h1 * __h1 + __l1 * __l1);
	  }
	else
	  {
	    // slower path to support subnormals
	    // if __hi is subnormal, avoid scaling by inf & final mul by 0
	    // (which yields NaN) by using min()
	    _V __scale = _V(1 / __norm_min_v<_Tp>);
	    // invert exponent w/o error and w/o using the slow divider unit:
	    // xor inverts the exponent but off by 1. Multiplication with .5
	    // adjusts for the discrepancy.
	    where(__hi >= __norm_min_v<_Tp>, __scale)
	      = ((__hi & __inf) ^ __inf) * _Tp(.5);
	    // adjust final exponent for subnormal inputs
	    _V __hi_exp = __norm_min_v<_Tp>;
	    where(__hi >= __norm_min_v<_Tp>, __hi_exp)
	      = __hi & __inf;         // no error
	    _V __h1 = __hi * __scale; // no error
	    _V __l1 = __lo * __scale; // no error

	    // sqrt(x²+y²) = e*sqrt((x/e)²+(y/e)²):
	    // this ensures no overflow in the argument to sqrt
	    _V __r = __hi_exp * sqrt(__h1 * __h1 + __l1 * __l1);
#ifdef __STDC_IEC_559__
	    // fixup for Annex F requirements
	    // the naive fixup goes like this:
	    //
	    // where(__l1 == 0, __r)                      = __hi;
	    // where(isunordered(__x, __y), __r)          = __quiet_NaN_v<_Tp>;
	    // where(isinf(__absx) || isinf(__absy), __r) = __inf;
	    //
	    // The fixup can be prepared in parallel with the sqrt, requiring a
	    // single blend step after hi_exp * sqrt, reducing latency and
	    // throughput:
	    _V __fixup = __hi; // __lo == 0
	    where(isunordered(__x, __y), __fixup) = __quiet_NaN_v<_Tp>;
	    where(isinf(__absx) || isinf(__absy), __fixup) = __inf;
	    where(!(__lo == 0 || isunordered(__x, __y)
		    || (isinf(__absx) || isinf(__absy))),
		  __fixup)
	      = __r;
	    __r = __fixup;
#endif
	    return __r;
	  }
      }
  }
1068
1069template <typename _Tp, typename _Abi>
1070  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
1071  hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y)
1072  {
1073    return __hypot<conditional_t<__is_fixed_size_abi_v<_Abi>,
1074				 const simd<_Tp, _Abi>&, simd<_Tp, _Abi>>>(__x,
1075									   __y);
1076  }
1077
1078_GLIBCXX_SIMD_CVTING2(hypot)
1079
  // Three-argument hypot implementation; same scaling strategy as the
  // two-argument __hypot above, with an extra branch for x87 80-bit long
  // double where the bit tricks do not apply.
  template <typename _VV, typename = __detail::__odr_helper>
  __remove_cvref_t<_VV>
  __hypot(_VV __x, _VV __y, _VV __z)
  {
    using _V = __remove_cvref_t<_VV>;
    using _Abi = typename _V::abi_type;
    using _Tp = typename _V::value_type;
    /* FIXME: enable after PR77776 is resolved
    if constexpr (_V::size() == 1)
      return std::hypot(_Tp(__x[0]), _Tp(__y[0]), _Tp(__z[0]));
    else
    */
    if constexpr (__is_fixed_size_abi_v<_Abi> && _V::size() > 1)
      {
	// Recurse per native chunk so the vector code below runs on native
	// simd types.
	return __fixed_size_apply<simd<_Tp, _Abi>>(
	  [](auto __a, auto __b, auto __c) { return hypot(__a, __b, __c); },
	  __x, __y, __z);
      }
    else
      {
	using namespace __float_bitwise_operators;
	using namespace __proposed;
	const _V __absx = abs(__x);                 // no error
	const _V __absy = abs(__y);                 // no error
	const _V __absz = abs(__z);                 // no error
	_V __hi = max(max(__absx, __absy), __absz); // no error
	_V __l0 = min(__absz, max(__absx, __absy)); // no error
	_V __l1 = min(__absy, __absx);              // no error
	if constexpr (__digits_v<_Tp> == 64 && __max_exponent_v<_Tp> == 0x4000
		      && __min_exponent_v<_Tp> == -0x3FFD && _V::size() == 1)
	  { // Seems like x87 fp80, where bit 63 is always 1 unless subnormal or
	    // NaN. In this case the bit-tricks don't work, they require IEC559
	    // binary32 or binary64 format.
#ifdef __STDC_IEC_559__
	    // fixup for Annex F requirements
	    if (isinf(__absx[0]) || isinf(__absy[0]) || isinf(__absz[0]))
	      return __infinity_v<_Tp>;
	    else if (isunordered(__absx[0], __absy[0] + __absz[0]))
	      return __quiet_NaN_v<_Tp>;
	    else if (__l0[0] == 0 && __l1[0] == 0)
	      return __hi;
#endif
	    // Overwrite mantissa+sign with 2^e (bit 63 set, rest cleared) to
	    // obtain the power of 2 used for scaling.
	    _V __hi_exp = __hi;
	    const _ULLong __tmp = 0x8000'0000'0000'0000ull;
	    __builtin_memcpy(&__data(__hi_exp), &__tmp, 8);
	    const _V __scale = 1 / __hi_exp;
	    __hi *= __scale;
	    __l0 *= __scale;
	    __l1 *= __scale;
	    return __hi_exp * sqrt((__l0 * __l0 + __l1 * __l1) + __hi * __hi);
	  }
	else
	  {
	    // round __hi down to the next power-of-2:
	    _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>);

#ifndef __FAST_MATH__
	    if constexpr (_V::size() > 1 && __have_neon && !__have_neon_a32)
	      { // With ARMv7 NEON, we have no subnormals and must use slightly
		// different strategy
		const _V __hi_exp = __hi & __inf;
		_V __scale_back = __hi_exp;
		// For large exponents (max & max/2) the inversion comes too
		// close to subnormals. Subtract 3 from the exponent:
		where(__hi_exp > 1, __scale_back) = __hi_exp * _Tp(0.125);
		// Invert and adjust for the off-by-one error of inversion via
		// xor:
		const _V __scale = (__scale_back ^ __inf) * _Tp(.5);
		const _V __h1 = __hi * __scale;
		__l0 *= __scale;
		__l1 *= __scale;
		_V __lo = __l0 * __l0
			  + __l1 * __l1; // add the two smaller values first
		asm("" : "+m"(__lo));
		_V __r = __scale_back * sqrt(__h1 * __h1 + __lo);
		// Fix up hypot(0, 0, 0) to not be NaN:
		where(__hi == 0, __r) = 0;
		return __r;
	      }
#endif

#ifdef __FAST_MATH__
	    // With fast-math, ignore precision of subnormals and inputs from
	    // __finite_max_v/2 to __finite_max_v. This removes all
	    // branching/masking.
	    if constexpr (true)
#else
	    if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))
					&& all_of(isnormal(__y))
					&& all_of(isnormal(__z))))
#endif
	      {
		// Fast path: all inputs are normal.
		const _V __hi_exp = __hi & __inf;
		//((__hi + __hi) & __inf) ^ __inf almost works for computing
		//__scale, except when (__hi + __hi) & __inf == __inf, in which
		// case __scale
		// becomes 0 (should be min/2 instead) and thus loses the
		// information from __lo.
#ifdef __FAST_MATH__
		using _Ip = __int_for_sizeof_t<_Tp>;
		using _IV = rebind_simd_t<_Ip, _V>;
		const auto __as_int = simd_bit_cast<_IV>(__hi_exp);
		const _V __scale
		  = simd_bit_cast<_V>(2 * __bit_cast<_Ip>(_Tp(1)) - __as_int);
#else
		const _V __scale = (__hi_exp ^ __inf) * _Tp(.5);
#endif
		// Mask of the mantissa bits of a normal value.
		constexpr _Tp __mant_mask
		  = __norm_min_v<_Tp> - __denorm_min_v<_Tp>;
		const _V __h1 = (__hi & _V(__mant_mask)) | _V(1);
		__l0 *= __scale;
		__l1 *= __scale;
		const _V __lo
		  = __l0 * __l0
		    + __l1 * __l1; // add the two smaller values first
		return __hi_exp * sqrt(__lo + __h1 * __h1);
	      }
	    else
	      {
		// slower path to support subnormals
		// if __hi is subnormal, avoid scaling by inf & final mul by 0
		// (which yields NaN) by using min()
		_V __scale = _V(1 / __norm_min_v<_Tp>);
		// invert exponent w/o error and w/o using the slow divider
		// unit: xor inverts the exponent but off by 1. Multiplication
		// with .5 adjusts for the discrepancy.
		where(__hi >= __norm_min_v<_Tp>, __scale)
		  = ((__hi & __inf) ^ __inf) * _Tp(.5);
		// adjust final exponent for subnormal inputs
		_V __hi_exp = __norm_min_v<_Tp>;
		where(__hi >= __norm_min_v<_Tp>, __hi_exp)
		  = __hi & __inf;         // no error
		_V __h1 = __hi * __scale; // no error
		__l0 *= __scale;          // no error
		__l1 *= __scale;          // no error
		_V __lo = __l0 * __l0
			  + __l1 * __l1; // add the two smaller values first
		_V __r = __hi_exp * sqrt(__lo + __h1 * __h1);
#ifdef __STDC_IEC_559__
		// fixup for Annex F requirements
		_V __fixup = __hi; // __lo == 0
		// where(__lo == 0, __fixup)                   = __hi;
		where(isunordered(__x, __y + __z), __fixup)
		  = __quiet_NaN_v<_Tp>;
		where(isinf(__absx) || isinf(__absy) || isinf(__absz), __fixup)
		  = __inf;
		// Instead of __lo == 0, the following could depend on __h1² ==
		// __h1² + __lo (i.e. __hi is so much larger than the other two
		// inputs that the result is exactly __hi). While this may
		// improve precision, it is likely to reduce efficiency if the
		// ISA has FMAs (because __h1² + __lo is an FMA, but the
		// intermediate
		// __h1² must be kept)
		where(!(__lo == 0 || isunordered(__x, __y + __z)
			|| isinf(__absx) || isinf(__absy) || isinf(__absz)),
		      __fixup)
		  = __r;
		__r = __fixup;
#endif
		return __r;
	      }
	  }
      }
  }
1244
1245  template <typename _Tp, typename _Abi>
1246  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
1247  hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y,
1248	const simd<_Tp, _Abi>& __z)
1249  {
1250    return __hypot<conditional_t<__is_fixed_size_abi_v<_Abi>,
1251				 const simd<_Tp, _Abi>&, simd<_Tp, _Abi>>>(__x,
1252									   __y,
1253									   __z);
1254  }
1255
1256_GLIBCXX_SIMD_CVTING3(hypot)
1257
// Element-wise overloads generated from the scalar <cmath> functions:
// powers, error/gamma functions, rounding, and remainder operations.
_GLIBCXX_SIMD_MATH_CALL2_(pow, _Tp)

_GLIBCXX_SIMD_MATH_CALL_(sqrt)
_GLIBCXX_SIMD_MATH_CALL_(erf)
_GLIBCXX_SIMD_MATH_CALL_(erfc)
_GLIBCXX_SIMD_MATH_CALL_(lgamma)
_GLIBCXX_SIMD_MATH_CALL_(tgamma)
_GLIBCXX_SIMD_MATH_CALL_(ceil)
_GLIBCXX_SIMD_MATH_CALL_(floor)
_GLIBCXX_SIMD_MATH_CALL_(nearbyint)
_GLIBCXX_SIMD_MATH_CALL_(rint)
_GLIBCXX_SIMD_MATH_CALL_(lrint)
_GLIBCXX_SIMD_MATH_CALL_(llrint)

_GLIBCXX_SIMD_MATH_CALL_(round)
_GLIBCXX_SIMD_MATH_CALL_(lround)
_GLIBCXX_SIMD_MATH_CALL_(llround)

_GLIBCXX_SIMD_MATH_CALL_(trunc)

_GLIBCXX_SIMD_MATH_CALL2_(fmod, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(remainder, _Tp)
_GLIBCXX_SIMD_MATH_CALL3_(remquo, _Tp, int*)
1281
// copysign: composes the magnitude of __x with the sign of __y,
// element-wise.
template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  copysign(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y)
  {
    if constexpr (simd_size_v<_Tp, _Abi> == 1)
      // Single element: defer to the scalar function.
      return std::copysign(__x[0], __y[0]);
    else if constexpr (__is_fixed_size_abi_v<_Abi>)
      // fixed_size dispatches to the ABI-specific implementation.
      return {__private_init, _Abi::_SimdImpl::_S_copysign(__data(__x), __data(__y))};
    else
      {
	using _V = simd<_Tp, _Abi>;
	using namespace std::experimental::__float_bitwise_operators;
	// 1 and -1 differ only in the sign bit, so XOR yields a mask with
	// only the sign bit set.
	_GLIBCXX_SIMD_USE_CONSTEXPR_API auto __signmask = _V(1) ^ _V(-1);
	return (__x & ~__signmask) | (__y & __signmask);
      }
  }
1298
// Element-wise overloads generated from the scalar <cmath> functions.
_GLIBCXX_SIMD_MATH_CALL2_(nextafter, _Tp)
// not covered in [parallel.simd.math]:
// _GLIBCXX_SIMD_MATH_CALL2_(nexttoward, long double)
_GLIBCXX_SIMD_MATH_CALL2_(fdim, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(fmax, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(fmin, _Tp)

_GLIBCXX_SIMD_MATH_CALL3_(fma, _Tp, _Tp)
_GLIBCXX_SIMD_MATH_CALL_(fpclassify)
_GLIBCXX_SIMD_MATH_CALL_(isfinite)
1309
// isnan and isinf require special treatment because old glibc may declare
// `int isinf(double)`.
// Hence they are written out by hand instead of via the macros; _R is
// simd_mask<_Tp, _Abi> (see _Math_return_type at the top of the file).
template <typename _Tp, typename _Abi, typename...,
	  typename _R = _Math_return_type_t<bool, _Tp, _Abi>>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  enable_if_t<is_floating_point_v<_Tp>, _R>
  isinf(simd<_Tp, _Abi> __x)
  { return {__private_init, _Abi::_SimdImpl::_S_isinf(__data(__x))}; }

template <typename _Tp, typename _Abi, typename...,
	  typename _R = _Math_return_type_t<bool, _Tp, _Abi>>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  enable_if_t<is_floating_point_v<_Tp>, _R>
  isnan(simd<_Tp, _Abi> __x)
  { return {__private_init, _Abi::_SimdImpl::_S_isnan(__data(__x))}; }

_GLIBCXX_SIMD_MATH_CALL_(isnormal)
1327
1328template <typename..., typename _Tp, typename _Abi>
1329  _GLIBCXX_SIMD_ALWAYS_INLINE
1330  simd_mask<_Tp, _Abi>
1331  signbit(simd<_Tp, _Abi> __x)
1332  {
1333    if constexpr (is_integral_v<_Tp>)
1334      {
1335	if constexpr (is_unsigned_v<_Tp>)
1336	  return simd_mask<_Tp, _Abi>{}; // false
1337	else
1338	  return __x < 0;
1339      }
1340    else
1341      return {__private_init, _Abi::_SimdImpl::_S_signbit(__data(__x))};
1342  }
1343
// Element-wise quiet (non-signaling) comparison overloads, generated from
// the scalar <cmath> functions.
_GLIBCXX_SIMD_MATH_CALL2_(isgreater, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(isgreaterequal, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(isless, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(islessequal, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(islessgreater, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(isunordered, _Tp)
1350
1351/* not covered in [parallel.simd.math]
1352template <typename _Abi> __doublev<_Abi> nan(const char* tagp);
1353template <typename _Abi> __floatv<_Abi> nanf(const char* tagp);
1354template <typename _Abi> __ldoublev<_Abi> nanl(const char* tagp);
1355
1356template <typename _V> struct simd_div_t {
1357    _V quot, rem;
1358};
1359
1360template <typename _Abi>
1361simd_div_t<_SCharv<_Abi>> div(_SCharv<_Abi> numer,
1362					 _SCharv<_Abi> denom);
1363template <typename _Abi>
1364simd_div_t<__shortv<_Abi>> div(__shortv<_Abi> numer,
1365					 __shortv<_Abi> denom);
1366template <typename _Abi>
1367simd_div_t<__intv<_Abi>> div(__intv<_Abi> numer, __intv<_Abi> denom);
1368template <typename _Abi>
1369simd_div_t<__longv<_Abi>> div(__longv<_Abi> numer,
1370					__longv<_Abi> denom);
1371template <typename _Abi>
1372simd_div_t<__llongv<_Abi>> div(__llongv<_Abi> numer,
1373					 __llongv<_Abi> denom);
1374*/
1375
1376// special math {{{
1377template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
1378  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1379  assoc_laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1380		 const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
1381		 const simd<_Tp, _Abi>& __x)
1382  {
1383    return simd<_Tp, _Abi>([&](auto __i) {
1384      return std::assoc_laguerre(__n[__i], __m[__i], __x[__i]);
1385    });
1386  }
1387
1388template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
1389  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1390  assoc_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1391		 const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
1392		 const simd<_Tp, _Abi>& __x)
1393  {
1394    return simd<_Tp, _Abi>([&](auto __i) {
1395      return std::assoc_legendre(__n[__i], __m[__i], __x[__i]);
1396    });
1397  }
1398
// Element-wise special math overloads generated from the scalar functions.
_GLIBCXX_SIMD_MATH_CALL2_(beta, _Tp)
_GLIBCXX_SIMD_MATH_CALL_(comp_ellint_1)
_GLIBCXX_SIMD_MATH_CALL_(comp_ellint_2)
_GLIBCXX_SIMD_MATH_CALL2_(comp_ellint_3, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_i, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_j, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_k, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_neumann, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(ellint_1, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(ellint_2, _Tp)
_GLIBCXX_SIMD_MATH_CALL3_(ellint_3, _Tp, _Tp)
_GLIBCXX_SIMD_MATH_CALL_(expint)
1411
1412template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
1413  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1414  hermite(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1415	  const simd<_Tp, _Abi>& __x)
1416  {
1417    return simd<_Tp, _Abi>(
1418      [&](auto __i) { return std::hermite(__n[__i], __x[__i]); });
1419  }
1420
1421template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
1422  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1423  laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1424	   const simd<_Tp, _Abi>& __x)
1425  {
1426    return simd<_Tp, _Abi>(
1427      [&](auto __i) { return std::laguerre(__n[__i], __x[__i]); });
1428  }
1429
1430template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
1431  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1432  legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1433	   const simd<_Tp, _Abi>& __x)
1434  {
1435    return simd<_Tp, _Abi>(
1436      [&](auto __i) { return std::legendre(__n[__i], __x[__i]); });
1437  }
1438
// Element-wise Riemann zeta function.
_GLIBCXX_SIMD_MATH_CALL_(riemann_zeta)
1440
1441template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
1442  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1443  sph_bessel(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1444	     const simd<_Tp, _Abi>& __x)
1445  {
1446    return simd<_Tp, _Abi>(
1447      [&](auto __i) { return std::sph_bessel(__n[__i], __x[__i]); });
1448  }
1449
1450template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
1451  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1452  sph_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __l,
1453	       const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
1454	       const simd<_Tp, _Abi>& theta)
1455  {
1456    return simd<_Tp, _Abi>([&](auto __i) {
1457      return std::assoc_legendre(__l[__i], __m[__i], theta[__i]);
1458    });
1459  }
1460
1461template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
1462  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1463  sph_neumann(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1464	      const simd<_Tp, _Abi>& __x)
1465  {
1466    return simd<_Tp, _Abi>(
1467      [&](auto __i) { return std::sph_neumann(__n[__i], __x[__i]); });
1468  }
1469// }}}
1470
1471#undef _GLIBCXX_SIMD_CVTING2
1472#undef _GLIBCXX_SIMD_CVTING3
1473#undef _GLIBCXX_SIMD_MATH_CALL_
1474#undef _GLIBCXX_SIMD_MATH_CALL2_
1475#undef _GLIBCXX_SIMD_MATH_CALL3_
1476
1477_GLIBCXX_SIMD_END_NAMESPACE
1478
1479#endif // __cplusplus >= 201703L
1480#endif // _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_
1481
1482// vim: foldmethod=marker sw=2 ts=8 noet sts=2
1483