/*===---- ammintrin.h - SSE4a intrinsics -----------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __AMMINTRIN_H
#define __AMMINTRIN_H

#include <pmmintrin.h>

/* Attribute set applied to every inline function defined in this header. The
 * macro is private to this file and is undefined again before the closing
 * include guard. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("sse4a"),         \
                 __min_vector_width__(128)))

/// Extracts a bit field from the lower 64 bits of the 128-bit integer
///    vector operand. The field starts at the index \a idx and has the
///    length \a len.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_extracti_si64(__m128i x, const int len, const int idx);
/// \endcode
///
/// This intrinsic corresponds to the <c> EXTRQ </c> instruction.
///
/// \param x
///    The value from which bits are extracted.
/// \param len
///    The length of the field, given by bits [5:0]; the other bits are
///    ignored. If bits [5:0] are zero, the length is interpreted as 64.
/// \param idx
///    The index of the least significant bit of the field, given by bits
///    [5:0]; the other bits are ignored. The result is undefined if the sum
///    of the index and length is greater than 64, or if the length is zero
///    but the index is non-zero. If the length and index are both zero, bits
///    [63:0] of parameter \a x are extracted.
/// \returns A 128-bit integer vector whose lower 64 bits contain the bits
///    extracted from the source operand.
#define _mm_extracti_si64(x, len, idx)                                         \
  ((__m128i)__builtin_ia32_extrqi(                                             \
      (__v2di)(__m128i)(x),                                                    \
      (char)(len),                                                             \
      (char)(idx)))

/// Extracts a bit field from the lower 64 bits of the 128-bit integer
///    vector operand. Both the index and the length of the field are taken
///    from \a __y.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> EXTRQ </c> instruction.
///
/// \param __x
///    The value from which bits are extracted.
/// \param __y
///    Holds the index of the least significant bit at [13:8] and the length
///    at [5:0]; all other bits are ignored. If bits [5:0] are zero, the
///    length is interpreted as 64. The result is undefined if the sum of the
///    index and length is greater than 64, or if the length is zero but the
///    index is non-zero. If the length and index are both zero, bits [63:0]
///    of parameter \a __x are extracted.
/// \returns A 128-bit vector whose lower 64 bits contain the bits extracted
///    from the source operand.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_extract_si64(__m128i __x, __m128i __y)
{
  /* Reinterpret the operands as the vector types the builtin expects. */
  __v2di __src = (__v2di)__x;
  __v16qi __ctl = (__v16qi)__y;
  return (__m128i)__builtin_ia32_extrq(__src, __ctl);
}

/// Inserts a bit field taken from the source integer vector \a y into the
///    lower 64 bits of the destination integer vector \a x. The field is
///    placed at the index \a idx and has the length \a len.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_inserti_si64(__m128i x, __m128i y, const int len,
/// const int idx);
/// \endcode
///
/// This intrinsic corresponds to the <c> INSERTQ </c> instruction.
///
/// \param x
///    The destination operand where bits will be inserted. The inserted bits
///    are defined by the length \a len and by the index \a idx specifying the
///    least significant bit.
/// \param y
///    The source operand containing the bits to be extracted. The extracted
///    bits are the least significant bits of operand \a y of length \a len.
/// \param len
///    The length of the field, given by bits [5:0]; the other bits are
///    ignored. If bits [5:0] are zero, the length is interpreted as 64.
/// \param idx
///    The index of the least significant bit of the field, given by bits
///    [5:0]; the other bits are ignored. The result is undefined if the sum
///    of the index and length is greater than 64, or if the length is zero
///    but the index is non-zero. If the length and index are both zero, bits
///    [63:0] of parameter \a y are inserted into parameter \a x.
/// \returns A 128-bit integer vector containing the original lower 64-bits of
///    destination operand \a x with the specified bitfields replaced by the
///    lower bits of source operand \a y. The upper 64 bits of the return value
///    are undefined.
#define _mm_inserti_si64(x, y, len, idx)                                       \
  ((__m128i)__builtin_ia32_insertqi(                                           \
      (__v2di)(__m128i)(x),                                                    \
      (__v2di)(__m128i)(y),                                                    \
      (char)(len),                                                             \
      (char)(idx)))

/// Inserts a bit field taken from the source integer vector \a __y into the
///    lower 64 bits of the destination integer vector \a __x. Both the index
///    and the length of the field are taken from \a __y.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> INSERTQ </c> instruction.
///
/// \param __x
///    The destination operand where bits will be inserted. The inserted bits
///    are defined by the length and by the index of the least significant bit
///    specified by operand \a __y.
/// \param __y
///    The source operand containing the bits to be extracted. The extracted
///    bits are the least significant bits of operand \a __y with length
///    specified by bits [69:64]. These are inserted into the destination at
///    the index specified by bits [77:72]; all other bits are ignored. If
///    bits [69:64] are zero, the length is interpreted as 64. The result is
///    undefined if the sum of the index and length is greater than 64, or if
///    the length is zero but the index is non-zero. If the length and index
///    are both zero, bits [63:0] of parameter \a __y are inserted into
///    parameter \a __x.
/// \returns A 128-bit integer vector containing the original lower 64-bits of
///    destination operand \a __x with the specified bitfields replaced by the
///    lower bits of source operand \a __y. The upper 64 bits of the return
///    value are undefined.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_insert_si64(__m128i __x, __m128i __y)
{
  /* Reinterpret the operands as the vector types the builtin expects. */
  __v2di __dst = (__v2di)__x;
  __v2di __src = (__v2di)__y;
  return (__m128i)__builtin_ia32_insertq(__dst, __src);
}

/// Stores a 64-bit double-precision value in a 64-bit memory location.
///    The data is flagged as non-temporal (unlikely to be used again soon)
///    in order to minimize caching.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> MOVNTSD </c> instruction.
///
/// \param __p
///    The 64-bit memory location used to store the register value.
/// \param __a
///    The 64-bit double-precision floating-point register value to be stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_sd(double *__p, __m128d __a)
{
  __v2df __val = (__v2df)__a;
  __builtin_ia32_movntsd(__p, __val);
}

/// Stores a 32-bit single-precision floating-point value in a 32-bit
///    memory location. The data is flagged as non-temporal (unlikely to be
///    used again soon) in order to minimize caching.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> MOVNTSS </c> instruction.
///
/// \param __p
///    The 32-bit memory location used to store the register value.
/// \param __a
///    The 32-bit single-precision floating-point register value to be stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_ss(float *__p, __m128 __a)
{
  __v4sf __val = (__v4sf)__a;
  __builtin_ia32_movntss(__p, __val);
}

#undef __DEFAULT_FN_ATTRS

#endif /* __AMMINTRIN_H */