/*===---- ammintrin.h - SSE4a intrinsics -----------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __AMMINTRIN_H
#define __AMMINTRIN_H

#if !defined(__i386__) && !defined(__x86_64__)
#error "This header is only meant to be used on x86 and x64 architecture"
#endif

#include <pmmintrin.h>

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4a"), __min_vector_width__(128)))

/// Extracts the specified bits from the lower 64 bits of the 128-bit
///    integer vector operand at the index \a idx and of the length \a len.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_extracti_si64(__m128i x, const int len, const int idx);
/// \endcode
///
/// This intrinsic corresponds to the <c> EXTRQ </c> instruction.
///
/// \param x
///    The value from which bits are extracted.
/// \param len
///    Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]
///    are zero, the length is interpreted as 64.
/// \param idx
///    Bits [5:0] specify the index of the least significant bit; the other
///    bits are ignored. If the sum of the index and length is greater than 64,
///    the result is undefined. If the length and index are both zero, bits
///    [63:0] of parameter \a x are extracted. If the length is zero but the
///    index is non-zero, the result is undefined.
/// \returns A 128-bit integer vector whose lower 64 bits contain the bits
///    extracted from the source operand.
/* Provided as a macro (see the prototype above: len and idx are declared
   `const int`); both are narrowed to char before reaching the builtin. */
#define _mm_extracti_si64(x, len, idx) \
  ((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(x), \
                                  (char)(len), (char)(idx)))

/// Extracts the specified bits from the lower 64 bits of the 128-bit
///    integer vector operand at the index and of the length specified by
///    \a __y.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> EXTRQ </c> instruction.
///
/// \param __x
///    The value from which bits are extracted.
/// \param __y
///    Specifies the index of the least significant bit at [13:8] and the
///    length at [5:0]; all other bits are ignored. If bits [5:0] are zero, the
///    length is interpreted as 64. If the sum of the index and length is
///    greater than 64, the result is undefined. If the length and index are
///    both zero, bits [63:0] of parameter \a __x are extracted. If the length
///    is zero but the index is non-zero, the result is undefined.
/// \returns A 128-bit vector whose lower 64 bits contain the bits extracted
///    from the source operand.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_extract_si64(__m128i __x, __m128i __y)
{
  return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y);
}

/// Inserts bits of a specified length from the source integer vector
///    \a y into the lower 64 bits of the destination integer vector \a x at
///    the index \a idx and of the length \a len.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_inserti_si64(__m128i x, __m128i y, const int len,
/// const int idx);
/// \endcode
///
/// This intrinsic corresponds to the <c> INSERTQ </c> instruction.
///
/// \param x
///    The destination operand where bits will be inserted. The inserted bits
///    are defined by the length \a len and by the index \a idx specifying the
///    least significant bit.
/// \param y
///    The source operand containing the bits to be extracted. The extracted
///    bits are the least significant bits of operand \a y of length \a len.
/// \param len
///    Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]
///    are zero, the length is interpreted as 64.
/// \param idx
///    Bits [5:0] specify the index of the least significant bit; the other
///    bits are ignored. If the sum of the index and length is greater than 64,
///    the result is undefined. If the length and index are both zero, bits
///    [63:0] of parameter \a y are inserted into parameter \a x. If the length
///    is zero but the index is non-zero, the result is undefined.
/// \returns A 128-bit integer vector containing the original lower 64-bits of
///    destination operand \a x with the specified bitfields replaced by the
///    lower bits of source operand \a y. The upper 64 bits of the return value
///    are undefined.
/* Provided as a macro (see the prototype above: len and idx are declared
   `const int`); both are narrowed to char before reaching the builtin. */
#define _mm_inserti_si64(x, y, len, idx) \
  ((__m128i)__builtin_ia32_insertqi((__v2di)(__m128i)(x), \
                                    (__v2di)(__m128i)(y), \
                                    (char)(len), (char)(idx)))

/// Inserts bits of a specified length from the source integer vector
///    \a __y into the lower 64 bits of the destination integer vector \a __x
///    at the index and of the length specified by \a __y.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> INSERTQ </c> instruction.
///
/// \param __x
///    The destination operand where bits will be inserted. The inserted bits
///    are defined by the length and by the index of the least significant bit
///    specified by operand \a __y.
/// \param __y
///    The source operand containing the bits to be extracted. The extracted
///    bits are the least significant bits of operand \a __y with length
///    specified by bits [69:64]. These are inserted into the destination at the
///    index specified by bits [77:72]; all other bits are ignored. If bits
///    [69:64] are zero, the length is interpreted as 64. If the sum of the
///    index and length is greater than 64, the result is undefined. If the
///    length and index are both zero, bits [63:0] of parameter \a __y are
///    inserted into parameter \a __x. If the length is zero but the index is
///    non-zero, the result is undefined.
/// \returns A 128-bit integer vector containing the original lower 64-bits of
///    destination operand \a __x with the specified bitfields replaced by the
///    lower bits of source operand \a __y. The upper 64 bits of the return
///    value are undefined.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_insert_si64(__m128i __x, __m128i __y)
{
  return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y);
}

/// Stores a 64-bit double-precision value in a 64-bit memory location.
///    To minimize caching, the data is flagged as non-temporal (unlikely to be
///    used again soon).
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> MOVNTSD </c> instruction.
///
/// \param __p
///    The 64-bit memory location used to store the register value.
/// \param __a
///    The 64-bit double-precision floating-point register value to be stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_sd(void *__p, __m128d __a)
{
  __builtin_ia32_movntsd((double *)__p, (__v2df)__a);
}

/// Stores a 32-bit single-precision floating-point value in a 32-bit
///    memory location. To minimize caching, the data is flagged as
///    non-temporal (unlikely to be used again soon).
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> MOVNTSS </c> instruction.
///
/// \param __p
///    The 32-bit memory location used to store the register value.
/// \param __a
///    The 32-bit single-precision floating-point register value to be stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_ss(void *__p, __m128 __a)
{
  __builtin_ia32_movntss((float *)__p, (__v4sf)__a);
}

/* Do not let the attribute helper leak into code that includes this header. */
#undef __DEFAULT_FN_ATTRS

#endif /* __AMMINTRIN_H */