/*===---- ammintrin.h - SSE4a intrinsics -----------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __AMMINTRIN_H
#define __AMMINTRIN_H

#include <pmmintrin.h>

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("sse4a"),          \
                 __min_vector_width__(128)))

/// Extracts the specified bits from the lower 64 bits of the 128-bit
///    integer vector operand at the index \a idx and of the length \a len.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_extracti_si64(__m128i x, const int len, const int idx);
/// \endcode
///
/// This intrinsic corresponds to the <c> EXTRQ </c> instruction.
///
/// \param x
///    The value from which bits are extracted.
/// \param len
///    Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]
///    are zero, the length is interpreted as 64.
/// \param idx
///    Bits [5:0] specify the index of the least significant bit; the other
///    bits are ignored. If the sum of the index and length is greater than 64,
///    the result is undefined. If the length and index are both zero, bits
///    [63:0] of parameter \a x are extracted. If the length is zero but the
///    index is non-zero, the result is undefined.
/// \returns A 128-bit integer vector whose lower 64 bits contain the bits
///    extracted from the source operand.
#define _mm_extracti_si64(x, len, idx) \
  ((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(x), \
                                  (char)(len), (char)(idx)))

/// Extracts the specified bits from the lower 64 bits of the 128-bit
///    integer vector operand at the index and of the length specified by
///    \a __y.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> EXTRQ </c> instruction.
///
/// \param __x
///    The value from which bits are extracted.
/// \param __y
///    Specifies the index of the least significant bit at [13:8] and the
///    length at [5:0]; all other bits are ignored. If bits [5:0] are zero, the
///    length is interpreted as 64. If the sum of the index and length is
///    greater than 64, the result is undefined. If the length and index are
///    both zero, bits [63:0] of parameter \a __x are extracted. If the length
///    is zero but the index is non-zero, the result is undefined.
/// \returns A 128-bit vector whose lower 64 bits contain the bits extracted
///    from the source operand.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_extract_si64(__m128i __x, __m128i __y)
{
  return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y);
}

/// Inserts bits of a specified length from the source integer vector
///    \a y into the lower 64 bits of the destination integer vector \a x at
///    the index \a idx and of the length \a len.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_inserti_si64(__m128i x, __m128i y, const int len,
/// const int idx);
/// \endcode
///
/// This intrinsic corresponds to the <c> INSERTQ </c> instruction.
///
/// \param x
///    The destination operand where bits will be inserted. The inserted bits
///    are defined by the length \a len and by the index \a idx specifying the
///    least significant bit.
/// \param y
///    The source operand containing the bits to be extracted. The extracted
///    bits are the least significant bits of operand \a y of length \a len.
/// \param len
///    Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]
///    are zero, the length is interpreted as 64.
/// \param idx
///    Bits [5:0] specify the index of the least significant bit; the other
///    bits are ignored. If the sum of the index and length is greater than 64,
///    the result is undefined. If the length and index are both zero, bits
///    [63:0] of parameter \a y are inserted into parameter \a x. If the length
///    is zero but the index is non-zero, the result is undefined.
/// \returns A 128-bit integer vector containing the original lower 64-bits of
///    destination operand \a x with the specified bitfields replaced by the
///    lower bits of source operand \a y. The upper 64 bits of the return value
///    are undefined.
#define _mm_inserti_si64(x, y, len, idx) \
  ((__m128i)__builtin_ia32_insertqi((__v2di)(__m128i)(x), \
                                    (__v2di)(__m128i)(y), \
                                    (char)(len), (char)(idx)))

/// Inserts bits of a specified length from the source integer vector
///    \a __y into the lower 64 bits of the destination integer vector \a __x
///    at the index and of the length specified by \a __y.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> INSERTQ </c> instruction.
///
/// \param __x
///    The destination operand where bits will be inserted. The inserted bits
///    are defined by the length and by the index of the least significant bit
///    specified by operand \a __y.
/// \param __y
///    The source operand containing the bits to be extracted. The extracted
///    bits are the least significant bits of operand \a __y with length
///    specified by bits [69:64]. These are inserted into the destination at the
///    index specified by bits [77:72]; all other bits are ignored. If bits
///    [69:64] are zero, the length is interpreted as 64. If the sum of the
///    index and length is greater than 64, the result is undefined. If the
///    length and index are both zero, bits [63:0] of parameter \a __y are
///    inserted into parameter \a __x. If the length is zero but the index is
///    non-zero, the result is undefined.
/// \returns A 128-bit integer vector containing the original lower 64-bits of
///    destination operand \a __x with the specified bitfields replaced by the
///    lower bits of source operand \a __y. The upper 64 bits of the return
///    value are undefined.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_insert_si64(__m128i __x, __m128i __y)
{
  return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y);
}

/// Stores a 64-bit double-precision value in a 64-bit memory location.
///    To minimize caching, the data is flagged as non-temporal (unlikely to be
///    used again soon).
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> MOVNTSD </c> instruction.
///
/// \param __p
///    The 64-bit memory location used to store the register value.
/// \param __a
///    The 64-bit double-precision floating-point register value to be stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_sd(double *__p, __m128d __a)
{
  __builtin_ia32_movntsd(__p, (__v2df)__a);
}

/// Stores a 32-bit single-precision floating-point value in a 32-bit
///    memory location. To minimize caching, the data is flagged as
///    non-temporal (unlikely to be used again soon).
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> MOVNTSS </c> instruction.
///
/// \param __p
///    The 32-bit memory location used to store the register value.
/// \param __a
///    The 32-bit single-precision floating-point register value to be stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_ss(float *__p, __m128 __a)
{
  __builtin_ia32_movntss(__p, (__v4sf)__a);
}

#undef __DEFAULT_FN_ATTRS

#endif /* __AMMINTRIN_H */